CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38  /// Kinds of OpenMP regions used in codegen.
39  enum CGOpenMPRegionKind {
40  /// Region with outlined function for standalone 'parallel'
41  /// directive.
42  ParallelOutlinedRegion,
43  /// Region with outlined function for standalone 'task' directive.
44  TaskOutlinedRegion,
45  /// Region for constructs that do not require function outlining,
46  /// like 'for', 'sections', 'atomic' etc. directives.
47  InlinedRegion,
48  /// Region with outlined function for standalone 'target' directive.
49  TargetRegion,
50  };
51 
52  CGOpenMPRegionInfo(const CapturedStmt &CS,
53  const CGOpenMPRegionKind RegionKind,
54  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55  bool HasCancel)
56  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61  bool HasCancel)
62  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63  Kind(Kind), HasCancel(HasCancel) {}
64 
65  /// Get a variable or parameter for storing global thread id
66  /// inside OpenMP construct.
67  virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69  /// Emit the captured statement body.
70  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72  /// Get an LValue for the current ThreadID variable.
73  /// \return LValue for thread id variable. This LValue always has type int32*.
74  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82  bool hasCancel() const { return HasCancel; }
83 
84  static bool classof(const CGCapturedStmtInfo *Info) {
85  return Info->getKind() == CR_OpenMP;
86  }
87 
88  ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91  CGOpenMPRegionKind RegionKind;
92  RegionCodeGenTy CodeGen;
93  OpenMPDirectiveKind Kind;
94  bool HasCancel;
95 };
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101  const RegionCodeGenTy &CodeGen,
102  OpenMPDirectiveKind Kind, bool HasCancel,
103  StringRef HelperName)
104  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105  HasCancel),
106  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108  }
109 
110  /// Get a variable or parameter for storing global thread id
111  /// inside OpenMP construct.
112  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114  /// Get the name of the capture helper.
115  StringRef getHelperName() const override { return HelperName; }
116 
117  static bool classof(const CGCapturedStmtInfo *Info) {
118  return CGOpenMPRegionInfo::classof(Info) &&
119  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120  ParallelOutlinedRegion;
121  }
122 
123 private:
124  /// A variable or parameter storing global thread id for OpenMP
125  /// constructs.
126  const VarDecl *ThreadIDVar;
127  StringRef HelperName;
128 };
129 
130 /// API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132 public:
133  class UntiedTaskActionTy final : public PrePostActionTy {
134  bool Untied;
135  const VarDecl *PartIDVar;
136  const RegionCodeGenTy UntiedCodeGen;
137  llvm::SwitchInst *UntiedSwitch = nullptr;
138 
139  public:
140  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141  const RegionCodeGenTy &UntiedCodeGen)
142  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143  void Enter(CodeGenFunction &CGF) override {
144  if (Untied) {
145  // Emit task switching point.
146  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147  CGF.GetAddrOfLocalVar(PartIDVar),
148  PartIDVar->getType()->castAs<PointerType>());
149  llvm::Value *Res =
150  CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153  CGF.EmitBlock(DoneBB);
154  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157  CGF.Builder.GetInsertBlock());
158  emitUntiedSwitch(CGF);
159  }
160  }
161  void emitUntiedSwitch(CodeGenFunction &CGF) const {
162  if (Untied) {
163  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164  CGF.GetAddrOfLocalVar(PartIDVar),
165  PartIDVar->getType()->castAs<PointerType>());
166  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167  PartIdLVal);
168  UntiedCodeGen(CGF);
169  CodeGenFunction::JumpDest CurPoint =
170  CGF.getJumpDestInCurrentScope(".untied.next.");
171  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174  CGF.Builder.GetInsertBlock());
175  CGF.EmitBranchThroughCleanup(CurPoint);
176  CGF.EmitBlock(CurPoint.getBlock());
177  }
178  }
179  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180  };
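 // Rough sketch of the control flow the action above builds for an untied
 // task (illustrative pseudo-code only, not the exact IR; labels follow the
 // basic-block names used above):
 //
 //   switch (*part_id) {            // emitted in Enter()
 //   case 0: goto .untied.jmp.0;    // initial entry
 //   case 1: goto .untied.jmp.1;    // one case per emitUntiedSwitch() call
 //   default: goto .untied.done.;
 //   }
 //   .untied.done.:  return;
 //   .untied.jmp.0:
 //     ... code up to the first task scheduling point ...
 //     *part_id = 1;                // record where to resume
 //     <UntiedCodeGen: re-enqueue the task, e.g. via __kmpc_omp_task>
 //     return;
 //   .untied.jmp.1:
 //     ... code after the scheduling point ...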
181  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182  const VarDecl *ThreadIDVar,
183  const RegionCodeGenTy &CodeGen,
184  OpenMPDirectiveKind Kind, bool HasCancel,
185  const UntiedTaskActionTy &Action)
186  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187  ThreadIDVar(ThreadIDVar), Action(Action) {
188  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189  }
190 
191  /// Get a variable or parameter for storing global thread id
192  /// inside OpenMP construct.
193  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195  /// Get an LValue for the current ThreadID variable.
196  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198  /// Get the name of the capture helper.
199  StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201  void emitUntiedSwitch(CodeGenFunction &CGF) override {
202  Action.emitUntiedSwitch(CGF);
203  }
204 
205  static bool classof(const CGCapturedStmtInfo *Info) {
206  return CGOpenMPRegionInfo::classof(Info) &&
207  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208  TaskOutlinedRegion;
209  }
210 
211 private:
212  /// A variable or parameter storing global thread id for OpenMP
213  /// constructs.
214  const VarDecl *ThreadIDVar;
215  /// Action for emitting code for untied tasks.
216  const UntiedTaskActionTy &Action;
217 };
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224  const RegionCodeGenTy &CodeGen,
225  OpenMPDirectiveKind Kind, bool HasCancel)
226  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227  OldCSI(OldCSI),
228  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230  // Retrieve the value of the context parameter.
231  llvm::Value *getContextValue() const override {
232  if (OuterRegionInfo)
233  return OuterRegionInfo->getContextValue();
234  llvm_unreachable("No context value for inlined OpenMP region");
235  }
236 
237  void setContextValue(llvm::Value *V) override {
238  if (OuterRegionInfo) {
239  OuterRegionInfo->setContextValue(V);
240  return;
241  }
242  llvm_unreachable("No context value for inlined OpenMP region");
243  }
244 
245  /// Lookup the captured field decl for a variable.
246  const FieldDecl *lookup(const VarDecl *VD) const override {
247  if (OuterRegionInfo)
248  return OuterRegionInfo->lookup(VD);
249  // If there is no outer outlined region, there is no need to look up the
250  // variable in a list of captured variables; we can use the original one.
251  return nullptr;
252  }
253 
254  FieldDecl *getThisFieldDecl() const override {
255  if (OuterRegionInfo)
256  return OuterRegionInfo->getThisFieldDecl();
257  return nullptr;
258  }
259 
260  /// Get a variable or parameter for storing global thread id
261  /// inside OpenMP construct.
262  const VarDecl *getThreadIDVariable() const override {
263  if (OuterRegionInfo)
264  return OuterRegionInfo->getThreadIDVariable();
265  return nullptr;
266  }
267 
268  /// Get an LValue for the current ThreadID variable.
269  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270  if (OuterRegionInfo)
271  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272  llvm_unreachable("No LValue for inlined OpenMP construct");
273  }
274 
275  /// Get the name of the capture helper.
276  StringRef getHelperName() const override {
277  if (auto *OuterRegionInfo = getOldCSI())
278  return OuterRegionInfo->getHelperName();
279  llvm_unreachable("No helper name for inlined OpenMP construct");
280  }
281 
282  void emitUntiedSwitch(CodeGenFunction &CGF) override {
283  if (OuterRegionInfo)
284  OuterRegionInfo->emitUntiedSwitch(CGF);
285  }
286 
287  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289  static bool classof(const CGCapturedStmtInfo *Info) {
290  return CGOpenMPRegionInfo::classof(Info) &&
291  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292  }
293 
294  ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297  /// CodeGen info about outer OpenMP region.
298  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299  CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For these captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application, so it is provided by the client, because only the client has
306 /// the information needed to generate it (an illustrative naming sketch follows the class).
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310  const RegionCodeGenTy &CodeGen, StringRef HelperName)
311  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312  /*HasCancel=*/false),
313  HelperName(HelperName) {}
314 
315  /// This is unused for target regions because each starts executing
316  /// with a single thread.
317  const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319  /// Get the name of the capture helper.
320  StringRef getHelperName() const override { return HelperName; }
321 
322  static bool classof(const CGCapturedStmtInfo *Info) {
323  return CGOpenMPRegionInfo::classof(Info) &&
324  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325  }
326 
327 private:
328  StringRef HelperName;
329 };
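// Illustrative only: a helper name supplied by the client typically follows a
// scheme along the lines of
//   __omp_offloading_<device-id>_<file-id>_<parent-function>_l<line>
// so that the host and device sides agree on the name of the target entry.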
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332  llvm_unreachable("No codegen for expressions");
333 }
334 /// API for generation of expressions captured in an innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340  OMPD_unknown,
341  /*HasCancel=*/false),
342  PrivScope(CGF) {
343  // Make sure the globals captured in the provided statement are local by
344  // using the privatization logic. We assume the same variable is not
345  // captured more than once.
346  for (const auto &C : CS.captures()) {
347  if (!C.capturesVariable() && !C.capturesVariableByCopy())
348  continue;
349 
350  const VarDecl *VD = C.getCapturedVar();
351  if (VD->isLocalVarDeclOrParm())
352  continue;
353 
354  DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355  /*RefersToEnclosingVariableOrCapture=*/false,
356  VD->getType().getNonReferenceType(), VK_LValue,
357  C.getLocation());
358  PrivScope.addPrivate(
359  VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360  }
361  (void)PrivScope.Privatize();
362  }
363 
364  /// Lookup the captured field decl for a variable.
365  const FieldDecl *lookup(const VarDecl *VD) const override {
366  if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367  return FD;
368  return nullptr;
369  }
370 
371  /// Emit the captured statement body.
372  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373  llvm_unreachable("No body for expressions");
374  }
375 
376  /// Get a variable or parameter for storing global thread id
377  /// inside OpenMP construct.
378  const VarDecl *getThreadIDVariable() const override {
379  llvm_unreachable("No thread id for expressions");
380  }
381 
382  /// Get the name of the capture helper.
383  StringRef getHelperName() const override {
384  llvm_unreachable("No helper name for expressions");
385  }
386 
387  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390  /// Private scope to capture global variables.
391  CodeGenFunction::OMPPrivateScope PrivScope;
392 };
393 
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396  CodeGenFunction &CGF;
397  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398  FieldDecl *LambdaThisCaptureField = nullptr;
399  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400 
401 public:
402  /// Constructs region for combined constructs.
403  /// \param CodeGen Code generation sequence for combined directives. Includes
404  /// a list of functions used for code generation of implicitly inlined
405  /// regions.
406  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407  OpenMPDirectiveKind Kind, bool HasCancel)
408  : CGF(CGF) {
409  // Start emission for the construct.
410  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414  CGF.LambdaThisCaptureField = nullptr;
415  BlockInfo = CGF.BlockInfo;
416  CGF.BlockInfo = nullptr;
417  }
418 
419  ~InlinedOpenMPRegionRAII() {
420  // Restore original CapturedStmtInfo only if we're done with code emission.
421  auto *OldCSI =
422  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423  delete CGF.CapturedStmtInfo;
424  CGF.CapturedStmtInfo = OldCSI;
425  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427  CGF.BlockInfo = BlockInfo;
428  }
429 };
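// Typical use of the RAII above (illustrative; 'SomeCodeGen' stands for any
// RegionCodeGenTy provided by the caller):
//   {
//     InlinedOpenMPRegionRAII Region(CGF, SomeCodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     SomeCodeGen(CGF); // body emitted with the inlined region info active
//   } // original CapturedStmtInfo, lambda captures and block info restored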
430 
431 /// Values for bit flags used in the ident_t to describe the fields.
432 /// All enumerated elements are named and described in accordance with the code
433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434 enum OpenMPLocationFlags : unsigned {
435  /// Use trampoline for internal microtask.
436  OMP_IDENT_IMD = 0x01,
437  /// Use c-style ident structure.
438  OMP_IDENT_KMPC = 0x02,
439  /// Atomic reduction option for kmpc_reduce.
440  OMP_ATOMIC_REDUCE = 0x10,
441  /// Explicit 'barrier' directive.
442  OMP_IDENT_BARRIER_EXPL = 0x20,
443  /// Implicit barrier in code.
444  OMP_IDENT_BARRIER_IMPL = 0x40,
445  /// Implicit barrier in 'for' directive.
446  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447  /// Implicit barrier in 'sections' directive.
448  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449  /// Implicit barrier in 'single' directive.
450  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451  /// Call of __kmp_for_static_init for static loop.
452  OMP_IDENT_WORK_LOOP = 0x200,
453  /// Call of __kmp_for_static_init for sections.
454  OMP_IDENT_WORK_SECTIONS = 0x400,
455  /// Call of __kmp_for_static_init for distribute.
456  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
458 };
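// For example, the location emitted for the implicit barrier at the end of a
// worksharing 'for' typically carries OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR;
// emitUpdateLocation() below always ORs in OMP_IDENT_KMPC.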
459 
460 /// Describes ident structure that describes a source location.
461 /// All descriptions are taken from
462 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
463 /// Original structure:
464 /// typedef struct ident {
465 /// kmp_int32 reserved_1; /**< might be used in Fortran;
466 /// see above */
467 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
468 /// KMP_IDENT_KMPC identifies this union
469 /// member */
470 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
471 /// see above */
472 ///#if USE_ITT_BUILD
473 /// /* but currently used for storing
474 /// region-specific ITT */
475 /// /* contextual information. */
476 ///#endif /* USE_ITT_BUILD */
477 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
478 /// C++ */
479 /// char const *psource; /**< String describing the source location.
480 /// The string is composed of semi-colon separated
481 /// fields which describe the source file,
482 /// the function and a pair of line numbers that
483 /// delimit the construct.
484 /// */
485 /// } ident_t;
486 enum IdentFieldIndex {
487  /// might be used in Fortran
488  IdentField_Reserved_1,
489  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
490  IdentField_Flags,
491  /// Not really used in Fortran any more
492  IdentField_Reserved_2,
493  /// Source[4] in Fortran, do not use for C++
494  IdentField_Reserved_3,
495  /// String describing the source location. The string is composed of
496  /// semi-colon separated fields which describe the source file, the function
497  /// and a pair of line numbers that delimit the construct.
498  IdentField_PSource
499 };
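// Illustrative example of the constant these indices address: the default
// location built by getOrCreateDefaultLocation() below is roughly
//   ident_t { .reserved_1 = 0, .flags = <flags>, .reserved_2 = 0,
//             .reserved_3 = 0, .psource = ";unknown;unknown;0;0;;" }
// i.e. an LLVM global of type { i32, i32, i32, i32, i8* }.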
500 
501 /// Schedule types for 'omp for' loops (these enumerators are taken from
502 /// the enum sched_type in kmp.h).
503 enum OpenMPSchedType {
504  /// Lower bound for default (unordered) versions.
512  /// static with chunk adjustment (e.g., simd)
514  /// Lower bound for 'ordered' versions.
523  /// dist_schedule types
526  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
527  /// Set if the monotonic schedule modifier was present.
529  /// Set if the nonmonotonic schedule modifier was present.
531 };
532 
533 enum OpenMPRTLFunction {
534  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
535  /// kmpc_micro microtask, ...);
537  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
538  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
540  /// Call to void __kmpc_threadprivate_register( ident_t *,
541  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
543  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
545  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
546  // kmp_critical_name *crit);
548  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
549  // global_tid, kmp_critical_name *crit, uintptr_t hint);
551  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
552  // kmp_critical_name *crit);
554  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
555  // global_tid);
557  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
559  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
561  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
562  // global_tid);
564  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
565  // global_tid);
567  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
568  // kmp_int32 num_threads);
570  // Call to void __kmpc_flush(ident_t *loc);
572  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
574  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
576  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
577  // int end_part);
579  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
581  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
583  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
584  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
585  // kmp_routine_entry_t *task_entry);
587  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
588  // new_task);
590  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
591  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
592  // kmp_int32 didit);
594  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
595  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
596  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
598  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
599  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
600  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
601  // *lck);
603  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
604  // kmp_critical_name *lck);
606  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
607  // kmp_critical_name *lck);
609  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
610  // kmp_task_t * new_task);
612  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
613  // kmp_task_t * new_task);
615  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
617  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
619  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
620  // global_tid);
622  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
624  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
626  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
627  // int proc_bind);
629  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
630  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
631  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
633  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
634  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
635  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
637  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
638  // global_tid, kmp_int32 cncl_kind);
640  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
641  // kmp_int32 cncl_kind);
643  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
644  // kmp_int32 num_teams, kmp_int32 thread_limit);
646  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
647  // microtask, ...);
649  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
650  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
651  // sched, kmp_uint64 grainsize, void *task_dup);
653  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
654  // num_dims, struct kmp_dim *dims);
656  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
658  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
659  // *vec);
661  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
662  // *vec);
664  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
665  // *data);
667  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
668  // *d);
670 
671  //
672  // Offloading related calls
673  //
674  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
675  // size);
677  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
678  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
679  // *arg_types);
681  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
682  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
683  // *arg_types);
685  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
686  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
687  // *arg_types, int32_t num_teams, int32_t thread_limit);
689  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
690  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
691  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
693  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
695  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
697  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
698  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
700  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
701  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
702  // *arg_types);
704  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
705  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
707  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
708  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
709  // *arg_types);
711  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
712  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
714  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
715  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
716  // *arg_types);
718 };
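// Illustrative example of how the first entry above is used: a
//   #pragma omp parallel
// region is lowered to an outlined microtask plus a call along the lines of
//   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)&.omp_outlined., &captures);
// where the microtask has the signature
//   void .omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid, <captures>);
// (sketch only; the exact casts and argument packing are emitted elsewhere in
// this file).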
719 
720 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
721 /// region.
722 class CleanupTy final : public EHScopeStack::Cleanup {
723  PrePostActionTy *Action;
724 
725 public:
726  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
727  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
728  if (!CGF.HaveInsertPoint())
729  return;
730  Action->Exit(CGF);
731  }
732 };
733 
734 } // anonymous namespace
735 
736 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
737  CodeGenFunction::RunCleanupsScope Scope(CGF);
738  if (PrePostAction) {
739  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
740  Callback(CodeGen, CGF, *PrePostAction);
741  } else {
742  PrePostActionTy Action;
743  Callback(CodeGen, CGF, Action);
744  }
745 }
746 
747 /// Check if the combiner is a call to a UDR combiner and, if so, return the
748 /// UDR decl used for the reduction.
749 static const OMPDeclareReductionDecl *
750 getReductionInit(const Expr *ReductionOp) {
751  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
752  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
753  if (const auto *DRE =
754  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
755  if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
756  return DRD;
757  return nullptr;
758 }
759 
760 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
761  const OMPDeclareReductionDecl *DRD,
762  const Expr *InitOp,
763  Address Private, Address Original,
764  QualType Ty) {
765  if (DRD->getInitializer()) {
766  std::pair<llvm::Function *, llvm::Function *> Reduction =
767  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
768  const auto *CE = cast<CallExpr>(InitOp);
769  const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
770  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
771  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
772  const auto *LHSDRE =
773  cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
774  const auto *RHSDRE =
775  cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
776  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
777  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
778  [=]() { return Private; });
779  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
780  [=]() { return Original; });
781  (void)PrivateScope.Privatize();
782  RValue Func = RValue::get(Reduction.second);
783  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
784  CGF.EmitIgnoredExpr(InitOp);
785  } else {
786  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
787  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
788  auto *GV = new llvm::GlobalVariable(
789  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
790  llvm::GlobalValue::PrivateLinkage, Init, Name);
791  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
792  RValue InitRVal;
793  switch (CGF.getEvaluationKind(Ty)) {
794  case TEK_Scalar:
795  InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
796  break;
797  case TEK_Complex:
798  InitRVal =
799  RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
800  break;
801  case TEK_Aggregate:
802  InitRVal = RValue::getAggregate(LV.getAddress());
803  break;
804  }
805  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
806  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
807  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
808  /*IsInitializer=*/false);
809  }
810 }
811 
812 /// Emit initialization of arrays of complex types.
813 /// \param DestAddr Address of the array.
814 /// \param Type Type of array.
815 /// \param Init Initial expression of array.
816 /// \param SrcAddr Address of the original array.
817 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
818  QualType Type, bool EmitDeclareReductionInit,
819  const Expr *Init,
820  const OMPDeclareReductionDecl *DRD,
821  Address SrcAddr = Address::invalid()) {
822  // Perform element-by-element initialization.
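 // The emitted IR is a simple pointer-bumping loop over the flattened array,
 // roughly (illustrative):
 //   omp.arrayinit.body:
 //     dest.cur  = phi [dest.begin, entry], [dest.next, body]
 //     <initialize *dest.cur>
 //     dest.next = gep dest.cur, 1
 //     br (dest.next == dest.end), omp.arrayinit.done, omp.arrayinit.body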
823  QualType ElementTy;
824 
825  // Drill down to the base element type on both arrays.
826  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
827  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
828  DestAddr =
829  CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
830  if (DRD)
831  SrcAddr =
832  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
833 
834  llvm::Value *SrcBegin = nullptr;
835  if (DRD)
836  SrcBegin = SrcAddr.getPointer();
837  llvm::Value *DestBegin = DestAddr.getPointer();
838  // Cast from pointer to array type to pointer to single element.
839  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
840  // The basic structure here is a while-do loop.
841  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
842  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
843  llvm::Value *IsEmpty =
844  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
845  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
846 
847  // Enter the loop body, making that address the current address.
848  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
849  CGF.EmitBlock(BodyBB);
850 
851  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
852 
853  llvm::PHINode *SrcElementPHI = nullptr;
854  Address SrcElementCurrent = Address::invalid();
855  if (DRD) {
856  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
857  "omp.arraycpy.srcElementPast");
858  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
859  SrcElementCurrent =
860  Address(SrcElementPHI,
861  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
862  }
863  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
864  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
865  DestElementPHI->addIncoming(DestBegin, EntryBB);
866  Address DestElementCurrent =
867  Address(DestElementPHI,
868  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
869 
870  // Emit copy.
871  {
872  CodeGenFunction::RunCleanupsScope InitScope(CGF);
873  if (EmitDeclareReductionInit) {
874  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
875  SrcElementCurrent, ElementTy);
876  } else
877  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
878  /*IsInitializer=*/false);
879  }
880 
881  if (DRD) {
882  // Shift the address forward by one element.
883  llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
884  SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
885  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
886  }
887 
888  // Shift the address forward by one element.
889  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
890  DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
891  // Check whether we've reached the end.
892  llvm::Value *Done =
893  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
894  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
895  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
896 
897  // Done.
898  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
899 }
900 
901 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
902  return CGF.EmitOMPSharedLValue(E);
903 }
904 
905 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
906  const Expr *E) {
907  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
908  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
909  return LValue();
910 }
911 
912 void ReductionCodeGen::emitAggregateInitialization(
913  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
914  const OMPDeclareReductionDecl *DRD) {
915  // Emit VarDecl with copy init for arrays.
916  // Get the address of the original variable captured in current
917  // captured region.
918  const auto *PrivateVD =
919  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
920  bool EmitDeclareReductionInit =
921  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
922  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
923  EmitDeclareReductionInit,
924  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
925  : PrivateVD->getInit(),
926  DRD, SharedLVal.getAddress());
927 }
928 
929 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
930  ArrayRef<const Expr *> Privates,
931  ArrayRef<const Expr *> ReductionOps) {
932  ClausesData.reserve(Shareds.size());
933  SharedAddresses.reserve(Shareds.size());
934  Sizes.reserve(Shareds.size());
935  BaseDecls.reserve(Shareds.size());
936  auto IPriv = Privates.begin();
937  auto IRed = ReductionOps.begin();
938  for (const Expr *Ref : Shareds) {
939  ClausesData.emplace_back(Ref, *IPriv, *IRed);
940  std::advance(IPriv, 1);
941  std::advance(IRed, 1);
942  }
943 }
944 
945 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
946  assert(SharedAddresses.size() == N &&
947  "Number of generated lvalues must be exactly N.");
948  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
949  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
950  SharedAddresses.emplace_back(First, Second);
951 }
952 
953 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
954  const auto *PrivateVD =
955  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
956  QualType PrivateType = PrivateVD->getType();
957  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
958  if (!PrivateType->isVariablyModifiedType()) {
959  Sizes.emplace_back(
960  CGF.getTypeSize(
961  SharedAddresses[N].first.getType().getNonReferenceType()),
962  nullptr);
963  return;
964  }
965  llvm::Value *Size;
966  llvm::Value *SizeInChars;
967  auto *ElemType =
968  cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
969  ->getElementType();
970  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
971  if (AsArraySection) {
972  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
973  SharedAddresses[N].first.getPointer());
974  Size = CGF.Builder.CreateNUWAdd(
975  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
976  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
977  } else {
978  SizeInChars = CGF.getTypeSize(
979  SharedAddresses[N].first.getType().getNonReferenceType());
980  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
981  }
982  Sizes.emplace_back(SizeInChars, Size);
983  CodeGenFunction::OpaqueValueMapping OpaqueMap(
984  CGF,
985  cast<OpaqueValueExpr>(
986  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
987  RValue::get(Size));
988  CGF.EmitVariablyModifiedType(PrivateType);
989 }
990 
991 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
992  llvm::Value *Size) {
993  const auto *PrivateVD =
994  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
995  QualType PrivateType = PrivateVD->getType();
996  if (!PrivateType->isVariablyModifiedType()) {
997  assert(!Size && !Sizes[N].second &&
998  "Size should be nullptr for non-variably modified reduction "
999  "items.");
1000  return;
1001  }
1002  CodeGenFunction::OpaqueValueMapping OpaqueMap(
1003  CGF,
1004  cast<OpaqueValueExpr>(
1005  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1006  RValue::get(Size));
1007  CGF.EmitVariablyModifiedType(PrivateType);
1008 }
1009 
1011  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1012  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1013  assert(SharedAddresses.size() > N && "No variable was generated");
1014  const auto *PrivateVD =
1015  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1016  const OMPDeclareReductionDecl *DRD =
1017  getReductionInit(ClausesData[N].ReductionOp);
1018  QualType PrivateType = PrivateVD->getType();
1019  PrivateAddr = CGF.Builder.CreateElementBitCast(
1020  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1021  QualType SharedType = SharedAddresses[N].first.getType();
1022  SharedLVal = CGF.MakeAddrLValue(
1023  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1024  CGF.ConvertTypeForMem(SharedType)),
1025  SharedType, SharedAddresses[N].first.getBaseInfo(),
1026  CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1027  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1028  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1029  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1030  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1031  PrivateAddr, SharedLVal.getAddress(),
1032  SharedLVal.getType());
1033  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1034  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1035  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1036  PrivateVD->getType().getQualifiers(),
1037  /*IsInitializer=*/false);
1038  }
1039 }
1040 
1041 bool ReductionCodeGen::needCleanups(unsigned N) {
1042  const auto *PrivateVD =
1043  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1044  QualType PrivateType = PrivateVD->getType();
1045  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1046  return DTorKind != QualType::DK_none;
1047 }
1048 
1049 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1050  Address PrivateAddr) {
1051  const auto *PrivateVD =
1052  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1053  QualType PrivateType = PrivateVD->getType();
1054  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1055  if (needCleanups(N)) {
1056  PrivateAddr = CGF.Builder.CreateElementBitCast(
1057  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1058  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1059  }
1060 }
1061 
1062 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1063  LValue BaseLV) {
1064  BaseTy = BaseTy.getNonReferenceType();
1065  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1066  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1067  if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1068  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1069  } else {
1070  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1071  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1072  }
1073  BaseTy = BaseTy->getPointeeType();
1074  }
1075  return CGF.MakeAddrLValue(
1076  CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1077  CGF.ConvertTypeForMem(ElTy)),
1078  BaseLV.getType(), BaseLV.getBaseInfo(),
1079  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1080 }
1081 
1082 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1083  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1084  llvm::Value *Addr) {
1085  Address Tmp = Address::invalid();
1086  Address TopTmp = Address::invalid();
1087  Address MostTopTmp = Address::invalid();
1088  BaseTy = BaseTy.getNonReferenceType();
1089  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1090  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1091  Tmp = CGF.CreateMemTemp(BaseTy);
1092  if (TopTmp.isValid())
1093  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1094  else
1095  MostTopTmp = Tmp;
1096  TopTmp = Tmp;
1097  BaseTy = BaseTy->getPointeeType();
1098  }
1099  llvm::Type *Ty = BaseLVType;
1100  if (Tmp.isValid())
1101  Ty = Tmp.getElementType();
1102  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1103  if (Tmp.isValid()) {
1104  CGF.Builder.CreateStore(Addr, Tmp);
1105  return MostTopTmp;
1106  }
1107  return Address(Addr, BaseLVAlignment);
1108 }
1109 
1110 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1111  const VarDecl *OrigVD = nullptr;
1112  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1113  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1114  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1115  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1116  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1117  Base = TempASE->getBase()->IgnoreParenImpCasts();
1118  DE = cast<DeclRefExpr>(Base);
1119  OrigVD = cast<VarDecl>(DE->getDecl());
1120  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1121  const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1122  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1123  Base = TempASE->getBase()->IgnoreParenImpCasts();
1124  DE = cast<DeclRefExpr>(Base);
1125  OrigVD = cast<VarDecl>(DE->getDecl());
1126  }
1127  return OrigVD;
1128 }
1129 
1130 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1131  Address PrivateAddr) {
1132  const DeclRefExpr *DE;
1133  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1134  BaseDecls.emplace_back(OrigVD);
1135  LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1136  LValue BaseLValue =
1137  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1138  OriginalBaseLValue);
1139  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1140  BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1141  llvm::Value *PrivatePointer =
1142  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1143  PrivateAddr.getPointer(),
1144  SharedAddresses[N].first.getAddress().getType());
1145  llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1146  return castToBase(CGF, OrigVD->getType(),
1147  SharedAddresses[N].first.getType(),
1148  OriginalBaseLValue.getAddress().getType(),
1149  OriginalBaseLValue.getAlignment(), Ptr);
1150  }
1151  BaseDecls.emplace_back(
1152  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1153  return PrivateAddr;
1154 }
1155 
1156 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1157  const OMPDeclareReductionDecl *DRD =
1158  getReductionInit(ClausesData[N].ReductionOp);
1159  return DRD && DRD->getInitializer();
1160 }
1161 
1162 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1163  return CGF.EmitLoadOfPointerLValue(
1164  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1165  getThreadIDVariable()->getType()->castAs<PointerType>());
1166 }
1167 
1168 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1169  if (!CGF.HaveInsertPoint())
1170  return;
1171  // 1.2.2 OpenMP Language Terminology
1172  // Structured block - An executable statement with a single entry at the
1173  // top and a single exit at the bottom.
1174  // The point of exit cannot be a branch out of the structured block.
1175  // longjmp() and throw() must not violate the entry/exit criteria.
1176  CGF.EHStack.pushTerminate();
1177  CodeGen(CGF);
1178  CGF.EHStack.popTerminate();
1179 }
1180 
1181 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1182  CodeGenFunction &CGF) {
1183  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1184  getThreadIDVariable()->getType(),
1185  AlignmentSource::Decl);
1186 }
1187 
1188 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1189  QualType FieldTy) {
1190  auto *Field = FieldDecl::Create(
1191  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1192  C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1193  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1194  Field->setAccess(AS_public);
1195  DC->addDecl(Field);
1196  return Field;
1197 }
1198 
1199 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1200  StringRef Separator)
1201  : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1202  OffloadEntriesInfoManager(CGM) {
1203  ASTContext &C = CGM.getContext();
1204  RecordDecl *RD = C.buildImplicitRecord("ident_t");
1205  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1206  RD->startDefinition();
1207  // reserved_1
1208  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1209  // flags
1210  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1211  // reserved_2
1212  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1213  // reserved_3
1214  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1215  // psource
1216  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1217  RD->completeDefinition();
1218  IdentQTy = C.getRecordType(RD);
1219  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1220  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1221 
1222  loadOffloadInfoMetadata();
1223 }
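// On a typical 64-bit target the record built above lowers to the LLVM type
//   %struct.ident_t = type { i32, i32, i32, i32, i8* }
// matching the kmp.h layout quoted earlier (the pointer width follows the
// target; illustrative only).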
1224 
1225 void CGOpenMPRuntime::clear() {
1226  InternalVars.clear();
1227  // Clean non-target variable declarations possibly used only in debug info.
1228  for (const auto &Data : EmittedNonTargetVariables) {
1229  if (!Data.getValue().pointsToAliveValue())
1230  continue;
1231  auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1232  if (!GV)
1233  continue;
1234  if (!GV->isDeclaration() || GV->getNumUses() > 0)
1235  continue;
1236  GV->eraseFromParent();
1237  }
1238 }
1239 
1240 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1241  SmallString<128> Buffer;
1242  llvm::raw_svector_ostream OS(Buffer);
1243  StringRef Sep = FirstSeparator;
1244  for (StringRef Part : Parts) {
1245  OS << Sep << Part;
1246  Sep = Separator;
1247  }
1248  return OS.str();
1249 }
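// Example: with FirstSeparator and Separator both "." (the default host
// runtime), getName({"omp_combiner", ""}) produces ".omp_combiner."; each part
// is prefixed by a separator, so a trailing empty part yields a trailing
// separator.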
1250 
1251 static llvm::Function *
1252 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1253  const Expr *CombinerInitializer, const VarDecl *In,
1254  const VarDecl *Out, bool IsCombiner) {
1255  // void .omp_combiner.(Ty *in, Ty *out);
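 // For example (illustrative), for
 //   #pragma omp declare reduction(foo : int : omp_out += omp_in) \
 //       initializer(omp_priv = 0)
 // this helper is used twice: once to build the combiner from
 // 'omp_out += omp_in' (In = omp_in, Out = omp_out) and once to build the
 // initializer from 'omp_priv = 0' (In = omp_orig, Out = omp_priv).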
1256  ASTContext &C = CGM.getContext();
1257  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1258  FunctionArgList Args;
1259  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1260  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1261  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1262  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1263  Args.push_back(&OmpOutParm);
1264  Args.push_back(&OmpInParm);
1265  const CGFunctionInfo &FnInfo =
1266  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1267  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1268  std::string Name = CGM.getOpenMPRuntime().getName(
1269  {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1270  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1271  Name, &CGM.getModule());
1272  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1273  Fn->removeFnAttr(llvm::Attribute::NoInline);
1274  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1275  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1276  CodeGenFunction CGF(CGM);
1277  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1278  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1279  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1280  Out->getLocation());
1281  CodeGenFunction::OMPPrivateScope Scope(CGF);
1282  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1283  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1284  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1285  .getAddress();
1286  });
1287  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1288  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1289  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1290  .getAddress();
1291  });
1292  (void)Scope.Privatize();
1293  if (!IsCombiner && Out->hasInit() &&
1294  !CGF.isTrivialInitializer(Out->getInit())) {
1295  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1296  Out->getType().getQualifiers(),
1297  /*IsInitializer=*/true);
1298  }
1299  if (CombinerInitializer)
1300  CGF.EmitIgnoredExpr(CombinerInitializer);
1301  Scope.ForceCleanup();
1302  CGF.FinishFunction();
1303  return Fn;
1304 }
1305 
1306 void CGOpenMPRuntime::emitUserDefinedReduction(
1307  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1308  if (UDRMap.count(D) > 0)
1309  return;
1310  llvm::Function *Combiner = emitCombinerOrInitializer(
1311  CGM, D->getType(), D->getCombiner(),
1312  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1313  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1314  /*IsCombiner=*/true);
1315  llvm::Function *Initializer = nullptr;
1316  if (const Expr *Init = D->getInitializer()) {
1317  Initializer = emitCombinerOrInitializer(
1318  CGM, D->getType(),
1319  D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1320  : nullptr,
1321  cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1322  cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1323  /*IsCombiner=*/false);
1324  }
1325  UDRMap.try_emplace(D, Combiner, Initializer);
1326  if (CGF) {
1327  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1328  Decls.second.push_back(D);
1329  }
1330 }
1331 
1332 std::pair<llvm::Function *, llvm::Function *>
1333 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1334  auto I = UDRMap.find(D);
1335  if (I != UDRMap.end())
1336  return I->second;
1337  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1338  return UDRMap.lookup(D);
1339 }
1340 
1341 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1342  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1343  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1344  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1345  assert(ThreadIDVar->getType()->isPointerType() &&
1346  "thread id variable must be of type kmp_int32 *");
1347  CodeGenFunction CGF(CGM, true);
1348  bool HasCancel = false;
1349  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1350  HasCancel = OPD->hasCancel();
1351  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1352  HasCancel = OPSD->hasCancel();
1353  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1354  HasCancel = OPFD->hasCancel();
1355  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1356  HasCancel = OPFD->hasCancel();
1357  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1358  HasCancel = OPFD->hasCancel();
1359  else if (const auto *OPFD =
1360  dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1361  HasCancel = OPFD->hasCancel();
1362  else if (const auto *OPFD =
1363  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1364  HasCancel = OPFD->hasCancel();
1365  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1366  HasCancel, OutlinedHelperName);
1367  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1368  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1369 }
1370 
1371 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1372  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1373  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1374  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1375  return emitParallelOrTeamsOutlinedFunction(
1376  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1377 }
1378 
1379 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1380  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1381  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1382  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1383  return emitParallelOrTeamsOutlinedFunction(
1384  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1385 }
1386 
1387 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1388  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1389  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1390  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1391  bool Tied, unsigned &NumberOfParts) {
1392  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1393  PrePostActionTy &) {
1394  llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1395  llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1396  llvm::Value *TaskArgs[] = {
1397  UpLoc, ThreadID,
1398  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1399  TaskTVar->getType()->castAs<PointerType>())
1400  .getPointer()};
1401  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1402  };
1403  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1404  UntiedCodeGen);
1405  CodeGen.setAction(Action);
1406  assert(!ThreadIDVar->getType()->isPointerType() &&
1407  "thread id variable must be of type kmp_int32 for tasks");
1408  const OpenMPDirectiveKind Region =
1409  isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1410  : OMPD_task;
1411  const CapturedStmt *CS = D.getCapturedStmt(Region);
1412  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1413  CodeGenFunction CGF(CGM, true);
1414  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1415  InnermostKind,
1416  TD ? TD->hasCancel() : false, Action);
1417  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1418  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1419  if (!Tied)
1420  NumberOfParts = Action.getNumberOfParts();
1421  return Res;
1422 }
1423 
1424 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1425  const RecordDecl *RD, const CGRecordLayout &RL,
1426  ArrayRef<llvm::Constant *> Data) {
1427  llvm::StructType *StructTy = RL.getLLVMType();
1428  unsigned PrevIdx = 0;
1429  ConstantInitBuilder CIBuilder(CGM);
1430  auto DI = Data.begin();
1431  for (const FieldDecl *FD : RD->fields()) {
1432  unsigned Idx = RL.getLLVMFieldNo(FD);
1433  // Fill any padding fields added for alignment with null values.
1434  for (unsigned I = PrevIdx; I < Idx; ++I)
1435  Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1436  PrevIdx = Idx + 1;
1437  Fields.add(*DI);
1438  ++DI;
1439  }
1440 }
1441 
1442 template <class... As>
1443 static llvm::GlobalVariable *
1444 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1445  ArrayRef<llvm::Constant *> Data, const Twine &Name,
1446  As &&... Args) {
1447  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1448  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1449  ConstantInitBuilder CIBuilder(CGM);
1450  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1451  buildStructValue(Fields, CGM, RD, RL, Data);
1452  return Fields.finishAndCreateGlobal(
1453  Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1454  std::forward<As>(Args)...);
1455 }
1456 
1457 template <typename T>
1458 static void
1459 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1460  ArrayRef<llvm::Constant *> Data,
1461  T &Parent) {
1462  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1463  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1464  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1465  buildStructValue(Fields, CGM, RD, RL, Data);
1466  Fields.finishAndAddTo(Parent);
1467 }
1468 
1469 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1470  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1471  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1472  FlagsTy FlagsKey(Flags, Reserved2Flags);
1473  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1474  if (!Entry) {
1475  if (!DefaultOpenMPPSource) {
1476  // Initialize default location for psource field of ident_t structure of
1477  // all ident_t objects. Format is ";file;function;line;column;;".
1478  // Taken from
1479  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1480  DefaultOpenMPPSource =
1481  CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1482  DefaultOpenMPPSource =
1483  llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1484  }
1485 
1486  llvm::Constant *Data[] = {
1487  llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1488  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1489  llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1490  llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1491  llvm::GlobalValue *DefaultOpenMPLocation =
1492  createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1493  llvm::GlobalValue::PrivateLinkage);
1494  DefaultOpenMPLocation->setUnnamedAddr(
1495  llvm::GlobalValue::UnnamedAddr::Global);
1496 
1497  OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1498  }
1499  return Address(Entry, Align);
1500 }
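// Rough illustration (assuming a 64-bit target and the default flags): the
// global created above corresponds approximately to
//   @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
//   @0 = private unnamed_addr constant %struct.ident_t
//          { i32 0, i32 <Flags>, i32 <Reserved2Flags>, i32 0, i8* <psource> }
// where <Flags> carries OMP_IDENT_KMPC plus any caller-supplied bits.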
1501 
1502 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1503  bool AtCurrentPoint) {
1504  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1505  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1506 
1507  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1508  if (AtCurrentPoint) {
1509  Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1510  Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1511  } else {
1512  Elem.second.ServiceInsertPt =
1513  new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1514  Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1515  }
1516 }
1517 
1518 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1519  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1520  if (Elem.second.ServiceInsertPt) {
1521  llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1522  Elem.second.ServiceInsertPt = nullptr;
1523  Ptr->eraseFromParent();
1524  }
1525 }
1526 
1527 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1528  SourceLocation Loc,
1529  unsigned Flags) {
1530  Flags |= OMP_IDENT_KMPC;
1531  // If no debug info is generated - return global default location.
1532  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1533  Loc.isInvalid())
1534  return getOrCreateDefaultLocation(Flags).getPointer();
1535 
1536  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1537 
1538  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1539  Address LocValue = Address::invalid();
1540  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1541  if (I != OpenMPLocThreadIDMap.end())
1542  LocValue = Address(I->second.DebugLoc, Align);
1543 
1544  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1545  // GetOpenMPThreadID was called before this routine.
1546  if (!LocValue.isValid()) {
1547  // Generate "ident_t .kmpc_loc.addr;"
1548  Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1549  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1550  Elem.second.DebugLoc = AI.getPointer();
1551  LocValue = AI;
1552 
1553  if (!Elem.second.ServiceInsertPt)
1554  setLocThreadIdInsertPt(CGF);
1555  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1556  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1557  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1558  CGF.getTypeSize(IdentQTy));
1559  }
1560 
1561  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1562  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1563  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1564  LValue PSource =
1565  CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1566 
1567  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1568  if (OMPDebugLoc == nullptr) {
1569  SmallString<128> Buffer2;
1570  llvm::raw_svector_ostream OS2(Buffer2);
1571  // Build debug location
1572  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1573  OS2 << ";" << PLoc.getFilename() << ";";
1574  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1575  OS2 << FD->getQualifiedNameAsString();
1576  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1577  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1578  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1579  }
1580  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1581  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1582 
1583  // Our callers always pass this to a runtime function, so for
1584  // convenience, go ahead and return a naked pointer.
1585  return LocValue.getPointer();
1586 }
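// Rough illustration: with debug info enabled, a directive at (hypothetical)
// test.c:10:3 inside a function foo() gets a psource string of the form
//   ";test.c;foo;10;3;;"
// i.e. the ";file;function;line;column;;" layout documented above.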
1587 
1588 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1589  SourceLocation Loc) {
1590  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1591 
1592  llvm::Value *ThreadID = nullptr;
1593  // Check whether we've already cached a load of the thread id in this
1594  // function.
1595  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1596  if (I != OpenMPLocThreadIDMap.end()) {
1597  ThreadID = I->second.ThreadID;
1598  if (ThreadID != nullptr)
1599  return ThreadID;
1600  }
1601  // If exceptions are enabled, do not use parameter to avoid possible crash.
1602  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1603  !CGF.getLangOpts().CXXExceptions ||
1604  CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1605  if (auto *OMPRegionInfo =
1606  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1607  if (OMPRegionInfo->getThreadIDVariable()) {
1608  // Check if this is an outlined function with the thread id passed as an argument.
1609  LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1610  ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1611  // If the value is loaded in the entry block, cache it and use it everywhere
1612  // in the function.
1613  if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1614  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1615  Elem.second.ThreadID = ThreadID;
1616  }
1617  return ThreadID;
1618  }
1619  }
1620  }
1621 
1622  // This is not an outlined function region - need to call kmp_int32
1623  // __kmpc_global_thread_num(ident_t *loc).
1624  // Generate thread id value and cache this value for use across the
1625  // function.
1626  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1627  if (!Elem.second.ServiceInsertPt)
1628  setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
1629  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1630  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1631  llvm::CallInst *Call = CGF.Builder.CreateCall(
1632  createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1633  emitUpdateLocation(CGF, Loc));
1634  Call->setCallingConv(CGF.getRuntimeCC());
1635  Elem.second.ThreadID = Call;
1636  return Call;
1637 }
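// Rough illustration of the non-cached path above: the call inserted at the
// service insertion point is effectively
//   %gtid = call i32 @__kmpc_global_thread_num(%struct.ident_t* <loc>)
// and the result is stashed in OpenMPLocThreadIDMap for later queries in the
// same function.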
1638 
1639 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1640  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1641  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1642  clearLocThreadIdInsertPt(CGF);
1643  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1644  }
1645  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1646  for(auto *D : FunctionUDRMap[CGF.CurFn])
1647  UDRMap.erase(D);
1648  FunctionUDRMap.erase(CGF.CurFn);
1649  }
1650 }
1651 
1652 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1653  return IdentTy->getPointerTo();
1654 }
1655 
1656 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1657  if (!Kmpc_MicroTy) {
1658  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1659  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1660  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1661  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1662  }
1663  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1664 }
1665 
1666 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1667  llvm::FunctionCallee RTLFn = nullptr;
1668  switch (static_cast<OpenMPRTLFunction>(Function)) {
1669  case OMPRTL__kmpc_fork_call: {
1670  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1671  // microtask, ...);
1672  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1673  getKmpc_MicroPointerTy()};
1674  auto *FnTy =
1675  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1676  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1677  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1678  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1679  llvm::LLVMContext &Ctx = F->getContext();
1680  llvm::MDBuilder MDB(Ctx);
1681  // Annotate the callback behavior of the __kmpc_fork_call:
1682  // - The callback callee is argument number 2 (microtask).
1683  // - The first two arguments of the callback callee are unknown (-1).
1684  // - All variadic arguments to the __kmpc_fork_call are passed to the
1685  // callback callee.
1686  F->addMetadata(
1687  llvm::LLVMContext::MD_callback,
1688  *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1689  2, {-1, -1},
1690  /* VarArgsArePassed */ true)}));
1691  }
1692  }
1693  break;
1694  }
1695  case OMPRTL__kmpc_global_thread_num: {
1696  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1697  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1698  auto *FnTy =
1699  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1700  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1701  break;
1702  }
1703  case OMPRTL__kmpc_threadprivate_cached: {
1704  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1705  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1706  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1707  CGM.VoidPtrTy, CGM.SizeTy,
1708  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1709  auto *FnTy =
1710  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1711  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1712  break;
1713  }
1714  case OMPRTL__kmpc_critical: {
1715  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1716  // kmp_critical_name *crit);
1717  llvm::Type *TypeParams[] = {
1718  getIdentTyPointerTy(), CGM.Int32Ty,
1719  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1720  auto *FnTy =
1721  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1722  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1723  break;
1724  }
1725  case OMPRTL__kmpc_critical_with_hint: {
1726  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1727  // kmp_critical_name *crit, uintptr_t hint);
1728  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1729  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1730  CGM.IntPtrTy};
1731  auto *FnTy =
1732  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1733  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1734  break;
1735  }
1736  case OMPRTL__kmpc_threadprivate_register: {
1737  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1738  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1739  // typedef void *(*kmpc_ctor)(void *);
1740  auto *KmpcCtorTy =
1741  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1742  /*isVarArg*/ false)->getPointerTo();
1743  // typedef void *(*kmpc_cctor)(void *, void *);
1744  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1745  auto *KmpcCopyCtorTy =
1746  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1747  /*isVarArg*/ false)
1748  ->getPointerTo();
1749  // typedef void (*kmpc_dtor)(void *);
1750  auto *KmpcDtorTy =
1751  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1752  ->getPointerTo();
1753  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1754  KmpcCopyCtorTy, KmpcDtorTy};
1755  auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1756  /*isVarArg*/ false);
1757  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1758  break;
1759  }
1760  case OMPRTL__kmpc_end_critical: {
1761  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1762  // kmp_critical_name *crit);
1763  llvm::Type *TypeParams[] = {
1764  getIdentTyPointerTy(), CGM.Int32Ty,
1765  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1766  auto *FnTy =
1767  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1768  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1769  break;
1770  }
1771  case OMPRTL__kmpc_cancel_barrier: {
1772  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1773  // global_tid);
1774  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1775  auto *FnTy =
1776  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1777  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1778  break;
1779  }
1780  case OMPRTL__kmpc_barrier: {
1781  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1782  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1783  auto *FnTy =
1784  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1785  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1786  break;
1787  }
1788  case OMPRTL__kmpc_for_static_fini: {
1789  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1790  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1791  auto *FnTy =
1792  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1793  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1794  break;
1795  }
1796  case OMPRTL__kmpc_push_num_threads: {
1797  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1798  // kmp_int32 num_threads)
1799  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1800  CGM.Int32Ty};
1801  auto *FnTy =
1802  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1803  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1804  break;
1805  }
1806  case OMPRTL__kmpc_serialized_parallel: {
1807  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1808  // global_tid);
1809  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1810  auto *FnTy =
1811  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1812  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1813  break;
1814  }
1815  case OMPRTL__kmpc_end_serialized_parallel: {
1816  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1817  // global_tid);
1818  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1819  auto *FnTy =
1820  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1821  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1822  break;
1823  }
1824  case OMPRTL__kmpc_flush: {
1825  // Build void __kmpc_flush(ident_t *loc);
1826  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1827  auto *FnTy =
1828  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1829  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1830  break;
1831  }
1832  case OMPRTL__kmpc_master: {
1833  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1834  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1835  auto *FnTy =
1836  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1837  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1838  break;
1839  }
1840  case OMPRTL__kmpc_end_master: {
1841  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1842  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1843  auto *FnTy =
1844  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1845  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1846  break;
1847  }
1848  case OMPRTL__kmpc_omp_taskyield: {
1849  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1850  // int end_part);
1851  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1852  auto *FnTy =
1853  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1854  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1855  break;
1856  }
1857  case OMPRTL__kmpc_single: {
1858  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1859  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1860  auto *FnTy =
1861  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1862  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1863  break;
1864  }
1865  case OMPRTL__kmpc_end_single: {
1866  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1867  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1868  auto *FnTy =
1869  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1870  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1871  break;
1872  }
1873  case OMPRTL__kmpc_omp_task_alloc: {
1874  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1875  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1876  // kmp_routine_entry_t *task_entry);
1877  assert(KmpRoutineEntryPtrTy != nullptr &&
1878  "Type kmp_routine_entry_t must be created.");
1879  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1880  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1881  // Return void * and then cast to particular kmp_task_t type.
1882  auto *FnTy =
1883  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1884  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1885  break;
1886  }
1887  case OMPRTL__kmpc_omp_task: {
1888  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1889  // *new_task);
1890  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1891  CGM.VoidPtrTy};
1892  auto *FnTy =
1893  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1894  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1895  break;
1896  }
1897  case OMPRTL__kmpc_copyprivate: {
1898  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1899  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1900  // kmp_int32 didit);
1901  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1902  auto *CpyFnTy =
1903  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1904  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1905  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1906  CGM.Int32Ty};
1907  auto *FnTy =
1908  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1909  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1910  break;
1911  }
1912  case OMPRTL__kmpc_reduce: {
1913  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1914  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1915  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1916  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1917  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1918  /*isVarArg=*/false);
1919  llvm::Type *TypeParams[] = {
1920  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1921  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1922  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1923  auto *FnTy =
1924  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1925  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1926  break;
1927  }
1928  case OMPRTL__kmpc_reduce_nowait: {
1929  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1930  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1931  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1932  // *lck);
1933  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1934  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1935  /*isVarArg=*/false);
1936  llvm::Type *TypeParams[] = {
1937  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1938  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1939  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1940  auto *FnTy =
1941  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1942  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1943  break;
1944  }
1945  case OMPRTL__kmpc_end_reduce: {
1946  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1947  // kmp_critical_name *lck);
1948  llvm::Type *TypeParams[] = {
1949  getIdentTyPointerTy(), CGM.Int32Ty,
1950  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1951  auto *FnTy =
1952  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1953  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1954  break;
1955  }
1956  case OMPRTL__kmpc_end_reduce_nowait: {
1957  // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1958  // kmp_critical_name *lck);
1959  llvm::Type *TypeParams[] = {
1960  getIdentTyPointerTy(), CGM.Int32Ty,
1961  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1962  auto *FnTy =
1963  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1964  RTLFn =
1965  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1966  break;
1967  }
1968  case OMPRTL__kmpc_omp_task_begin_if0: {
1969  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, kmp_task_t
1970  // *new_task);
1971  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1972  CGM.VoidPtrTy};
1973  auto *FnTy =
1974  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1975  RTLFn =
1976  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1977  break;
1978  }
1979  case OMPRTL__kmpc_omp_task_complete_if0: {
1980  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, kmp_task_t
1981  // *new_task);
1982  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1983  CGM.VoidPtrTy};
1984  auto *FnTy =
1985  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1986  RTLFn = CGM.CreateRuntimeFunction(FnTy,
1987  /*Name=*/"__kmpc_omp_task_complete_if0");
1988  break;
1989  }
1990  case OMPRTL__kmpc_ordered: {
1991  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1992  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1993  auto *FnTy =
1994  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1995  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1996  break;
1997  }
1998  case OMPRTL__kmpc_end_ordered: {
1999  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2000  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2001  auto *FnTy =
2002  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2003  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2004  break;
2005  }
2006  case OMPRTL__kmpc_omp_taskwait: {
2007  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2008  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2009  auto *FnTy =
2010  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2011  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2012  break;
2013  }
2014  case OMPRTL__kmpc_taskgroup: {
2015  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2016  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2017  auto *FnTy =
2018  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2019  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2020  break;
2021  }
2022  case OMPRTL__kmpc_end_taskgroup: {
2023  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2024  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2025  auto *FnTy =
2026  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2027  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2028  break;
2029  }
2030  case OMPRTL__kmpc_push_proc_bind: {
2031  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2032  // int proc_bind)
2033  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2034  auto *FnTy =
2035  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2036  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2037  break;
2038  }
2039  case OMPRTL__kmpc_omp_task_with_deps: {
2040  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2041  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2042  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2043  llvm::Type *TypeParams[] = {
2044  getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2045  CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
2046  auto *FnTy =
2047  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2048  RTLFn =
2049  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2050  break;
2051  }
2052  case OMPRTL__kmpc_omp_wait_deps: {
2053  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2054  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2055  // kmp_depend_info_t *noalias_dep_list);
2056  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2057  CGM.Int32Ty, CGM.VoidPtrTy,
2058  CGM.Int32Ty, CGM.VoidPtrTy};
2059  auto *FnTy =
2060  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2061  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2062  break;
2063  }
2064  case OMPRTL__kmpc_cancellationpoint: {
2065  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2066  // global_tid, kmp_int32 cncl_kind)
2067  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2068  auto *FnTy =
2069  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2070  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2071  break;
2072  }
2073  case OMPRTL__kmpc_cancel: {
2074  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2075  // kmp_int32 cncl_kind)
2076  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2077  auto *FnTy =
2078  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2079  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2080  break;
2081  }
2082  case OMPRTL__kmpc_push_num_teams: {
2083  // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
2084  // kmp_int32 num_teams, kmp_int32 num_threads)
2085  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2086  CGM.Int32Ty};
2087  auto *FnTy =
2088  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2089  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2090  break;
2091  }
2092  case OMPRTL__kmpc_fork_teams: {
2093  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2094  // microtask, ...);
2095  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2096  getKmpc_MicroPointerTy()};
2097  auto *FnTy =
2098  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2099  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2100  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2101  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2102  llvm::LLVMContext &Ctx = F->getContext();
2103  llvm::MDBuilder MDB(Ctx);
2104  // Annotate the callback behavior of the __kmpc_fork_teams:
2105  // - The callback callee is argument number 2 (microtask).
2106  // - The first two arguments of the callback callee are unknown (-1).
2107  // - All variadic arguments to the __kmpc_fork_teams are passed to the
2108  // callback callee.
2109  F->addMetadata(
2110  llvm::LLVMContext::MD_callback,
2111  *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2112  2, {-1, -1},
2113  /* VarArgsArePassed */ true)}));
2114  }
2115  }
2116  break;
2117  }
2118  case OMPRTL__kmpc_taskloop: {
2119  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2120  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2121  // sched, kmp_uint64 grainsize, void *task_dup);
2122  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2123  CGM.IntTy,
2124  CGM.VoidPtrTy,
2125  CGM.IntTy,
2126  CGM.Int64Ty->getPointerTo(),
2127  CGM.Int64Ty->getPointerTo(),
2128  CGM.Int64Ty,
2129  CGM.IntTy,
2130  CGM.IntTy,
2131  CGM.Int64Ty,
2132  CGM.VoidPtrTy};
2133  auto *FnTy =
2134  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2135  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2136  break;
2137  }
2138  case OMPRTL__kmpc_doacross_init: {
2139  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2140  // num_dims, struct kmp_dim *dims);
2141  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2142  CGM.Int32Ty,
2143  CGM.Int32Ty,
2144  CGM.VoidPtrTy};
2145  auto *FnTy =
2146  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2147  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2148  break;
2149  }
2150  case OMPRTL__kmpc_doacross_fini: {
2151  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2152  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2153  auto *FnTy =
2154  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2155  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2156  break;
2157  }
2158  case OMPRTL__kmpc_doacross_post: {
2159  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2160  // *vec);
2161  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2162  CGM.Int64Ty->getPointerTo()};
2163  auto *FnTy =
2164  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2165  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2166  break;
2167  }
2168  case OMPRTL__kmpc_doacross_wait: {
2169  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2170  // *vec);
2171  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2172  CGM.Int64Ty->getPointerTo()};
2173  auto *FnTy =
2174  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2175  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2176  break;
2177  }
2178  case OMPRTL__kmpc_task_reduction_init: {
2179  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2180  // *data);
2181  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2182  auto *FnTy =
2183  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2184  RTLFn =
2185  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2186  break;
2187  }
2188  case OMPRTL__kmpc_task_reduction_get_th_data: {
2189  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2190  // *d);
2191  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2192  auto *FnTy =
2193  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2194  RTLFn = CGM.CreateRuntimeFunction(
2195  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2196  break;
2197  }
2198  case OMPRTL__kmpc_push_target_tripcount: {
2199  // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2200  // size);
2201  llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2202  llvm::FunctionType *FnTy =
2203  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2204  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2205  break;
2206  }
2207  case OMPRTL__tgt_target: {
2208  // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2209  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2210  // *arg_types);
2211  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2212  CGM.VoidPtrTy,
2213  CGM.Int32Ty,
2214  CGM.VoidPtrPtrTy,
2215  CGM.VoidPtrPtrTy,
2216  CGM.SizeTy->getPointerTo(),
2217  CGM.Int64Ty->getPointerTo()};
2218  auto *FnTy =
2219  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2220  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2221  break;
2222  }
2223  case OMPRTL__tgt_target_nowait: {
2224  // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2225  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2226  // int64_t *arg_types);
2227  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2228  CGM.VoidPtrTy,
2229  CGM.Int32Ty,
2230  CGM.VoidPtrPtrTy,
2231  CGM.VoidPtrPtrTy,
2232  CGM.SizeTy->getPointerTo(),
2233  CGM.Int64Ty->getPointerTo()};
2234  auto *FnTy =
2235  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2236  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2237  break;
2238  }
2239  case OMPRTL__tgt_target_teams: {
2240  // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2241  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2242  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2243  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2244  CGM.VoidPtrTy,
2245  CGM.Int32Ty,
2246  CGM.VoidPtrPtrTy,
2247  CGM.VoidPtrPtrTy,
2248  CGM.SizeTy->getPointerTo(),
2249  CGM.Int64Ty->getPointerTo(),
2250  CGM.Int32Ty,
2251  CGM.Int32Ty};
2252  auto *FnTy =
2253  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2254  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2255  break;
2256  }
2257  case OMPRTL__tgt_target_teams_nowait: {
2258  // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2259  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2260  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2261  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2262  CGM.VoidPtrTy,
2263  CGM.Int32Ty,
2264  CGM.VoidPtrPtrTy,
2265  CGM.VoidPtrPtrTy,
2266  CGM.SizeTy->getPointerTo(),
2267  CGM.Int64Ty->getPointerTo(),
2268  CGM.Int32Ty,
2269  CGM.Int32Ty};
2270  auto *FnTy =
2271  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2272  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2273  break;
2274  }
2275  case OMPRTL__tgt_register_lib: {
2276  // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2277  QualType ParamTy =
2278  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2279  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2280  auto *FnTy =
2281  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2282  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2283  break;
2284  }
2285  case OMPRTL__tgt_unregister_lib: {
2286  // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2287  QualType ParamTy =
2288  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2289  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2290  auto *FnTy =
2291  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2292  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2293  break;
2294  }
2295  case OMPRTL__tgt_target_data_begin: {
2296  // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2297  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2298  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2299  CGM.Int32Ty,
2300  CGM.VoidPtrPtrTy,
2301  CGM.VoidPtrPtrTy,
2302  CGM.SizeTy->getPointerTo(),
2303  CGM.Int64Ty->getPointerTo()};
2304  auto *FnTy =
2305  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2306  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2307  break;
2308  }
2309  case OMPRTL__tgt_target_data_begin_nowait: {
2310  // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2311  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2312  // *arg_types);
2313  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2314  CGM.Int32Ty,
2315  CGM.VoidPtrPtrTy,
2316  CGM.VoidPtrPtrTy,
2317  CGM.SizeTy->getPointerTo(),
2318  CGM.Int64Ty->getPointerTo()};
2319  auto *FnTy =
2320  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2321  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2322  break;
2323  }
2324  case OMPRTL__tgt_target_data_end: {
2325  // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2326  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2327  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2328  CGM.Int32Ty,
2329  CGM.VoidPtrPtrTy,
2330  CGM.VoidPtrPtrTy,
2331  CGM.SizeTy->getPointerTo(),
2332  CGM.Int64Ty->getPointerTo()};
2333  auto *FnTy =
2334  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2335  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2336  break;
2337  }
2338  case OMPRTL__tgt_target_data_end_nowait: {
2339  // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2340  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2341  // *arg_types);
2342  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2343  CGM.Int32Ty,
2344  CGM.VoidPtrPtrTy,
2345  CGM.VoidPtrPtrTy,
2346  CGM.SizeTy->getPointerTo(),
2347  CGM.Int64Ty->getPointerTo()};
2348  auto *FnTy =
2349  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2350  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2351  break;
2352  }
2353  case OMPRTL__tgt_target_data_update: {
2354  // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2355  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2356  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2357  CGM.Int32Ty,
2358  CGM.VoidPtrPtrTy,
2359  CGM.VoidPtrPtrTy,
2360  CGM.SizeTy->getPointerTo(),
2361  CGM.Int64Ty->getPointerTo()};
2362  auto *FnTy =
2363  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2364  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2365  break;
2366  }
2367  case OMPRTL__tgt_target_data_update_nowait: {
2368  // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2369  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2370  // *arg_types);
2371  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2372  CGM.Int32Ty,
2373  CGM.VoidPtrPtrTy,
2374  CGM.VoidPtrPtrTy,
2375  CGM.SizeTy->getPointerTo(),
2376  CGM.Int64Ty->getPointerTo()};
2377  auto *FnTy =
2378  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2379  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2380  break;
2381  }
2382  }
2383  assert(RTLFn && "Unable to find OpenMP runtime function");
2384  return RTLFn;
2385 }
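// Typical usage sketch (mirroring calls made elsewhere in this file): the
// callee returned here is paired with the matching argument list, e.g.
//   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier),
//                       {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)});
// so each case above only has to describe the libomp prototype once.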
2386 
2387 llvm::FunctionCallee
2388 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2389  assert((IVSize == 32 || IVSize == 64) &&
2390  "IV size is not compatible with the omp runtime");
2391  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2392  : "__kmpc_for_static_init_4u")
2393  : (IVSigned ? "__kmpc_for_static_init_8"
2394  : "__kmpc_for_static_init_8u");
2395  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2396  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2397  llvm::Type *TypeParams[] = {
2398  getIdentTyPointerTy(), // loc
2399  CGM.Int32Ty, // tid
2400  CGM.Int32Ty, // schedtype
2401  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2402  PtrTy, // p_lower
2403  PtrTy, // p_upper
2404  PtrTy, // p_stride
2405  ITy, // incr
2406  ITy // chunk
2407  };
2408  auto *FnTy =
2409  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2410  return CGM.CreateRuntimeFunction(FnTy, Name);
2411 }
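// Rough illustration: for IVSize == 32 and IVSigned == true the returned callee
// corresponds to
//   void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 tid,
//                                 kmp_int32 schedtype, kmp_int32 *plastiter,
//                                 kmp_int32 *plower, kmp_int32 *pupper,
//                                 kmp_int32 *pstride, kmp_int32 incr,
//                                 kmp_int32 chunk);
// the _4u/_8/_8u variants differ only in the induction-variable type.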
2412 
2413 llvm::FunctionCallee
2414 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2415  assert((IVSize == 32 || IVSize == 64) &&
2416  "IV size is not compatible with the omp runtime");
2417  StringRef Name =
2418  IVSize == 32
2419  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2420  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2421  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2422  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2423  CGM.Int32Ty, // tid
2424  CGM.Int32Ty, // schedtype
2425  ITy, // lower
2426  ITy, // upper
2427  ITy, // stride
2428  ITy // chunk
2429  };
2430  auto *FnTy =
2431  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2432  return CGM.CreateRuntimeFunction(FnTy, Name);
2433 }
2434 
2435 llvm::FunctionCallee
2436 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2437  assert((IVSize == 32 || IVSize == 64) &&
2438  "IV size is not compatible with the omp runtime");
2439  StringRef Name =
2440  IVSize == 32
2441  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2442  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2443  llvm::Type *TypeParams[] = {
2444  getIdentTyPointerTy(), // loc
2445  CGM.Int32Ty, // tid
2446  };
2447  auto *FnTy =
2448  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2449  return CGM.CreateRuntimeFunction(FnTy, Name);
2450 }
2451 
2452 llvm::FunctionCallee
2453 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2454  assert((IVSize == 32 || IVSize == 64) &&
2455  "IV size is not compatible with the omp runtime");
2456  StringRef Name =
2457  IVSize == 32
2458  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2459  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2460  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2461  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2462  llvm::Type *TypeParams[] = {
2463  getIdentTyPointerTy(), // loc
2464  CGM.Int32Ty, // tid
2465  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2466  PtrTy, // p_lower
2467  PtrTy, // p_upper
2468  PtrTy // p_stride
2469  };
2470  auto *FnTy =
2471  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2472  return CGM.CreateRuntimeFunction(FnTy, Name);
2473 }
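// Rough sketch of how the dispatch helpers above drive a dynamically scheduled
// loop (assuming a 32-bit signed induction variable):
//   __kmpc_dispatch_init_4(loc, tid, sched, lb, ub, st, chunk);
//   while (__kmpc_dispatch_next_4(loc, tid, &last, &lo, &hi, &stride))
//     for (i = lo; i <= hi; i += stride) { /* loop body */ }
// __kmpc_dispatch_fini_4 is emitted for ordered loops to mark chunk completion.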
2474 
2475 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2476  if (CGM.getLangOpts().OpenMPSimd)
2477  return Address::invalid();
2478  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2479  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2480  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2481  SmallString<64> PtrName;
2482  {
2483  llvm::raw_svector_ostream OS(PtrName);
2484  OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2485  }
2486  llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2487  if (!Ptr) {
2488  QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2489  Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2490  PtrName);
2491  if (!CGM.getLangOpts().OpenMPIsDevice) {
2492  auto *GV = cast<llvm::GlobalVariable>(Ptr);
2493  GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2494  GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2495  }
2496  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2497  registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2498  }
2499  return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2500  }
2501  return Address::invalid();
2502 }
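// Rough illustration: for a (hypothetical) 'int g;' marked with
// '#pragma omp declare target link(g)', the host emits a pointer named like
// '<mangled g>_decl_tgt_link_ptr' initialized to &g, and accesses to g inside
// target regions are routed through that pointer so the runtime can substitute
// the device copy.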
2503 
2504 llvm::Constant *
2505 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2506  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2507  CGM.getContext().getTargetInfo().isTLSSupported());
2508  // Lookup the entry, lazily creating it if necessary.
2509  std::string Suffix = getName({"cache", ""});
2510  return getOrCreateInternalVariable(
2511  CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2512 }
2513 
2514 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2515  const VarDecl *VD,
2516  Address VDAddr,
2517  SourceLocation Loc) {
2518  if (CGM.getLangOpts().OpenMPUseTLS &&
2519  CGM.getContext().getTargetInfo().isTLSSupported())
2520  return VDAddr;
2521 
2522  llvm::Type *VarTy = VDAddr.getElementType();
2523  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2524  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2525  CGM.Int8PtrTy),
2526  CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2527  getOrCreateThreadPrivateCache(VD)};
2528  return Address(CGF.EmitRuntimeCall(
2529  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2530  VDAddr.getAlignment());
2531 }
2532 
2533 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2534  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2535  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2536  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2537  // library.
2538  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2539  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2540  OMPLoc);
2541  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2542  // to register constructor/destructor for variable.
2543  llvm::Value *Args[] = {
2544  OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2545  Ctor, CopyCtor, Dtor};
2546  CGF.EmitRuntimeCall(
2547  createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2548 }
2549 
2550 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2551  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2552  bool PerformInit, CodeGenFunction *CGF) {
2553  if (CGM.getLangOpts().OpenMPUseTLS &&
2554  CGM.getContext().getTargetInfo().isTLSSupported())
2555  return nullptr;
2556 
2557  VD = VD->getDefinition(CGM.getContext());
2558  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2559  QualType ASTTy = VD->getType();
2560 
2561  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2562  const Expr *Init = VD->getAnyInitializer();
2563  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2564  // Generate function that re-emits the declaration's initializer into the
2565  // threadprivate copy of the variable VD
2566  CodeGenFunction CtorCGF(CGM);
2567  FunctionArgList Args;
2568  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2569  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2570  ImplicitParamDecl::Other);
2571  Args.push_back(&Dst);
2572 
2573  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2574  CGM.getContext().VoidPtrTy, Args);
2575  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2576  std::string Name = getName({"__kmpc_global_ctor_", ""});
2577  llvm::Function *Fn =
2578  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2579  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2580  Args, Loc, Loc);
2581  llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2582  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2583  CGM.getContext().VoidPtrTy, Dst.getLocation());
2584  Address Arg = Address(ArgVal, VDAddr.getAlignment());
2585  Arg = CtorCGF.Builder.CreateElementBitCast(
2586  Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2587  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2588  /*IsInitializer=*/true);
2589  ArgVal = CtorCGF.EmitLoadOfScalar(
2590  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2591  CGM.getContext().VoidPtrTy, Dst.getLocation());
2592  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2593  CtorCGF.FinishFunction();
2594  Ctor = Fn;
2595  }
2596  if (VD->getType().isDestructedType() != QualType::DK_none) {
2597  // Generate function that emits destructor call for the threadprivate copy
2598  // of the variable VD
2599  CodeGenFunction DtorCGF(CGM);
2600  FunctionArgList Args;
2601  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2602  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2603  ImplicitParamDecl::Other);
2604  Args.push_back(&Dst);
2605 
2606  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2607  CGM.getContext().VoidTy, Args);
2608  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2609  std::string Name = getName({"__kmpc_global_dtor_", ""});
2610  llvm::Function *Fn =
2611  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2612  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2613  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2614  Loc, Loc);
2615  // Create a scope with an artificial location for the body of this function.
2616  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2617  llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2618  DtorCGF.GetAddrOfLocalVar(&Dst),
2619  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2620  DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2621  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2622  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2623  DtorCGF.FinishFunction();
2624  Dtor = Fn;
2625  }
2626  // Do not emit init function if it is not required.
2627  if (!Ctor && !Dtor)
2628  return nullptr;
2629 
2630  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2631  auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2632  /*isVarArg=*/false)
2633  ->getPointerTo();
2634  // Copying constructor for the threadprivate variable.
2635  // Must be NULL: the parameter is reserved by the runtime, which currently
2636  // requires it to always be NULL and fires an assertion otherwise.
2637  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2638  if (Ctor == nullptr) {
2639  auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2640  /*isVarArg=*/false)
2641  ->getPointerTo();
2642  Ctor = llvm::Constant::getNullValue(CtorTy);
2643  }
2644  if (Dtor == nullptr) {
2645  auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2646  /*isVarArg=*/false)
2647  ->getPointerTo();
2648  Dtor = llvm::Constant::getNullValue(DtorTy);
2649  }
2650  if (!CGF) {
2651  auto *InitFunctionTy =
2652  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2653  std::string Name = getName({"__omp_threadprivate_init_", ""});
2654  llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2655  InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2656  CodeGenFunction InitCGF(CGM);
2657  FunctionArgList ArgList;
2658  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2659  CGM.getTypes().arrangeNullaryFunction(), ArgList,
2660  Loc, Loc);
2661  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2662  InitCGF.FinishFunction();
2663  return InitFunction;
2664  }
2665  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2666  }
2667  return nullptr;
2668 }
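// Rough illustration: for a (hypothetical) threadprivate variable 'S x;' with a
// non-trivial constructor and destructor, this emits the __kmpc_global_ctor_ /
// __kmpc_global_dtor_ wrappers built above plus an __omp_threadprivate_init_
// function performing
//   __kmpc_global_thread_num(&loc);
//   __kmpc_threadprivate_register(&loc, &x, ctor, /*cctor=*/NULL, dtor);
// the init function is returned so it can run with the other global
// initializers.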
2669 
2670 /// Obtain information that uniquely identifies a target entry. This
2671 /// consists of the file and device IDs as well as line number associated with
2672 /// the relevant entry source location.
2673 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2674  unsigned &DeviceID, unsigned &FileID,
2675  unsigned &LineNum) {
2676  SourceManager &SM = C.getSourceManager();
2677 
2678  // The loc should always be valid and have a file ID (the user cannot use
2679  // #pragma directives in macros).
2680 
2681  assert(Loc.isValid() && "Source location is expected to be always valid.");
2682 
2683  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2684  assert(PLoc.isValid() && "Source location is expected to be always valid.");
2685 
2686  llvm::sys::fs::UniqueID ID;
2687  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2688  SM.getDiagnostics().Report(diag::err_cannot_open_file)
2689  << PLoc.getFilename() << EC.message();
2690 
2691  DeviceID = ID.getDevice();
2692  FileID = ID.getFile();
2693  LineNum = PLoc.getLine();
2694 }
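// Rough illustration: DeviceID/FileID come from the file's unique ID (device
// and inode numbers) and LineNum from the presumed location; together they make
// offload entry names of the form
//   __omp_offloading_<device-id>_<file-id>_<name>_l<line>
// (built below) unique across translation units.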
2695 
2696 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2697  llvm::GlobalVariable *Addr,
2698  bool PerformInit) {
2699  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2700  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2701  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
2702  return CGM.getLangOpts().OpenMPIsDevice;
2703  VD = VD->getDefinition(CGM.getContext());
2704  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2705  return CGM.getLangOpts().OpenMPIsDevice;
2706 
2707  QualType ASTTy = VD->getType();
2708 
2709  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2710  // Produce the unique prefix to identify the new target regions. We use
2711  // the source location of the variable declaration which we know to not
2712  // conflict with any target region.
2713  unsigned DeviceID;
2714  unsigned FileID;
2715  unsigned Line;
2716  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2717  SmallString<128> Buffer, Out;
2718  {
2719  llvm::raw_svector_ostream OS(Buffer);
2720  OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2721  << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2722  }
2723 
2724  const Expr *Init = VD->getAnyInitializer();
2725  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2726  llvm::Constant *Ctor;
2727  llvm::Constant *ID;
2728  if (CGM.getLangOpts().OpenMPIsDevice) {
2729  // Generate function that re-emits the declaration's initializer into
2730  // the threadprivate copy of the variable VD
2731  CodeGenFunction CtorCGF(CGM);
2732 
2733  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
2734  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2735  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2736  FTy, Twine(Buffer, "_ctor"), FI, Loc);
2737  auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2738  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2739  FunctionArgList(), Loc, Loc);
2740  auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2741  CtorCGF.EmitAnyExprToMem(Init,
2742  Address(Addr, CGM.getContext().getDeclAlign(VD)),
2743  Init->getType().getQualifiers(),
2744  /*IsInitializer=*/true);
2745  CtorCGF.FinishFunction();
2746  Ctor = Fn;
2747  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2748  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2749  } else {
2750  Ctor = new llvm::GlobalVariable(
2751  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2752  llvm::GlobalValue::PrivateLinkage,
2753  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2754  ID = Ctor;
2755  }
2756 
2757  // Register the information for the entry associated with the constructor.
2758  Out.clear();
2759  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2760  DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2761  ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2762  }
2763  if (VD->getType().isDestructedType() != QualType::DK_none) {
2764  llvm::Constant *Dtor;
2765  llvm::Constant *ID;
2766  if (CGM.getLangOpts().OpenMPIsDevice) {
2767  // Generate function that emits destructor call for the threadprivate
2768  // copy of the variable VD
2769  CodeGenFunction DtorCGF(CGM);
2770 
2771  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
2772  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2773  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2774  FTy, Twine(Buffer, "_dtor"), FI, Loc);
2775  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2776  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2777  FunctionArgList(), Loc, Loc);
2778  // Create a scope with an artificial location for the body of this
2779  // function.
2780  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2781  DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2782  ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2783  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2784  DtorCGF.FinishFunction();
2785  Dtor = Fn;
2786  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2787  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2788  } else {
2789  Dtor = new llvm::GlobalVariable(
2790  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2791  llvm::GlobalValue::PrivateLinkage,
2792  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2793  ID = Dtor;
2794  }
2795  // Register the information for the entry associated with the destructor.
2796  Out.clear();
2797  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2798  DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2799  ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2800  }
2801  return CGM.getLangOpts().OpenMPIsDevice;
2802 }
2803 
2805  QualType VarType,
2806  StringRef Name) {
2807  std::string Suffix = getName({"artificial", ""});
2808  std::string CacheSuffix = getName({"cache", ""});
2809  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2810  llvm::Value *GAddr =
2811  getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2812  llvm::Value *Args[] = {
2813  emitUpdateLocation(CGF, SourceLocation()),
2814  getThreadID(CGF, SourceLocation()),
2815  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2816  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2817  /*IsSigned=*/false),
2818  getOrCreateInternalVariable(
2819  CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2820  return Address(
2821  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2822  CGF.EmitRuntimeCall(
2823  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2824  VarLVType->getPointerTo(/*AddrSpace=*/0)),
2825  CGM.getPointerAlign());
2826 }
2827 
2828 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2829  const RegionCodeGenTy &ThenGen,
2830  const RegionCodeGenTy &ElseGen) {
2831  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2832 
2833  // If the condition constant folds and can be elided, try to avoid emitting
2834  // the condition and the dead arm of the if/else.
2835  bool CondConstant;
2836  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2837  if (CondConstant)
2838  ThenGen(CGF);
2839  else
2840  ElseGen(CGF);
2841  return;
2842  }
2843 
2844  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2845  // emit the conditional branch.
2846  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2847  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2848  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2849  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2850 
2851  // Emit the 'then' code.
2852  CGF.EmitBlock(ThenBlock);
2853  ThenGen(CGF);
2854  CGF.EmitBranch(ContBlock);
2855  // Emit the 'else' code if present.
2856  // There is no need to emit line number for unconditional branch.
2857  (void)ApplyDebugLocation::CreateEmpty(CGF);
2858  CGF.EmitBlock(ElseBlock);
2859  ElseGen(CGF);
2860  // There is no need to emit line number for unconditional branch.
2861  (void)ApplyDebugLocation::CreateEmpty(CGF);
2862  CGF.EmitBranch(ContBlock);
2863  // Emit the continuation block for code after the if.
2864  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2865 }
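// Rough usage sketch: for '#pragma omp parallel if(n > 0)' the helper above
// either folds the condition and emits a single arm, or produces
//   omp_if.then:  <ThenGen>   // e.g. the __kmpc_fork_call path
//   omp_if.else:  <ElseGen>   // e.g. the serialized-parallel path
//   omp_if.end:
// as done by emitParallelCall below.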
2866 
2867 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2868  llvm::Function *OutlinedFn,
2869  ArrayRef<llvm::Value *> CapturedVars,
2870  const Expr *IfCond) {
2871  if (!CGF.HaveInsertPoint())
2872  return;
2873  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2874  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2875  PrePostActionTy &) {
2876  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2877  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2878  llvm::Value *Args[] = {
2879  RTLoc,
2880  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2881  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2882  llvm::SmallVector<llvm::Value *, 16> RealArgs;
2883  RealArgs.append(std::begin(Args), std::end(Args));
2884  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2885 
2886  llvm::FunctionCallee RTLFn =
2887  RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2888  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2889  };
2890  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2891  PrePostActionTy &) {
2892  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2893  llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2894  // Build calls:
2895  // __kmpc_serialized_parallel(&Loc, GTid);
2896  llvm::Value *Args[] = {RTLoc, ThreadID};
2897  CGF.EmitRuntimeCall(
2898  RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2899 
2900  // OutlinedFn(&GTid, &zero, CapturedStruct);
2901  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2902  /*Name*/ ".zero.addr");
2903  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2904  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2905  // ThreadId for serialized parallels is 0.
2906  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2907  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2908  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2909  RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2910 
2911  // __kmpc_end_serialized_parallel(&Loc, GTid);
2912  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2913  CGF.EmitRuntimeCall(
2914  RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2915  EndArgs);
2916  };
2917  if (IfCond) {
2918  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2919  } else {
2920  RegionCodeGenTy ThenRCG(ThenGen);
2921  ThenRCG(CGF);
2922  }
2923 }
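// Rough illustration of the two paths above for '#pragma omp parallel':
//   // if clause absent or true:
//   __kmpc_fork_call(&loc, <num captured>, (kmpc_micro)outlined, var1, ..., varn);
//   // if clause present and false:
//   gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_serialized_parallel(&loc, gtid);
//   outlined(&zero, &zero, var1, ..., varn);   // thread id 0 for serialized regions
//   __kmpc_end_serialized_parallel(&loc, gtid);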
2924 
2925 // If we're inside an (outlined) parallel region, use the region info's
2926 // thread-ID variable (it is passed as the first argument of the outlined
2927 // function as "kmp_int32 *gtid"). Otherwise, if we're in a regular serial
2928 // code region, get the thread ID by calling kmp_int32
2929 // __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary
2930 // and return the address of that temp.
2931 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2932  SourceLocation Loc) {
2933  if (auto *OMPRegionInfo =
2934  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2935  if (OMPRegionInfo->getThreadIDVariable())
2936  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2937 
2938  llvm::Value *ThreadID = getThreadID(CGF, Loc);
2939  QualType Int32Ty =
2940  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2941  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2942  CGF.EmitStoreOfScalar(ThreadID,
2943  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2944 
2945  return ThreadIDTemp;
2946 }
2947 
2948 llvm::Constant *
2949 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2950  const llvm::Twine &Name) {
2951  SmallString<256> Buffer;
2952  llvm::raw_svector_ostream Out(Buffer);
2953  Out << Name;
2954  StringRef RuntimeName = Out.str();
2955  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2956  if (Elem.second) {
2957  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2958  "OMP internal variable has different type than requested");
2959  return &*Elem.second;
2960  }
2961 
2962  return Elem.second = new llvm::GlobalVariable(
2963  CGM.getModule(), Ty, /*IsConstant*/ false,
2964  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2965  Elem.first());
2966 }
2967 
2968 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2969  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2970  std::string Name = getName({Prefix, "var"});
2971  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2972 }
2973 
2974 namespace {
2975 /// Common pre(post)-action for different OpenMP constructs.
2976 class CommonActionTy final : public PrePostActionTy {
2977  llvm::FunctionCallee EnterCallee;
2978  ArrayRef<llvm::Value *> EnterArgs;
2979  llvm::FunctionCallee ExitCallee;
2980  ArrayRef<llvm::Value *> ExitArgs;
2981  bool Conditional;
2982  llvm::BasicBlock *ContBlock = nullptr;
2983 
2984 public:
2985  CommonActionTy(llvm::FunctionCallee EnterCallee,
2986  ArrayRef<llvm::Value *> EnterArgs,
2987  llvm::FunctionCallee ExitCallee,
2988  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2989  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2990  ExitArgs(ExitArgs), Conditional(Conditional) {}
2991  void Enter(CodeGenFunction &CGF) override {
2992  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2993  if (Conditional) {
2994  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2995  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2996  ContBlock = CGF.createBasicBlock("omp_if.end");
2997  // Generate the branch (If-stmt)
2998  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2999  CGF.EmitBlock(ThenBlock);
3000  }
3001  }
3002  void Done(CodeGenFunction &CGF) {
3003  // Emit the rest of blocks/branches
3004  CGF.EmitBranch(ContBlock);
3005  CGF.EmitBlock(ContBlock, true);
3006  }
3007  void Exit(CodeGenFunction &CGF) override {
3008  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3009  }
3010 };
3011 } // anonymous namespace
3012 
3013 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3014  StringRef CriticalName,
3015  const RegionCodeGenTy &CriticalOpGen,
3016  SourceLocation Loc, const Expr *Hint) {
3017  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3018  // CriticalOpGen();
3019  // __kmpc_end_critical(ident_t *, gtid, Lock);
3020  // Prepare arguments and build a call to __kmpc_critical
3021  if (!CGF.HaveInsertPoint())
3022  return;
3023  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3024  getCriticalRegionLock(CriticalName)};
3025  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3026  std::end(Args));
3027  if (Hint) {
3028  EnterArgs.push_back(CGF.Builder.CreateIntCast(
3029  CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3030  }
3031  CommonActionTy Action(
3032  createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3033  : OMPRTL__kmpc_critical),
3034  EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3035  CriticalOpGen.setAction(Action);
3036  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3037 }
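// Illustrative sketch (not part of the original source):
// \code
// #pragma omp critical(lock)
//   ++Counter;
// \endcode
// lowers to the bracketed sequence described in the comment above:
//   __kmpc_critical(&loc, gtid, &<lock var>);
//   ++Counter;
//   __kmpc_end_critical(&loc, gtid, &<lock var>);
// where <lock var> is the per-name global returned by getCriticalRegionLock.
// With a 'hint' clause, __kmpc_critical_with_hint is used instead and the
// evaluated hint is appended to EnterArgs as shown above.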
3038 
3040  const RegionCodeGenTy &MasterOpGen,
3041  SourceLocation Loc) {
3042  if (!CGF.HaveInsertPoint())
3043  return;
3044  // if(__kmpc_master(ident_t *, gtid)) {
3045  // MasterOpGen();
3046  // __kmpc_end_master(ident_t *, gtid);
3047  // }
3048  // Prepare arguments and build a call to __kmpc_master
3049  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3050  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3051  createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3052  /*Conditional=*/true);
3053  MasterOpGen.setAction(Action);
3054  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3055  Action.Done(CGF);
3056 }
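// Illustrative sketch (not part of the original source): for
// \code
// #pragma omp master
//   InitSharedState();
// \endcode
// the Conditional CommonActionTy produces the guarded form from the comment
// above: the body runs only when __kmpc_master(&loc, gtid) returns non-zero,
// __kmpc_end_master(&loc, gtid) is emitted on that same path, and Action.Done
// emits the continuation block. InitSharedState() is a placeholder.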
3057 
3058 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3059  SourceLocation Loc) {
3060  if (!CGF.HaveInsertPoint())
3061  return;
3062  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3063  llvm::Value *Args[] = {
3064  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3065  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3066  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3067  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3068  Region->emitUntiedSwitch(CGF);
3069 }
3070 
3071 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3072  const RegionCodeGenTy &TaskgroupOpGen,
3073  SourceLocation Loc) {
3074  if (!CGF.HaveInsertPoint())
3075  return;
3076  // __kmpc_taskgroup(ident_t *, gtid);
3077  // TaskgroupOpGen();
3078  // __kmpc_end_taskgroup(ident_t *, gtid);
3079  // Prepare arguments and build a call to __kmpc_taskgroup
3080  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3081  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3082  createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3083  Args);
3084  TaskgroupOpGen.setAction(Action);
3085  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3086 }
3087 
3088 /// Given an array of pointers to variables, project the address of a
3089 /// given variable.
3090 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3091  unsigned Index, const VarDecl *Var) {
3092  // Pull out the pointer to the variable.
3093  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3094  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3095 
3096  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3097  Addr = CGF.Builder.CreateElementBitCast(
3098  Addr, CGF.ConvertTypeForMem(Var->getType()));
3099  return Addr;
3100 }
3101 
3102 static llvm::Value *emitCopyprivateCopyFunction(
3103  CodeGenModule &CGM, llvm::Type *ArgsType,
3104  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3105  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3106  SourceLocation Loc) {
3107  ASTContext &C = CGM.getContext();
3108  // void copy_func(void *LHSArg, void *RHSArg);
3109  FunctionArgList Args;
3110  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3111  ImplicitParamDecl::Other);
3112  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3113  ImplicitParamDecl::Other);
3114  Args.push_back(&LHSArg);
3115  Args.push_back(&RHSArg);
3116  const auto &CGFI =
3118  std::string Name =
3119  CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3120  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3122  &CGM.getModule());
3123  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3124  Fn->setDoesNotRecurse();
3125  CodeGenFunction CGF(CGM);
3126  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3127  // Dest = (void*[n])(LHSArg);
3128  // Src = (void*[n])(RHSArg);
3129  Address LHS = Address(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3130  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3131  ArgsType), CGF.getPointerAlign());
3132  Address RHS = Address(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3133  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3134  ArgsType), CGF.getPointerAlign());
3135  // *(Type0*)Dst[0] = *(Type0*)Src[0];
3136  // *(Type1*)Dst[1] = *(Type1*)Src[1];
3137  // ...
3138  // *(Typen*)Dst[n] = *(Typen*)Src[n];
3139  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3140  const auto *DestVar =
3141  cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3142  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3143 
3144  const auto *SrcVar =
3145  cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3146  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3147 
3148  const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3149  QualType Type = VD->getType();
3150  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3151  }
3152  CGF.FinishFunction();
3153  return Fn;
3154 }
3155 
3156 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3157  const RegionCodeGenTy &SingleOpGen,
3158  SourceLocation Loc,
3159  ArrayRef<const Expr *> CopyprivateVars,
3160  ArrayRef<const Expr *> SrcExprs,
3161  ArrayRef<const Expr *> DstExprs,
3162  ArrayRef<const Expr *> AssignmentOps) {
3163  if (!CGF.HaveInsertPoint())
3164  return;
3165  assert(CopyprivateVars.size() == SrcExprs.size() &&
3166  CopyprivateVars.size() == DstExprs.size() &&
3167  CopyprivateVars.size() == AssignmentOps.size());
3168  ASTContext &C = CGM.getContext();
3169  // int32 did_it = 0;
3170  // if(__kmpc_single(ident_t *, gtid)) {
3171  // SingleOpGen();
3172  // __kmpc_end_single(ident_t *, gtid);
3173  // did_it = 1;
3174  // }
3175  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3176  // <copy_func>, did_it);
3177 
3178  Address DidIt = Address::invalid();
3179  if (!CopyprivateVars.empty()) {
3180  // int32 did_it = 0;
3181  QualType KmpInt32Ty =
3182  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3183  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3184  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3185  }
3186  // Prepare arguments and build a call to __kmpc_single
3187  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3188  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3189  createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3190  /*Conditional=*/true);
3191  SingleOpGen.setAction(Action);
3192  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3193  if (DidIt.isValid()) {
3194  // did_it = 1;
3195  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3196  }
3197  Action.Done(CGF);
3198  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3199  // <copy_func>, did_it);
3200  if (DidIt.isValid()) {
3201  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3202  QualType CopyprivateArrayTy =
3203  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3204  /*IndexTypeQuals=*/0);
3205  // Create a list of all private variables for copyprivate.
3206  Address CopyprivateList =
3207  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3208  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3209  Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3210  CGF.Builder.CreateStore(
3212  CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3213  Elem);
3214  }
3215  // Build a function that copies private values from the single region to all
3216  // other threads in the corresponding parallel region.
3217  llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3218  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3219  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3220  llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3221  Address CL =
3222  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3223  CGF.VoidPtrTy);
3224  llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3225  llvm::Value *Args[] = {
3226  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3227  getThreadID(CGF, Loc), // i32 <gtid>
3228  BufSize, // size_t <buf_size>
3229  CL.getPointer(), // void *<copyprivate list>
3230  CpyFn, // void (*) (void *, void *) <copy_func>
3231  DidItVal // i32 did_it
3232  };
3233  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3234  }
3235 }
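// Illustrative sketch (not part of the original source): with a copyprivate
// clause, e.g.
// \code
// #pragma omp parallel private(Seed)
// {
//   #pragma omp single copyprivate(Seed)
//   Seed = ReadSeed();   // ReadSeed() and Use() are placeholders
//   Use(Seed);
// }
// \endcode
// the code above creates the did_it flag, runs the region under
// __kmpc_single/__kmpc_end_single, stores &Seed into the .omp.copyprivate list,
// and finally calls __kmpc_copyprivate(&loc, gtid, <buf_size>, <list>,
// <copy_func>, did_it) so the generated copy function can broadcast the single
// thread's value to every other thread in the team.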
3236 
3237 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3238  const RegionCodeGenTy &OrderedOpGen,
3239  SourceLocation Loc, bool IsThreads) {
3240  if (!CGF.HaveInsertPoint())
3241  return;
3242  // __kmpc_ordered(ident_t *, gtid);
3243  // OrderedOpGen();
3244  // __kmpc_end_ordered(ident_t *, gtid);
3245  // Prepare arguments and build a call to __kmpc_ordered
3246  if (IsThreads) {
3247  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3248  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3249  createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3250  Args);
3251  OrderedOpGen.setAction(Action);
3252  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3253  return;
3254  }
3255  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3256 }
3257 
3258 static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3259  unsigned Flags;
3260  if (Kind == OMPD_for)
3261  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3262  else if (Kind == OMPD_sections)
3263  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3264  else if (Kind == OMPD_single)
3265  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3266  else if (Kind == OMPD_barrier)
3267  Flags = OMP_IDENT_BARRIER_EXPL;
3268  else
3269  Flags = OMP_IDENT_BARRIER_IMPL;
3270  return Flags;
3271 }
3272 
3273 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3274  OpenMPDirectiveKind Kind, bool EmitChecks,
3275  bool ForceSimpleCall) {
3276  if (!CGF.HaveInsertPoint())
3277  return;
3278  // Build call __kmpc_cancel_barrier(loc, thread_id);
3279  // Build call __kmpc_barrier(loc, thread_id);
3280  unsigned Flags = getDefaultFlagsForBarriers(Kind);
3281  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3282  // thread_id);
3283  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3284  getThreadID(CGF, Loc)};
3285  if (auto *OMPRegionInfo =
3286  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3287  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3288  llvm::Value *Result = CGF.EmitRuntimeCall(
3289  createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3290  if (EmitChecks) {
3291  // if (__kmpc_cancel_barrier()) {
3292  // exit from construct;
3293  // }
3294  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3295  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3296  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3297  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3298  CGF.EmitBlock(ExitBB);
3299  // exit from construct;
3300  CodeGenFunction::JumpDest CancelDestination =
3301  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3302  CGF.EmitBranchThroughCleanup(CancelDestination);
3303  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3304  }
3305  return;
3306  }
3307  }
3308  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3309 }
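// Illustrative sketch (not part of the original source): the implicit barrier
// at the end of a cancellable worksharing loop, e.g.
// \code
// #pragma omp for
// for (int I = 0; I < N; ++I) {
//   if (Fail(I)) {        // Fail() is a placeholder
//     #pragma omp cancel for
//   }
// }
// \endcode
// takes the first branch above: __kmpc_cancel_barrier is called with the
// OMP_IDENT_BARRIER_IMPL_FOR flag and, when EmitChecks is true, its result is
// tested so the thread can branch to the cancellation exit block. Regions
// without cancellation fall through to the plain __kmpc_barrier call.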
3310 
3311 /// Map the OpenMP loop schedule to the runtime enumeration.
3312 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3313  bool Chunked, bool Ordered) {
3314  switch (ScheduleKind) {
3315  case OMPC_SCHEDULE_static:
3316  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3317  : (Ordered ? OMP_ord_static : OMP_sch_static);
3318  case OMPC_SCHEDULE_dynamic:
3319  return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3320  case OMPC_SCHEDULE_guided:
3321  return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3322  case OMPC_SCHEDULE_runtime:
3323  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3324  case OMPC_SCHEDULE_auto:
3325  return Ordered ? OMP_ord_auto : OMP_sch_auto;
3326  case OMPC_SCHEDULE_unknown:
3327  assert(!Chunked && "chunk was specified but schedule kind not known");
3328  return Ordered ? OMP_ord_static : OMP_sch_static;
3329  }
3330  llvm_unreachable("Unexpected runtime schedule");
3331 }
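// Illustrative mapping (not part of the original source), following the switch
// above for a few common 'schedule' clauses on a non-ordered loop:
// \code
// // schedule(static)      -> OMP_sch_static
// // schedule(static, 4)   -> OMP_sch_static_chunked
// // schedule(dynamic, 8)  -> OMP_sch_dynamic_chunked
// // schedule(guided)      -> OMP_sch_guided_chunked
// // schedule(runtime)     -> OMP_sch_runtime
// // schedule(auto)        -> OMP_sch_auto
// // no schedule clause    -> OMP_sch_static (the OMPC_SCHEDULE_unknown case)
// \endcode
// With the 'ordered' clause the corresponding OMP_ord_* values are returned.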
3332 
3333 /// Map the OpenMP distribute schedule to the runtime enumeration.
3334 static OpenMPSchedType
3335 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3336  // only static is allowed for dist_schedule
3337  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3338 }
3339 
3340 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3341  bool Chunked) const {
3342  OpenMPSchedType Schedule =
3343  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3344  return Schedule == OMP_sch_static;
3345 }
3346 
3347 bool CGOpenMPRuntime::isStaticNonchunked(
3348  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3349  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3350  return Schedule == OMP_dist_sch_static;
3351 }
3352 
3353 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3354  bool Chunked) const {
3355  OpenMPSchedType Schedule =
3356  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3357  return Schedule == OMP_sch_static_chunked;
3358 }
3359 
3360 bool CGOpenMPRuntime::isStaticChunked(
3361  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3362  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3363  return Schedule == OMP_dist_sch_static_chunked;
3364 }
3365 
3366 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3367  OpenMPSchedType Schedule =
3368  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3369  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3370  return Schedule != OMP_sch_static;
3371 }
3372 
3373 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3374  OpenMPScheduleClauseModifier M1,
3375  OpenMPScheduleClauseModifier M2) {
3376  int Modifier = 0;
3377  switch (M1) {
3378  case OMPC_SCHEDULE_MODIFIER_monotonic:
3379  Modifier = OMP_sch_modifier_monotonic;
3380  break;
3381  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3382  Modifier = OMP_sch_modifier_nonmonotonic;
3383  break;
3384  case OMPC_SCHEDULE_MODIFIER_simd:
3385  if (Schedule == OMP_sch_static_chunked)
3386  Schedule = OMP_sch_static_balanced_chunked;
3387  break;
3388  case OMPC_SCHEDULE_MODIFIER_last:
3389  case OMPC_SCHEDULE_MODIFIER_unknown:
3390  break;
3391  }
3392  switch (M2) {
3393  case OMPC_SCHEDULE_MODIFIER_monotonic:
3394  Modifier = OMP_sch_modifier_monotonic;
3395  break;
3396  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3397  Modifier = OMP_sch_modifier_nonmonotonic;
3398  break;
3399  case OMPC_SCHEDULE_MODIFIER_simd:
3400  if (Schedule == OMP_sch_static_chunked)
3401  Schedule = OMP_sch_static_balanced_chunked;
3402  break;
3403  case OMPC_SCHEDULE_MODIFIER_last:
3404  case OMPC_SCHEDULE_MODIFIER_unknown:
3405  break;
3406  }
3407  return Schedule | Modifier;
3408 }
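// Illustrative sketch (not part of the original source): for
// \code
// #pragma omp for schedule(nonmonotonic: dynamic, 8)
// \endcode
// getRuntimeSchedule yields OMP_sch_dynamic_chunked and the switches above OR
// in OMP_sch_modifier_nonmonotonic, so the runtime sees
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic. The 'simd' modifier
// does not add a bit; it only upgrades OMP_sch_static_chunked to
// OMP_sch_static_balanced_chunked, as handled in both switches.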
3409 
3410 void CGOpenMPRuntime::emitForDispatchInit(
3411  CodeGenFunction &CGF, SourceLocation Loc,
3412  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3413  bool Ordered, const DispatchRTInput &DispatchValues) {
3414  if (!CGF.HaveInsertPoint())
3415  return;
3416  OpenMPSchedType Schedule = getRuntimeSchedule(
3417  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3418  assert(Ordered ||
3419  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3420  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3421  Schedule != OMP_sch_static_balanced_chunked));
3422  // Call __kmpc_dispatch_init(
3423  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3424  // kmp_int[32|64] lower, kmp_int[32|64] upper,
3425  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3426 
3427  // If the Chunk was not specified in the clause - use default value 1.
3428  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3429  : CGF.Builder.getIntN(IVSize, 1);
3430  llvm::Value *Args[] = {
3431  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3432  CGF.Builder.getInt32(addMonoNonMonoModifier(
3433  Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3434  DispatchValues.LB, // Lower
3435  DispatchValues.UB, // Upper
3436  CGF.Builder.getIntN(IVSize, 1), // Stride
3437  Chunk // Chunk
3438  };
3439  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3440 }
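// Illustrative sketch (not part of the original source): a dynamically
// scheduled loop such as
// \code
// #pragma omp for schedule(dynamic, 4)
// for (int I = 0; I < N; ++I)
//   Work(I);   // Work() is a placeholder
// \endcode
// is initialized with the __kmpc_dispatch_init_4 call built here (32-bit
// induction variable, stride 1, chunk 4, bounds taken from DispatchValues) and
// then repeatedly asks the runtime for chunks via __kmpc_dispatch_next_4 (see
// emitForNext below) until that call returns zero. Loops with a 64-bit
// induction variable use the corresponding _8 entry points.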
3441 
3442 static void emitForStaticInitCall(
3443  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3444  llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3445  OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3446  const CGOpenMPRuntime::StaticRTInput &Values) {
3447  if (!CGF.HaveInsertPoint())
3448  return;
3449 
3450  assert(!Values.Ordered);
3451  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3452  Schedule == OMP_sch_static_balanced_chunked ||
3453  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3454  Schedule == OMP_dist_sch_static ||
3455  Schedule == OMP_dist_sch_static_chunked);
3456 
3457  // Call __kmpc_for_static_init(
3458  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3459  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3460  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3461  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3462  llvm::Value *Chunk = Values.Chunk;
3463  if (Chunk == nullptr) {
3464  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3465  Schedule == OMP_dist_sch_static) &&
3466  "expected static non-chunked schedule");
3467  // If the Chunk was not specified in the clause - use default value 1.
3468  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3469  } else {
3470  assert((Schedule == OMP_sch_static_chunked ||
3471  Schedule == OMP_sch_static_balanced_chunked ||
3472  Schedule == OMP_ord_static_chunked ||
3473  Schedule == OMP_dist_sch_static_chunked) &&
3474  "expected static chunked schedule");
3475  }
3476  llvm::Value *Args[] = {
3477  UpdateLocation,
3478  ThreadId,
3479  CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3480  M2)), // Schedule type
3481  Values.IL.getPointer(), // &isLastIter
3482  Values.LB.getPointer(), // &LB
3483  Values.UB.getPointer(), // &UB
3484  Values.ST.getPointer(), // &Stride
3485  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3486  Chunk // Chunk
3487  };
3488  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3489 }
3490 
3491 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3492  SourceLocation Loc,
3493  OpenMPDirectiveKind DKind,
3494  const OpenMPScheduleTy &ScheduleKind,
3495  const StaticRTInput &Values) {
3496  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3497  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3498  assert(isOpenMPWorksharingDirective(DKind) &&
3499  "Expected loop-based or sections-based directive.");
3500  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3501  isOpenMPLoopDirective(DKind)
3502  ? OMP_IDENT_WORK_LOOP
3503  : OMP_IDENT_WORK_SECTIONS);
3504  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3505  llvm::FunctionCallee StaticInitFunction =
3506  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3507  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3508  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3509 }
3510 
3511 void CGOpenMPRuntime::emitDistributeStaticInit(
3512  CodeGenFunction &CGF, SourceLocation Loc,
3513  OpenMPDistScheduleClauseKind SchedKind,
3514  const CGOpenMPRuntime::StaticRTInput &Values) {
3515  OpenMPSchedType ScheduleNum =
3516  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3517  llvm::Value *UpdatedLocation =
3518  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3519  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3520  llvm::FunctionCallee StaticInitFunction =
3521  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3522  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3523  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3524  OMPC_SCHEDULE_MODIFIER_unknown, Values);
3525 }
3526 
3527 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3528  SourceLocation Loc,
3529  OpenMPDirectiveKind DKind) {
3530  if (!CGF.HaveInsertPoint())
3531  return;
3532  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3533  llvm::Value *Args[] = {
3534  emitUpdateLocation(CGF, Loc,
3535  isOpenMPDistributeDirective(DKind)
3536  ? OMP_IDENT_WORK_DISTRIBUTE
3537  : isOpenMPLoopDirective(DKind)
3538  ? OMP_IDENT_WORK_LOOP
3539  : OMP_IDENT_WORK_SECTIONS),
3540  getThreadID(CGF, Loc)};
3541  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3542  Args);
3543 }
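// Illustrative sketch (not part of the original source): a statically
// scheduled loop such as
// \code
// #pragma omp for schedule(static)
// for (int I = 0; I < N; ++I)
//   Work(I);   // Work() is a placeholder
// \endcode
// is bracketed by the calls built above: __kmpc_for_static_init_4 fills in the
// thread's lower bound, upper bound, stride and last-iteration flag through
// the pointers from StaticRTInput, the loop body then runs over that subrange,
// and __kmpc_for_static_fini closes the worksharing region (followed by the
// implicit barrier emitted elsewhere unless 'nowait' is present).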
3544 
3546  SourceLocation Loc,
3547  unsigned IVSize,
3548  bool IVSigned) {
3549  if (!CGF.HaveInsertPoint())
3550  return;
3551  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3552  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3553  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3554 }
3555 
3556 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3557  SourceLocation Loc, unsigned IVSize,
3558  bool IVSigned, Address IL,
3559  Address LB, Address UB,
3560  Address ST) {
3561  // Call __kmpc_dispatch_next(
3562  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3563  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3564  // kmp_int[32|64] *p_stride);
3565  llvm::Value *Args[] = {
3566  emitUpdateLocation(CGF, Loc),
3567  getThreadID(CGF, Loc),
3568  IL.getPointer(), // &isLastIter
3569  LB.getPointer(), // &Lower
3570  UB.getPointer(), // &Upper
3571  ST.getPointer() // &Stride
3572  };
3573  llvm::Value *Call =
3574  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3575  return CGF.EmitScalarConversion(
3576  Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3577  CGF.getContext().BoolTy, Loc);
3578 }
3579 
3580 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3581  llvm::Value *NumThreads,
3582  SourceLocation Loc) {
3583  if (!CGF.HaveInsertPoint())
3584  return;
3585  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3586  llvm::Value *Args[] = {
3587  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3588  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3589  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3590  Args);
3591 }
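// Illustrative sketch (not part of the original source): for
// \code
// #pragma omp parallel num_threads(ComputeThreads())   // placeholder call
//   Body();
// \endcode
// the already-evaluated clause value is truncated or extended to i32 as above
// and __kmpc_push_num_threads(&loc, gtid, <value>) is emitted just before the
// fork call for the enclosing parallel region, so the runtime knows how many
// threads to use for that region.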
3592 
3593 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3594  OpenMPProcBindClauseKind ProcBind,
3595  SourceLocation Loc) {
3596  if (!CGF.HaveInsertPoint())
3597  return;
3598  // Constants for proc bind value accepted by the runtime.
3599  enum ProcBindTy {
3600  ProcBindFalse = 0,
3601  ProcBindTrue,
3602  ProcBindMaster,
3603  ProcBindClose,
3604  ProcBindSpread,
3605  ProcBindIntel,
3606  ProcBindDefault
3607  } RuntimeProcBind;
3608  switch (ProcBind) {
3609  case OMPC_PROC_BIND_master:
3610  RuntimeProcBind = ProcBindMaster;
3611  break;
3612  case OMPC_PROC_BIND_close:
3613  RuntimeProcBind = ProcBindClose;
3614  break;
3615  case OMPC_PROC_BIND_spread:
3616  RuntimeProcBind = ProcBindSpread;
3617  break;
3618  case OMPC_PROC_BIND_unknown:
3619  llvm_unreachable("Unsupported proc_bind value.");
3620  }
3621  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3622  llvm::Value *Args[] = {
3623  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3624  llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3625  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3626 }
3627 
3628 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3629  SourceLocation Loc) {
3630  if (!CGF.HaveInsertPoint())
3631  return;
3632  // Build call void __kmpc_flush(ident_t *loc)
3633  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3634  emitUpdateLocation(CGF, Loc));
3635 }
3636 
3637 namespace {
3638 /// Indexes of fields for type kmp_task_t.
3639 enum KmpTaskTFields {
3640  /// List of shared variables.
3641  KmpTaskTShareds,
3642  /// Task routine.
3643  KmpTaskTRoutine,
3644  /// Partition id for the untied tasks.
3645  KmpTaskTPartId,
3646  /// Function with call of destructors for private variables.
3647  Data1,
3648  /// Task priority.
3649  Data2,
3650  /// (Taskloops only) Lower bound.
3651  KmpTaskTLowerBound,
3652  /// (Taskloops only) Upper bound.
3653  KmpTaskTUpperBound,
3654  /// (Taskloops only) Stride.
3655  KmpTaskTStride,
3656  /// (Taskloops only) Is last iteration flag.
3657  KmpTaskTLastIter,
3658  /// (Taskloops only) Reduction data.
3659  KmpTaskTReductions,
3660 };
3661 } // anonymous namespace
3662 
3663 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3664  return OffloadEntriesTargetRegion.empty() &&
3665  OffloadEntriesDeviceGlobalVar.empty();
3666 }
3667 
3668 /// Initialize target region entry.
3669 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3670  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3671  StringRef ParentName, unsigned LineNum,
3672  unsigned Order) {
3673  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3674  "only required for the device "
3675  "code generation.");
3676  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3677  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3678  OMPTargetRegionEntryTargetRegion);
3679  ++OffloadingEntriesNum;
3680 }
3681 
3682 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3683  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3684  StringRef ParentName, unsigned LineNum,
3685  llvm::Constant *Addr, llvm::Constant *ID,
3686  OMPTargetRegionEntryKind Flags) {
3687  // If we are emitting code for a target, the entry is already initialized,
3688  // and only has to be registered.
3689  if (CGM.getLangOpts().OpenMPIsDevice) {
3690  if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3691  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3693  "Unable to find target region on line '%0' in the device code.");
3694  CGM.getDiags().Report(DiagID) << LineNum;
3695  return;
3696  }
3697  auto &Entry =
3698  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3699  assert(Entry.isValid() && "Entry not initialized!");
3700  Entry.setAddress(Addr);
3701  Entry.setID(ID);
3702  Entry.setFlags(Flags);
3703  } else {
3704  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3705  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3706  ++OffloadingEntriesNum;
3707  }
3708 }
3709 
3710 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3711  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3712  unsigned LineNum) const {
3713  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3714  if (PerDevice == OffloadEntriesTargetRegion.end())
3715  return false;
3716  auto PerFile = PerDevice->second.find(FileID);
3717  if (PerFile == PerDevice->second.end())
3718  return false;
3719  auto PerParentName = PerFile->second.find(ParentName);
3720  if (PerParentName == PerFile->second.end())
3721  return false;
3722  auto PerLine = PerParentName->second.find(LineNum);
3723  if (PerLine == PerParentName->second.end())
3724  return false;
3725  // Fail if this entry is already registered.
3726  if (PerLine->second.getAddress() || PerLine->second.getID())
3727  return false;
3728  return true;
3729 }
3730 
3731 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3732  const OffloadTargetRegionEntryInfoActTy &Action) {
3733  // Scan all target region entries and perform the provided action.
3734  for (const auto &D : OffloadEntriesTargetRegion)
3735  for (const auto &F : D.second)
3736  for (const auto &P : F.second)
3737  for (const auto &L : P.second)
3738  Action(D.first, F.first, P.first(), L.first, L.second);
3739 }
3740 
3741 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3742  initializeDeviceGlobalVarEntryInfo(StringRef Name,
3743  OMPTargetGlobalVarEntryKind Flags,
3744  unsigned Order) {
3745  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3746  "only required for the device "
3747  "code generation.");
3748  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3749  ++OffloadingEntriesNum;
3750 }
3751 
3752 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3753  registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3754  CharUnits VarSize,
3755  OMPTargetGlobalVarEntryKind Flags,
3756  llvm::GlobalValue::LinkageTypes Linkage) {
3757  if (CGM.getLangOpts().OpenMPIsDevice) {
3758  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3759  assert(Entry.isValid() && Entry.getFlags() == Flags &&
3760  "Entry not initialized!");
3761  assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3762  "Resetting with the new address.");
3763  if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
3764  return;
3765  Entry.setAddress(Addr);
3766  Entry.setVarSize(VarSize);
3767  Entry.setLinkage(Linkage);
3768  } else {
3769  if (hasDeviceGlobalVarEntryInfo(VarName))
3770  return;
3771  OffloadEntriesDeviceGlobalVar.try_emplace(
3772  VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3773  ++OffloadingEntriesNum;
3774  }
3775 }
3776 
3777 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3778  actOnDeviceGlobalVarEntriesInfo(
3779  const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3780  // Scan all device global variable entries and perform the provided action.
3781  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3782  Action(E.getKey(), E.getValue());
3783 }
3784 
3785 llvm::Function *
3786 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3787  // If we don't have entries or if we are emitting code for the device, we
3788  // don't need to do anything.
3789  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3790  return nullptr;
3791 
3792  llvm::Module &M = CGM.getModule();
3793  ASTContext &C = CGM.getContext();
3794 
3795  // Get list of devices we care about
3796  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3797 
3798  // We should be creating an offloading descriptor only if there are devices
3799  // specified.
3800  assert(!Devices.empty() && "No OpenMP offloading devices??");
3801 
3802  // Create the external variables that will point to the begin and end of the
3803  // host entries section. These will be defined by the linker.
3804  llvm::Type *OffloadEntryTy =
3806  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3807  auto *HostEntriesBegin = new llvm::GlobalVariable(
3808  M, OffloadEntryTy, /*isConstant=*/true,
3809  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3810  EntriesBeginName);
3811  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3812  auto *HostEntriesEnd =
3813  new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3815  /*Initializer=*/nullptr, EntriesEndName);
3816 
3817  // Create all device images
3818  auto *DeviceImageTy = cast<llvm::StructType>(
3820  ConstantInitBuilder DeviceImagesBuilder(CGM);
3821  ConstantArrayBuilder DeviceImagesEntries =
3822  DeviceImagesBuilder.beginArray(DeviceImageTy);
3823 
3824  for (const llvm::Triple &Device : Devices) {
3825  StringRef T = Device.getTriple();
3826  std::string BeginName = getName({"omp_offloading", "img_start", ""});
3827  auto *ImgBegin = new llvm::GlobalVariable(
3828  M, CGM.Int8Ty, /*isConstant=*/true,
3829  llvm::GlobalValue::ExternalWeakLinkage,
3830  /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3831  std::string EndName = getName({"omp_offloading", "img_end", ""});
3832  auto *ImgEnd = new llvm::GlobalVariable(
3833  M, CGM.Int8Ty, /*isConstant=*/true,
3834  llvm::GlobalValue::ExternalWeakLinkage,
3835  /*Initializer=*/nullptr, Twine(EndName).concat(T));
3836 
3837  llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3838  HostEntriesEnd};
3840  DeviceImagesEntries);
3841  }
3842 
3843  // Create device images global array.
3844  std::string ImagesName = getName({"omp_offloading", "device_images"});
3845  llvm::GlobalVariable *DeviceImages =
3846  DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3847  CGM.getPointerAlign(),
3848  /*isConstant=*/true);
3849  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3850 
3851  // This is a zero array to be used as indices in the constant GEP expressions below.
3852  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3853  llvm::Constant::getNullValue(CGM.Int32Ty)};
3854 
3855  // Create the target region descriptor.
3856  llvm::Constant *Data[] = {
3857  llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3858  llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3859  DeviceImages, Index),
3860  HostEntriesBegin, HostEntriesEnd};
3861  std::string Descriptor = getName({"omp_offloading", "descriptor"});
3862  llvm::GlobalVariable *Desc = createGlobalStruct(
3863  CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3864 
3865  // Emit code to register or unregister the descriptor at execution
3866  // startup or closing, respectively.
3867 
3868  llvm::Function *UnRegFn;
3869  {
3870  FunctionArgList Args;
3872  Args.push_back(&DummyPtr);
3873 
3874  CodeGenFunction CGF(CGM);
3875  // Disable debug info for the global (de-)initializer because it is not part
3876  // of any particular construct.
3877  CGF.disableDebugInfo();
3878  const auto &FI =
3880  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3881  std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3882  UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
3883  CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
3885  Desc);
3886  CGF.FinishFunction();
3887  }
3888  llvm::Function *RegFn;
3889  {
3890  CodeGenFunction CGF(CGM);
3891  // Disable debug info for the global (de-)initializer because it is not part
3892  // of any particular construct.
3893  CGF.disableDebugInfo();
3894  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
3895  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3896 
3897  // Encode offload target triples into the registration function name. It
3898  // will serve as a comdat key for the registration/unregistration code for
3899  // this particular combination of offloading targets.
3900  SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
3901  RegFnNameParts[0] = "omp_offloading";
3902  RegFnNameParts[1] = "descriptor_reg";
3903  llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
3904  [](const llvm::Triple &T) -> const std::string& {
3905  return T.getTriple();
3906  });
3907  llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
3908  std::string Descriptor = getName(RegFnNameParts);
3909  RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
3910  CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
3912  // Create a variable to drive the registration and unregistration of the
3913  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3914  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
3915  SourceLocation(), nullptr, C.CharTy,
3917  CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3918  CGF.FinishFunction();
3919  }
3920  if (CGM.supportsCOMDAT()) {
3921  // It is sufficient to call registration function only once, so create a
3922  // COMDAT group for registration/unregistration functions and associated
3923  // data. That would reduce startup time and code size. Registration
3924  // function serves as a COMDAT group key.
3925  llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
3926  RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3927  RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3928  RegFn->setComdat(ComdatKey);
3929  UnRegFn->setComdat(ComdatKey);
3930  DeviceImages->setComdat(ComdatKey);
3931  Desc->setComdat(ComdatKey);
3932  }
3933  return RegFn;
3934 }
3935 
3936 void CGOpenMPRuntime::createOffloadEntry(
3937  llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3938  llvm::GlobalValue::LinkageTypes Linkage) {
3939  StringRef Name = Addr->getName();
3940  llvm::Module &M = CGM.getModule();
3941  llvm::LLVMContext &C = M.getContext();
3942 
3943  // Create constant string with the name.
3944  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3945 
3946  std::string StringName = getName({"omp_offloading", "entry_name"});
3947  auto *Str = new llvm::GlobalVariable(
3948  M, StrPtrInit->getType(), /*isConstant=*/true,
3949  llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3950  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3951 
3952  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3953  llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3954  llvm::ConstantInt::get(CGM.SizeTy, Size),
3955  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3956  llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3957  std::string EntryName = getName({"omp_offloading", "entry", ""});
3958  llvm::GlobalVariable *Entry = createGlobalStruct(
3959  CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3960  Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3961 
3962  // The entry has to be created in the section the linker expects it to be.
3963  std::string Section = getName({"omp_offloading", "entries"});
3964  Entry->setSection(Section);
3965 }
3966 
3967 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3968  // Emit the offloading entries and metadata so that the device codegen side
3969  // can easily figure out what to emit. The produced metadata looks like
3970  // this:
3971  //
3972  // !omp_offload.info = !{!1, ...}
3973  //
3974  // Right now we only generate metadata for functions that contain target
3975  // regions.
3976 
3977  // If we do not have entries, we don't need to do anything.
3978  if (OffloadEntriesInfoManager.empty())
3979  return;
3980 
3981  llvm::Module &M = CGM.getModule();
3982  llvm::LLVMContext &C = M.getContext();
3984  OrderedEntries(OffloadEntriesInfoManager.size());
3985  llvm::SmallVector<StringRef, 16> ParentFunctions(
3987 
3988  // Auxiliary methods to create metadata values and strings.
3989  auto &&GetMDInt = [this](unsigned V) {
3990  return llvm::ConstantAsMetadata::get(
3991  llvm::ConstantInt::get(CGM.Int32Ty, V));
3992  };
3993 
3994  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3995 
3996  // Create the offloading info metadata node.
3997  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3998 
3999  // Create function that emits metadata for each target region entry;
4000  auto &&TargetRegionMetadataEmitter =
4001  [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4002  unsigned DeviceID, unsigned FileID, StringRef ParentName,
4003  unsigned Line,
4005  // Generate metadata for target regions. Each entry of this metadata
4006  // contains:
4007  // - Entry 0 -> Kind of this type of metadata (0).
4008  // - Entry 1 -> Device ID of the file where the entry was identified.
4009  // - Entry 2 -> File ID of the file where the entry was identified.
4010  // - Entry 3 -> Mangled name of the function where the entry was
4011  // identified.
4012  // - Entry 4 -> Line in the file where the entry was identified.
4013  // - Entry 5 -> Order the entry was created.
4014  // The first element of the metadata node is the kind.
4015  llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4016  GetMDInt(FileID), GetMDString(ParentName),
4017  GetMDInt(Line), GetMDInt(E.getOrder())};
4018 
4019  // Save this entry in the right position of the ordered entries array.
4020  OrderedEntries[E.getOrder()] = &E;
4021  ParentFunctions[E.getOrder()] = ParentName;
4022 
4023  // Add metadata to the named metadata node.
4024  MD->addOperand(llvm::MDNode::get(C, Ops));
4025  };
4026 
4028  TargetRegionMetadataEmitter);
4029 
4030  // Create function that emits metadata for each device global variable entry;
4031  auto &&DeviceGlobalVarMetadataEmitter =
4032  [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4033  MD](StringRef MangledName,
4035  &E) {
4036  // Generate metadata for global variables. Each entry of this metadata
4037  // contains:
4038  // - Entry 0 -> Kind of this type of metadata (1).
4039  // - Entry 1 -> Mangled name of the variable.
4040  // - Entry 2 -> Declare target kind.
4041  // - Entry 3 -> Order the entry was created.
4042  // The first element of the metadata node is the kind.
4043  llvm::Metadata *Ops[] = {
4044  GetMDInt(E.getKind()), GetMDString(MangledName),
4045  GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4046 
4047  // Save this entry in the right position of the ordered entries array.
4048  OrderedEntries[E.getOrder()] = &E;
4049 
4050  // Add metadata to the named metadata node.
4051  MD->addOperand(llvm::MDNode::get(C, Ops));
4052  };
4053 
4055  DeviceGlobalVarMetadataEmitter);
4056 
4057  for (const auto *E : OrderedEntries) {
4058  assert(E && "All ordered entries must exist!");
4059  if (const auto *CE =
4060  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4061  E)) {
4062  if (!CE->getID() || !CE->getAddress()) {
4063  // Do not blame the entry if the parent function is not emitted.
4064  StringRef FnName = ParentFunctions[CE->getOrder()];
4065  if (!CGM.GetGlobalValue(FnName))
4066  continue;
4067  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4069  "Offloading entry for target region is incorrect: either the "
4070  "address or the ID is invalid.");
4071  CGM.getDiags().Report(DiagID);
4072  continue;
4073  }
4074  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4075  CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4076  } else if (const auto *CE =
4077  dyn_cast<OffloadEntriesInfoManagerTy::
4078  OffloadEntryInfoDeviceGlobalVar>(E)) {
4081  CE->getFlags());
4082  switch (Flags) {
4084  if (!CE->getAddress()) {
4085  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4087  "Offloading entry for declare target variable is incorrect: the "
4088  "address is invalid.");
4089  CGM.getDiags().Report(DiagID);
4090  continue;
4091  }
4092  // The variable has no definition - no need to add the entry.
4093  if (CE->getVarSize().isZero())
4094  continue;
4095  break;
4096  }
4098  assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4099  (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4100  "Declare target link address is set.");
4101  if (CGM.getLangOpts().OpenMPIsDevice)
4102  continue;
4103  if (!CE->getAddress()) {
4104  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4106  "Offloading entry for declare target variable is incorrect: the "
4107  "address is invalid.");
4108  CGM.getDiags().Report(DiagID);
4109  continue;
4110  }
4111  break;
4112  }
4113  createOffloadEntry(CE->getAddress(), CE->getAddress(),
4114  CE->getVarSize().getQuantity(), Flags,
4115  CE->getLinkage());
4116  } else {
4117  llvm_unreachable("Unsupported entry kind.");
4118  }
4119  }
4120 }
4121 
4122 /// Loads all the offload entries information from the host IR
4123 /// metadata.
4124 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4125  // If we are in target mode, load the metadata from the host IR. This code has
4126  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
4127 
4128  if (!CGM.getLangOpts().OpenMPIsDevice)
4129  return;
4130 
4131  if (CGM.getLangOpts().OMPHostIRFile.empty())
4132  return;
4133 
4134  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4135  if (auto EC = Buf.getError()) {
4136  CGM.getDiags().Report(diag::err_cannot_open_file)
4137  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4138  return;
4139  }
4140 
4141  llvm::LLVMContext C;
4142  auto ME = expectedToErrorOrAndEmitErrors(
4143  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4144 
4145  if (auto EC = ME.getError()) {
4146  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4147  DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4148  CGM.getDiags().Report(DiagID)
4149  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4150  return;
4151  }
4152 
4153  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4154  if (!MD)
4155  return;
4156 
4157  for (llvm::MDNode *MN : MD->operands()) {
4158  auto &&GetMDInt = [MN](unsigned Idx) {
4159  auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4160  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4161  };
4162 
4163  auto &&GetMDString = [MN](unsigned Idx) {
4164  auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4165  return V->getString();
4166  };
4167 
4168  switch (GetMDInt(0)) {
4169  default:
4170  llvm_unreachable("Unexpected metadata!");
4171  break;
4175  /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4176  /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4177  /*Order=*/GetMDInt(5));
4178  break;
4182  /*MangledName=*/GetMDString(1),
4183  static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4184  /*Flags=*/GetMDInt(2)),
4185  /*Order=*/GetMDInt(3));
4186  break;
4187  }
4188  }
4189 }
4190 
4191 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4192  if (!KmpRoutineEntryPtrTy) {
4193  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4194  ASTContext &C = CGM.getContext();
4195  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4197  KmpRoutineEntryPtrQTy = C.getPointerType(
4198  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4199  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4200  }
4201 }
4202 
4203 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4204  // Make sure the type of the entry is already created. This is the type we
4205  // have to create:
4206  // struct __tgt_offload_entry{
4207  // void *addr; // Pointer to the offload entry info.
4208  // // (function or global)
4209  // char *name; // Name of the function or global.
4210  // size_t size; // Size of the entry info (0 if it is a function).
4211  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4212  // int32_t reserved; // Reserved, to use by the runtime library.
4213  // };
4214  if (TgtOffloadEntryQTy.isNull()) {
4215  ASTContext &C = CGM.getContext();
4216  RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4217  RD->startDefinition();
4218  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4220  addFieldToRecordDecl(C, RD, C.getSizeType());
4222  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4224  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4225  RD->completeDefinition();
4226  RD->addAttr(PackedAttr::CreateImplicit(C));
4228  }
4229  return TgtOffloadEntryQTy;
4230 }
4231 
4232 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4233  // These are the types we need to build:
4234  // struct __tgt_device_image{
4235  // void *ImageStart; // Pointer to the target code start.
4236  // void *ImageEnd; // Pointer to the target code end.
4237  // // We also add the host entries to the device image, as it may be useful
4238  // // for the target runtime to have access to that information.
4239  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
4240  // // the entries.
4241  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4242  // // entries (non inclusive).
4243  // };
4244  if (TgtDeviceImageQTy.isNull()) {
4245  ASTContext &C = CGM.getContext();
4246  RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4247  RD->startDefinition();
4248  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4249  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4252  RD->completeDefinition();
4254  }
4255  return TgtDeviceImageQTy;
4256 }
4257 
4258 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4259  // struct __tgt_bin_desc{
4260  // int32_t NumDevices; // Number of devices supported.
4261  // __tgt_device_image *DeviceImages; // Arrays of device images
4262  // // (one per device).
4263  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
4264  // // entries.
4265  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4266  // // entries (non inclusive).
4267  // };
4269  ASTContext &C = CGM.getContext();
4270  RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4271  RD->startDefinition();
4273  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4277  RD->completeDefinition();
4279  }
4280  return TgtBinaryDescriptorQTy;
4281 }
4282 
4283 namespace {
4284 struct PrivateHelpersTy {
4285  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4286  const VarDecl *PrivateElemInit)
4287  : Original(Original), PrivateCopy(PrivateCopy),
4288  PrivateElemInit(PrivateElemInit) {}
4289  const VarDecl *Original;
4290  const VarDecl *PrivateCopy;
4291  const VarDecl *PrivateElemInit;
4292 };
4293 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4294 } // anonymous namespace
4295 
4296 static RecordDecl *
4297 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4298  if (!Privates.empty()) {
4299  ASTContext &C = CGM.getContext();
4300  // Build struct .kmp_privates_t. {
4301  // /* private vars */
4302  // };
4303  RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4304  RD->startDefinition();
4305  for (const auto &Pair : Privates) {
4306  const VarDecl *VD = Pair.second.Original;
4307  QualType Type = VD->getType().getNonReferenceType();
4308  FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4309  if (VD->hasAttrs()) {
4310  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4311  E(VD->getAttrs().end());
4312  I != E; ++I)
4313  FD->addAttr(*I);
4314  }
4315  }
4316  RD->completeDefinition();
4317  return RD;
4318  }
4319  return nullptr;
4320 }
4321 
4322 static RecordDecl *
4323 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4324  QualType KmpInt32Ty,
4325  QualType KmpRoutineEntryPointerQTy) {
4326  ASTContext &C = CGM.getContext();
4327  // Build struct kmp_task_t {
4328  // void * shareds;
4329  // kmp_routine_entry_t routine;
4330  // kmp_int32 part_id;
4331  // kmp_cmplrdata_t data1;
4332  // kmp_cmplrdata_t data2;
4333  // For taskloops additional fields:
4334  // kmp_uint64 lb;
4335  // kmp_uint64 ub;
4336  // kmp_int64 st;
4337  // kmp_int32 liter;
4338  // void * reductions;
4339  // };
4340  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4341  UD->startDefinition();
4342  addFieldToRecordDecl(C, UD, KmpInt32Ty);
4343  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4344  UD->completeDefinition();
4345  QualType KmpCmplrdataTy = C.getRecordType(UD);
4346  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4347  RD->startDefinition();
4348  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4349  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4350  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4351  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4352  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4353  if (isOpenMPTaskLoopDirective(Kind)) {
4354  QualType KmpUInt64Ty =
4355  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4356  QualType KmpInt64Ty =
4357  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4358  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4359  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4360  addFieldToRecordDecl(C, RD, KmpInt64Ty);
4361  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4362  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4363  }
4364  RD->completeDefinition();
4365  return RD;
4366 }
4367 
4368 static RecordDecl *
4369 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4370  ArrayRef<PrivateDataTy> Privates) {
4371  ASTContext &C = CGM.getContext();
4372  // Build struct kmp_task_t_with_privates {
4373  // kmp_task_t task_data;
4374  // .kmp_privates_t. privates;
4375  // };
4376  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4377  RD->startDefinition();
4378  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4379  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4380  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4381  RD->completeDefinition();
4382  return RD;
4383 }
4384 
4385 /// Emit a proxy function which accepts kmp_task_t as the second
4386 /// argument.
4387 /// \code
4388 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4389 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4390 /// For taskloops:
4391 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4392 /// tt->reductions, tt->shareds);
4393 /// return 0;
4394 /// }
4395 /// \endcode
4396 static llvm::Function *
4397 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4398  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4399  QualType KmpTaskTWithPrivatesPtrQTy,
4400  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4401  QualType SharedsPtrTy, llvm::Function *TaskFunction,
4402  llvm::Value *TaskPrivatesMap) {
4403  ASTContext &C = CGM.getContext();
4404  FunctionArgList Args;
4405  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4407  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4408  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4410  Args.push_back(&GtidArg);
4411  Args.push_back(&TaskTypeArg);
4412  const auto &TaskEntryFnInfo =
4413  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4414  llvm::FunctionType *TaskEntryTy =
4415  CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4416  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4417  auto *TaskEntry = llvm::Function::Create(
4418  TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4419  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4420  TaskEntry->setDoesNotRecurse();
4421  CodeGenFunction CGF(CGM);
4422  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4423  Loc, Loc);
4424 
4425  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4426  // tt,
4427  // For taskloops:
4428  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4429  // tt->task_data.shareds);
4430  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4431  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4432  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4433  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4434  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4435  const auto *KmpTaskTWithPrivatesQTyRD =
4436  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4437  LValue Base =
4438  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4439  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4440  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4441  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4442  llvm::Value *PartidParam = PartIdLVal.getPointer();
4443 
4444  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4445  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4447  CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4448  CGF.ConvertTypeForMem(SharedsPtrTy));
4449 
4450  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4451  llvm::Value *PrivatesParam;
4452  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4453  LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4454  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4455  PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4456  } else {
4457  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4458  }
4459 
4460  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4461  TaskPrivatesMap,
4462  CGF.Builder
4464  TDBase.getAddress(), CGF.VoidPtrTy)
4465  .getPointer()};
4466  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4467  std::end(CommonArgs));
4468  if (isOpenMPTaskLoopDirective(Kind)) {
4469  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4470  LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4471  llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4472  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4473  LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4474  llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4475  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4476  LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4477  llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4478  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4479  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4480  llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4481  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4482  LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4483  llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4484  CallArgs.push_back(LBParam);
4485  CallArgs.push_back(UBParam);
4486  CallArgs.push_back(StParam);
4487  CallArgs.push_back(LIParam);
4488  CallArgs.push_back(RParam);
4489  }
4490  CallArgs.push_back(SharedsParam);
4491 
4492  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4493  CallArgs);
4494  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4495  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4496  CGF.FinishFunction();
4497  return TaskEntry;
4498 }
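 // Illustrative sketch (not emitted by this function): for a directive such as
 //
 //   int a = 0, b = 1;
 //   #pragma omp task shared(a) firstprivate(b)
 //   a += b;
 //
 // the proxy built above is the routine the OpenMP runtime invokes when the
 // task is eventually scheduled. Conceptually, other parts of this file
 // arrange something like
 //
 //   kmp_task_t *T = __kmpc_omp_task_alloc(..., /*task_entry=*/.omp_task_entry., ...);
 //   __kmpc_omp_task(&Loc, GTid, T);  // runtime later calls .omp_task_entry.(GTid, T)
 //
 // The argument lists shown are schematic; the exact __kmpc_* signatures are
 // defined by the OpenMP runtime and emitted elsewhere in this file.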
4499 
4500 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4501  SourceLocation Loc,
4502  QualType KmpInt32Ty,
4503  QualType KmpTaskTWithPrivatesPtrQTy,
4504  QualType KmpTaskTWithPrivatesQTy) {
4505  ASTContext &C = CGM.getContext();
4506  FunctionArgList Args;
4507  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4508  ImplicitParamDecl::Other);
4509  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4510  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4511  ImplicitParamDecl::Other);
4512  Args.push_back(&GtidArg);
4513  Args.push_back(&TaskTypeArg);
4514  const auto &DestructorFnInfo =
4515  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4516  llvm::FunctionType *DestructorFnTy =
4517  CGM.getTypes().GetFunctionType(DestructorFnInfo);
4518  std::string Name =
4519  CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4520  auto *DestructorFn =
4521  llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4522  Name, &CGM.getModule());
4523  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4524  DestructorFnInfo);
4525  DestructorFn->setDoesNotRecurse();
4526  CodeGenFunction CGF(CGM);
4527  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4528  Args, Loc, Loc);
4529 
4530  LValue Base = CGF.EmitLoadOfPointerLValue(
4531  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4532  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4533  const auto *KmpTaskTWithPrivatesQTyRD =
4534  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4535  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4536  Base = CGF.EmitLValueForField(Base, *FI);
4537  for (const auto *Field :
4538  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4539  if (QualType::DestructionKind DtorKind =
4540  Field->getType().isDestructedType()) {
4541  LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4542  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4543  }
4544  }
4545  CGF.FinishFunction();
4546  return DestructorFn;
4547 }
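 // Illustrative sketch (assumption about typical use, not emitted verbatim):
 // the destructor thunk above only matters when the task's .privates. record
 // holds non-trivially destructible copies, e.g.
 //
 //   std::string S;
 //   #pragma omp task firstprivate(S)  // task-private copy of S needs ~string()
 //   consume(S);
 //
 // For such captures, the thunk walks the privates record and runs each
 // field's destructor (via pushDestroy above) before the runtime reclaims the
 // task storage.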
4548 
4549 /// Emit a privates mapping function for correct handling of private and
4550 /// firstprivate variables.
4551 /// \code
4552 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4553 /// **noalias priv1,..., <tyn> **noalias privn) {
4554 /// *priv1 = &.privates.priv1;
4555 /// ...;
4556 /// *privn = &.privates.privn;
4557 /// }
4558 /// \endcode
4559 static llvm::Value *
4560 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4561  ArrayRef<const Expr *> PrivateVars,
4562  ArrayRef<const Expr *> FirstprivateVars,
4563  ArrayRef<const Expr *> LastprivateVars,
4564  QualType PrivatesQTy,
4565  ArrayRef<PrivateDataTy> Privates) {
4566  ASTContext &C = CGM.getContext();
4567  FunctionArgList Args;
4568  ImplicitParamDecl TaskPrivatesArg(
4569  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4570  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4571  ImplicitParamDecl::Other);
4572  Args.push_back(&TaskPrivatesArg);
4573  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4574  unsigned Counter = 1;
4575  for (const Expr *E : PrivateVars) {
4576  Args.push_back(ImplicitParamDecl::Create(
4577  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4578  C.getPointerType(C.getPointerType(E->getType()))
4579  .withConst()
4580  .withRestrict(),
4581  ImplicitParamDecl::Other));
4582  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4583  PrivateVarsPos[VD] = Counter;
4584  ++Counter;
4585  }
4586  for (const Expr *E : FirstprivateVars) {
4587  Args.push_back(ImplicitParamDecl::Create(
4588  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4589  C.getPointerType(C.getPointerType(E->getType()))
4590  .withConst()
4591  .withRestrict(),
4592  ImplicitParamDecl::Other));
4593  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4594  PrivateVarsPos[VD] = Counter;
4595  ++Counter;
4596  }
4597  for (const Expr *E : LastprivateVars) {
4598  Args.push_back(ImplicitParamDecl::Create(
4599  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4600  C.getPointerType(C.getPointerType(E->getType()))
4601  .withConst()
4602  .withRestrict(),
4603  ImplicitParamDecl::Other));
4604  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4605  PrivateVarsPos[VD] = Counter;
4606  ++Counter;
4607  }
4608  const auto &TaskPrivatesMapFnInfo =
4609  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4610  llvm::FunctionType *TaskPrivatesMapTy =
4611  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4612  std::string Name =
4613  CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4614  auto *TaskPrivatesMap = llvm::Function::Create(
4615  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4616  &CGM.getModule());
4617  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4618  TaskPrivatesMapFnInfo);
4619  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4620  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4621  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4622  CodeGenFunction CGF(CGM);
4623  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4624  TaskPrivatesMapFnInfo, Args, Loc, Loc);
4625 
4626  // *privi = &.privates.privi;
4627  LValue Base = CGF.EmitLoadOfPointerLValue(
4628  CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4629  TaskPrivatesArg.getType()->castAs<PointerType>());
4630  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4631  Counter = 0;
4632  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4633  LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4634  const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4635  LValue RefLVal =
4636  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4637  LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4638  RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4639  CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4640  ++Counter;
4641  }
4642  CGF.FinishFunction();
4643  return TaskPrivatesMap;
4644 }
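 // Illustrative sketch: the mapping function returned above is handed to the
 // outlined task body as the task_privates_map argument (see
 // emitProxyTaskFunction), letting the body recover the addresses of its
 // private copies. Conceptually, inside the outlined body:
 //
 //   <ty1> *Priv1; /* ... */ <tyn> *Privn;
 //   .omp_task_privates_map.(&TT->privates, &Priv1, /* ... */, &Privn);
 //   // each PrivN now points at its field inside the task's .privates. record
 //
 // The call shape shown is schematic; the real call is constructed by the
 // outlined-function codegen, not written by hand.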
4645 
4646 static bool stable_sort_comparator(const PrivateDataTy P1,
4647  const PrivateDataTy P2) {
4648  return P1.first > P2.first;
4649 }
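 // Note (hedged): PrivateDataTy pairs are built earlier in this file with the
 // alignment of each private copy in .first, so this comparator orders
 // privates by decreasing alignment; copies with alignments {8, 4, 1} end up
 // laid out 8-, then 4-, then 1-byte aligned in the .privates. record, which
 // avoids internal padding. The name suggests it is used with
 // std::stable_sort, which preserves declaration order for equal alignments.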
4650 
4651 /// Emit initialization for private variables in task-based directives.
4652 static void emitPrivatesInit(CodeGenFunction &CGF,
4653  const OMPExecutableDirective &D,
4654  Address KmpTaskSharedsPtr, LValue TDBase,
4655  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4656  QualType SharedsTy, QualType SharedsPtrTy,
4657  const OMPTaskDataTy &Data,
4658  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4659  ASTContext &C = CGF.getContext();
4660  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->