clang  14.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
48 /// Base class for handling code generation inside OpenMP regions.
49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
50 public:
51  /// Kinds of OpenMP regions used in codegen.
52  enum CGOpenMPRegionKind {
53  /// Region with outlined function for standalone 'parallel'
54  /// directive.
55  ParallelOutlinedRegion,
56  /// Region with outlined function for standalone 'task' directive.
57  TaskOutlinedRegion,
58  /// Region for constructs that do not require function outlining,
59  /// like 'for', 'sections', 'atomic' etc. directives.
60  InlinedRegion,
61  /// Region with outlined function for standalone 'target' directive.
62  TargetRegion,
63  };
64 
65  CGOpenMPRegionInfo(const CapturedStmt &CS,
66  const CGOpenMPRegionKind RegionKind,
68  bool HasCancel)
69  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
70  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
71 
72  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
74  bool HasCancel)
75  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
76  Kind(Kind), HasCancel(HasCancel) {}
77 
78  /// Get a variable or parameter for storing global thread id
79  /// inside OpenMP construct.
80  virtual const VarDecl *getThreadIDVariable() const = 0;
81 
82  /// Emit the captured statement body.
83  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
84 
85  /// Get an LValue for the current ThreadID variable.
86  /// \return LValue for thread id variable. This LValue always has type int32*.
87  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
88 
89  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
90 
91  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
92 
93  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
94 
95  bool hasCancel() const { return HasCancel; }
96 
97  static bool classof(const CGCapturedStmtInfo *Info) {
98  return Info->getKind() == CR_OpenMP;
99  }
100 
101  ~CGOpenMPRegionInfo() override = default;
102 
103 protected:
104  CGOpenMPRegionKind RegionKind;
105  RegionCodeGenTy CodeGen;
107  bool HasCancel;
108 };
109 
110 /// API for captured statement code generation in OpenMP constructs.
/// API for captured statement code generation in OpenMP constructs.
/// Used for outlined functions generated for standalone 'parallel' regions.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: an info object is of this class iff it is an OpenMP region
  /// of the ParallelOutlinedRegion kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the generated outlined helper function.
  StringRef HelperName;
};
142 
143 /// API for captured statement code generation in OpenMP constructs.
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
145 public:
146  class UntiedTaskActionTy final : public PrePostActionTy {
147  bool Untied;
148  const VarDecl *PartIDVar;
149  const RegionCodeGenTy UntiedCodeGen;
150  llvm::SwitchInst *UntiedSwitch = nullptr;
151 
152  public:
153  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
154  const RegionCodeGenTy &UntiedCodeGen)
155  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
156  void Enter(CodeGenFunction &CGF) override {
157  if (Untied) {
158  // Emit task switching point.
159  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
160  CGF.GetAddrOfLocalVar(PartIDVar),
161  PartIDVar->getType()->castAs<PointerType>());
162  llvm::Value *Res =
163  CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
164  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
165  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
166  CGF.EmitBlock(DoneBB);
168  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
170  CGF.Builder.GetInsertBlock());
171  emitUntiedSwitch(CGF);
172  }
173  }
174  void emitUntiedSwitch(CodeGenFunction &CGF) const {
175  if (Untied) {
176  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
177  CGF.GetAddrOfLocalVar(PartIDVar),
178  PartIDVar->getType()->castAs<PointerType>());
179  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
180  PartIdLVal);
181  UntiedCodeGen(CGF);
182  CodeGenFunction::JumpDest CurPoint =
183  CGF.getJumpDestInCurrentScope(".untied.next.");
184  CGF.EmitBranch(CGF.ReturnBlock.getBlock());
185  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
186  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
187  CGF.Builder.GetInsertBlock());
188  CGF.EmitBranchThroughCleanup(CurPoint);
189  CGF.EmitBlock(CurPoint.getBlock());
190  }
191  }
192  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
193  };
194  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
195  const VarDecl *ThreadIDVar,
196  const RegionCodeGenTy &CodeGen,
197  OpenMPDirectiveKind Kind, bool HasCancel,
198  const UntiedTaskActionTy &Action)
199  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
200  ThreadIDVar(ThreadIDVar), Action(Action) {
201  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
202  }
203 
204  /// Get a variable or parameter for storing global thread id
205  /// inside OpenMP construct.
206  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
207 
208  /// Get an LValue for the current ThreadID variable.
209  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
210 
211  /// Get the name of the capture helper.
212  StringRef getHelperName() const override { return ".omp_outlined."; }
213 
214  void emitUntiedSwitch(CodeGenFunction &CGF) override {
215  Action.emitUntiedSwitch(CGF);
216  }
217 
218  static bool classof(const CGCapturedStmtInfo *Info) {
219  return CGOpenMPRegionInfo::classof(Info) &&
220  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
221  TaskOutlinedRegion;
222  }
223 
224 private:
225  /// A variable or parameter storing global thread id for OpenMP
226  /// constructs.
227  const VarDecl *ThreadIDVar;
228  /// Action for emitting code for untied tasks.
229  const UntiedTaskActionTy &Action;
230 };
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
236  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237  const RegionCodeGenTy &CodeGen,
238  OpenMPDirectiveKind Kind, bool HasCancel)
239  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240  OldCSI(OldCSI),
241  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243  // Retrieve the value of the context parameter.
244  llvm::Value *getContextValue() const override {
245  if (OuterRegionInfo)
246  return OuterRegionInfo->getContextValue();
247  llvm_unreachable("No context value for inlined OpenMP region");
248  }
249 
250  void setContextValue(llvm::Value *V) override {
251  if (OuterRegionInfo) {
252  OuterRegionInfo->setContextValue(V);
253  return;
254  }
255  llvm_unreachable("No context value for inlined OpenMP region");
256  }
257 
258  /// Lookup the captured field decl for a variable.
259  const FieldDecl *lookup(const VarDecl *VD) const override {
260  if (OuterRegionInfo)
261  return OuterRegionInfo->lookup(VD);
262  // If there is no outer outlined region,no need to lookup in a list of
263  // captured variables, we can use the original one.
264  return nullptr;
265  }
266 
267  FieldDecl *getThisFieldDecl() const override {
268  if (OuterRegionInfo)
269  return OuterRegionInfo->getThisFieldDecl();
270  return nullptr;
271  }
272 
273  /// Get a variable or parameter for storing global thread id
274  /// inside OpenMP construct.
275  const VarDecl *getThreadIDVariable() const override {
276  if (OuterRegionInfo)
277  return OuterRegionInfo->getThreadIDVariable();
278  return nullptr;
279  }
280 
281  /// Get an LValue for the current ThreadID variable.
282  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283  if (OuterRegionInfo)
284  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285  llvm_unreachable("No LValue for inlined OpenMP construct");
286  }
287 
288  /// Get the name of the capture helper.
289  StringRef getHelperName() const override {
290  if (auto *OuterRegionInfo = getOldCSI())
291  return OuterRegionInfo->getHelperName();
292  llvm_unreachable("No helper name for inlined OpenMP construct");
293  }
294 
295  void emitUntiedSwitch(CodeGenFunction &CGF) override {
296  if (OuterRegionInfo)
297  OuterRegionInfo->emitUntiedSwitch(CGF);
298  }
299 
300  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
302  static bool classof(const CGCapturedStmtInfo *Info) {
303  return CGOpenMPRegionInfo::classof(Info) &&
304  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305  }
306 
307  ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310  /// CodeGen info about outer OpenMP region.
312  CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  // Target regions are always created for OMPD_target and can never have a
  // 'cancel' construct inside them.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name of the target region.
  StringRef HelperName;
};
343 
/// Placeholder region-codegen callback for contexts (e.g. inner-expression
/// regions) where emitting a statement body must never happen.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353  OMPD_unknown,
354  /*HasCancel=*/false),
355  PrivScope(CGF) {
356  // Make sure the globals captured in the provided statement are local by
357  // using the privatization logic. We assume the same variable is not
358  // captured more than once.
359  for (const auto &C : CS.captures()) {
360  if (!C.capturesVariable() && !C.capturesVariableByCopy())
361  continue;
362 
363  const VarDecl *VD = C.getCapturedVar();
364  if (VD->isLocalVarDeclOrParm())
365  continue;
366 
367  DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368  /*RefersToEnclosingVariableOrCapture=*/false,
370  C.getLocation());
371  PrivScope.addPrivate(
372  VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373  }
374  (void)PrivScope.Privatize();
375  }
376 
377  /// Lookup the captured field decl for a variable.
378  const FieldDecl *lookup(const VarDecl *VD) const override {
379  if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380  return FD;
381  return nullptr;
382  }
383 
384  /// Emit the captured statement body.
385  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386  llvm_unreachable("No body for expressions");
387  }
388 
389  /// Get a variable or parameter for storing global thread id
390  /// inside OpenMP construct.
391  const VarDecl *getThreadIDVariable() const override {
392  llvm_unreachable("No thread id for expressions");
393  }
394 
395  /// Get the name of the capture helper.
396  StringRef getHelperName() const override {
397  llvm_unreachable("No helper name for expressions");
398  }
399 
400  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403  /// Private scope to capture global variables.
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Stashed copies of the CGF state that is suppressed while the inlined
  // region is being emitted (only when NoInheritance is set).
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Hide lambda-capture and block state from the inlined region so it
      // does not resolve captures through the enclosing lambda/block.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Put back the lambda/block state stashed in the constructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
449 
450 /// Values for bit flags used in the ident_t to describe the fields.
451 /// All enumeric elements are named and described in accordance with the code
452 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// (Deliberately shares the 0x40 bit with OMP_IDENT_BARRIER_IMPL; the
  /// 'sections'/'single' variants below add further bits on top of it.)
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478 
479 namespace {
481 /// Values for bit flags for marking which requires clauses have been used.
483  /// flag undefined.
484  OMP_REQ_UNDEFINED = 0x000,
485  /// no requires clause present.
486  OMP_REQ_NONE = 0x001,
487  /// reverse_offload clause.
488  OMP_REQ_REVERSE_OFFLOAD = 0x002,
489  /// unified_address clause.
490  OMP_REQ_UNIFIED_ADDRESS = 0x004,
491  /// unified_shared_memory clause.
492  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
493  /// dynamic_allocators clause.
494  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
495  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
496 };
497 
499  /// Device ID if the device was not defined, runtime should get it
500  /// from environment variables in the spec.
501  OMP_DEVICEID_UNDEF = -1,
502 };
503 } // anonymous namespace
504 
505 /// Describes ident structure that describes a source location.
506 /// All descriptions are taken from
507 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
508 /// Original structure:
509 /// typedef struct ident {
510 /// kmp_int32 reserved_1; /**< might be used in Fortran;
511 /// see above */
512 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
513 /// KMP_IDENT_KMPC identifies this union
514 /// member */
515 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
516 /// see above */
517 ///#if USE_ITT_BUILD
518 /// /* but currently used for storing
519 /// region-specific ITT */
520 /// /* contextual information. */
521 ///#endif /* USE_ITT_BUILD */
522 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
523 /// C++ */
524 /// char const *psource; /**< String describing the source location.
525 /// The string is composed of semi-colon separated
526 /// fields which describe the source file,
527 /// the function and a pair of line numbers that
528 /// delimit the construct.
529 /// */
530 /// } ident_t;
/// Indexes of the fields of the ident_t structure described above, in
/// declaration order; used when GEP-ing into an ident_t value.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
546 /// Schedule types for 'omp for' loops (these enumerators are taken from
547 /// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581  PrePostActionTy *Action;
582 
583 public:
584  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586  if (!CGF.HaveInsertPoint())
587  return;
588  Action->Exit(CGF);
589  }
590 };
591 
592 } // anonymous namespace
593 
596  if (PrePostAction) {
597  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
598  Callback(CodeGen, CGF, *PrePostAction);
599  } else {
600  PrePostActionTy Action;
601  Callback(CodeGen, CGF, Action);
602  }
603 }
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611  if (const auto *DRE =
612  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613  if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614  return DRD;
615  return nullptr;
616 }
617 
619  const OMPDeclareReductionDecl *DRD,
620  const Expr *InitOp,
621  Address Private, Address Original,
622  QualType Ty) {
623  if (DRD->getInitializer()) {
624  std::pair<llvm::Function *, llvm::Function *> Reduction =
626  const auto *CE = cast<CallExpr>(InitOp);
627  const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
628  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
629  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
630  const auto *LHSDRE =
631  cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
632  const auto *RHSDRE =
633  cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
634  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
635  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
636  [=]() { return Private; });
637  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
638  [=]() { return Original; });
639  (void)PrivateScope.Privatize();
640  RValue Func = RValue::get(Reduction.second);
641  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642  CGF.EmitIgnoredExpr(InitOp);
643  } else {
644  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646  auto *GV = new llvm::GlobalVariable(
647  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648  llvm::GlobalValue::PrivateLinkage, Init, Name);
649  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650  RValue InitRVal;
651  switch (CGF.getEvaluationKind(Ty)) {
652  case TEK_Scalar:
653  InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654  break;
655  case TEK_Complex:
656  InitRVal =
658  break;
659  case TEK_Aggregate: {
660  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663  /*IsInitializer=*/false);
664  return;
665  }
666  }
667  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670  /*IsInitializer=*/false);
671  }
672 }
673 
674 /// Emit initialization of arrays of complex types.
675 /// \param DestAddr Address of the array.
676 /// \param Type Type of array.
677 /// \param Init Initial expression of array.
678 /// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    // The source is only needed when calling a declare-reduction initializer.
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Per-element cleanups must run before advancing to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
765 
/// Emit an lvalue for a shared reduction expression; delegates to
/// CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771  const Expr *E) {
772  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774  return LValue();
775 }
776 
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer when the UDR provides one, or when
  // the private copy carries no default initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
793 
796  ArrayRef<const Expr *> Privates,
797  ArrayRef<const Expr *> ReductionOps) {
798  ClausesData.reserve(Shareds.size());
799  SharedAddresses.reserve(Shareds.size());
800  Sizes.reserve(Shareds.size());
801  BaseDecls.reserve(Shareds.size());
802  const auto *IOrig = Origs.begin();
803  const auto *IPriv = Privates.begin();
804  const auto *IRed = ReductionOps.begin();
805  for (const Expr *Ref : Shareds) {
806  ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807  std::advance(IOrig, 1);
808  std::advance(IPriv, 1);
809  std::advance(IRed, 1);
810  }
811 }
812 
814  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815  "Number of generated lvalues must be exactly N.");
816  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818  SharedAddresses.emplace_back(First, Second);
819  if (ClausesData[N].Shared == ClausesData[N].Ref) {
820  OrigAddresses.emplace_back(First, Second);
821  } else {
822  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824  OrigAddresses.emplace_back(First, Second);
825  }
826 }
827 
// NOTE(review): the declaration line of this ReductionCodeGen member function
// (original line 828) was lost during extraction; its exact name cannot be
// determined from this view -- restore it from upstream. Judging from the
// body, it computes Sizes[N] (size in chars plus element count) for reduction
// item N.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  // Array sections are sized from their lower/upper-bound pointer pair.
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: the type size is known; no element count needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Elements = (UB - LB) + 1; chars = elements * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // NOTE(review): a line constructing a CodeGenFunction::OpaqueValueMapping
  // (original line 857) was lost in extraction -- the argument list below
  // belongs to that constructor call, which binds the VLA size expression to
  // the computed Size before re-emitting the variably-modified type.
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
// NOTE(review): the closing brace of this function (original line 863) was
// also lost in extraction.
864 
866  llvm::Value *Size) {
867  const auto *PrivateVD =
868  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
869  QualType PrivateType = PrivateVD->getType();
870  if (!PrivateType->isVariablyModifiedType()) {
871  assert(!Size && !Sizes[N].second &&
872  "Size should be nullptr for non-variably modified reduction "
873  "items.");
874  return;
875  }
877  CGF,
878  cast<OpaqueValueExpr>(
879  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
880  RValue::get(Size));
881  CGF.EmitVariablyModifiedType(PrivateType);
882 }
883 
885  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
886  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
887  assert(SharedAddresses.size() > N && "No variable was generated");
888  const auto *PrivateVD =
889  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
890  const OMPDeclareReductionDecl *DRD =
891  getReductionInit(ClausesData[N].ReductionOp);
892  QualType PrivateType = PrivateVD->getType();
893  PrivateAddr = CGF.Builder.CreateElementBitCast(
894  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
895  QualType SharedType = SharedAddresses[N].first.getType();
896  SharedLVal = CGF.MakeAddrLValue(
897  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
898  CGF.ConvertTypeForMem(SharedType)),
899  SharedType, SharedAddresses[N].first.getBaseInfo(),
900  CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
901  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
902  if (DRD && DRD->getInitializer())
903  (void)DefaultInit(CGF);
904  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
905  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
906  (void)DefaultInit(CGF);
907  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
908  PrivateAddr, SharedLVal.getAddress(CGF),
909  SharedLVal.getType());
910  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
911  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
912  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
913  PrivateVD->getType().getQualifiers(),
914  /*IsInitializer=*/false);
915  }
916 }
917 
919  const auto *PrivateVD =
920  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921  QualType PrivateType = PrivateVD->getType();
922  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923  return DTorKind != QualType::DK_none;
924 }
925 
927  Address PrivateAddr) {
928  const auto *PrivateVD =
929  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
930  QualType PrivateType = PrivateVD->getType();
931  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
932  if (needCleanups(N)) {
933  PrivateAddr = CGF.Builder.CreateElementBitCast(
934  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
935  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
936  }
937 }
938 
940  LValue BaseLV) {
941  BaseTy = BaseTy.getNonReferenceType();
942  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
943  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
944  if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
945  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
946  } else {
947  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
948  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
949  }
950  BaseTy = BaseTy->getPointeeType();
951  }
952  return CGF.MakeAddrLValue(
953  CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
954  CGF.ConvertTypeForMem(ElTy)),
955  BaseLV.getType(), BaseLV.getBaseInfo(),
956  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
957 }
958 
960  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
961  llvm::Value *Addr) {
962  Address Tmp = Address::invalid();
963  Address TopTmp = Address::invalid();
964  Address MostTopTmp = Address::invalid();
965  BaseTy = BaseTy.getNonReferenceType();
966  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
967  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
968  Tmp = CGF.CreateMemTemp(BaseTy);
969  if (TopTmp.isValid())
970  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
971  else
972  MostTopTmp = Tmp;
973  TopTmp = Tmp;
974  BaseTy = BaseTy->getPointeeType();
975  }
976  llvm::Type *Ty = BaseLVType;
977  if (Tmp.isValid())
978  Ty = Tmp.getElementType();
979  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
980  if (Tmp.isValid()) {
981  CGF.Builder.CreateStore(Addr, Tmp);
982  return MostTopTmp;
983  }
984  return Address(Addr, BaseLVAlignment);
985 }
986 
987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
988  const VarDecl *OrigVD = nullptr;
989  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
990  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
991  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
992  Base = TempOASE->getBase()->IgnoreParenImpCasts();
993  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
994  Base = TempASE->getBase()->IgnoreParenImpCasts();
995  DE = cast<DeclRefExpr>(Base);
996  OrigVD = cast<VarDecl>(DE->getDecl());
997  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
998  const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
999  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1000  Base = TempASE->getBase()->IgnoreParenImpCasts();
1001  DE = cast<DeclRefExpr>(Base);
1002  OrigVD = cast<VarDecl>(DE->getDecl());
1003  }
1004  return OrigVD;
1005 }
1006 
1008  Address PrivateAddr) {
1009  const DeclRefExpr *DE;
1010  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1011  BaseDecls.emplace_back(OrigVD);
1012  LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1013  LValue BaseLValue =
1014  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1015  OriginalBaseLValue);
1016  Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1017  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1018  BaseLValue.getPointer(CGF), SharedAddr.getPointer());
1019  llvm::Value *PrivatePointer =
1021  PrivateAddr.getPointer(), SharedAddr.getType());
1022  llvm::Value *Ptr = CGF.Builder.CreateGEP(
1023  SharedAddr.getElementType(), PrivatePointer, Adjustment);
1024  return castToBase(CGF, OrigVD->getType(),
1025  SharedAddresses[N].first.getType(),
1026  OriginalBaseLValue.getAddress(CGF).getType(),
1027  OriginalBaseLValue.getAlignment(), Ptr);
1028  }
1029  BaseDecls.emplace_back(
1030  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1031  return PrivateAddr;
1032 }
1033 
1035  const OMPDeclareReductionDecl *DRD =
1036  getReductionInit(ClausesData[N].ReductionOp);
1037  return DRD && DRD->getInitializer();
1038 }
1039 
1040 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1041  return CGF.EmitLoadOfPointerLValue(
1042  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1043  getThreadIDVariable()->getType()->castAs<PointerType>());
1044 }
1045 
1046 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1047  if (!CGF.HaveInsertPoint())
1048  return;
1049  // 1.2.2 OpenMP Language Terminology
1050  // Structured block - An executable statement with a single entry at the
1051  // top and a single exit at the bottom.
1052  // The point of exit cannot be a branch out of the structured block.
1053  // longjmp() and throw() must not violate the entry/exit criteria.
1054  CGF.EHStack.pushTerminate();
1055  if (S)
1056  CGF.incrementProfileCounter(S);
1057  CodeGen(CGF);
1058  CGF.EHStack.popTerminate();
1059 }
1060 
1061 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1062  CodeGenFunction &CGF) {
1063  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1064  getThreadIDVariable()->getType(),
1066 }
1067 
1069  QualType FieldTy) {
1070  auto *Field = FieldDecl::Create(
1071  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1072  C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1073  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1074  Field->setAccess(AS_public);
1075  DC->addDecl(Field);
1076  return Field;
1077 }
1078 
1079 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1080  StringRef Separator)
1081  : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1082  OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1083  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1084 
1085  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1086  OMPBuilder.initialize();
1087  loadOffloadInfoMetadata();
1088 }
1089 
1091  InternalVars.clear();
1092  // Clean non-target variable declarations possibly used only in debug info.
1093  for (const auto &Data : EmittedNonTargetVariables) {
1094  if (!Data.getValue().pointsToAliveValue())
1095  continue;
1096  auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1097  if (!GV)
1098  continue;
1099  if (!GV->isDeclaration() || GV->getNumUses() > 0)
1100  continue;
1101  GV->eraseFromParent();
1102  }
1103 }
1104 
1106  SmallString<128> Buffer;
1107  llvm::raw_svector_ostream OS(Buffer);
1108  StringRef Sep = FirstSeparator;
1109  for (StringRef Part : Parts) {
1110  OS << Sep << Part;
1111  Sep = Separator;
1112  }
1113  return std::string(OS.str());
1114 }
1115 
1116 static llvm::Function *
1118  const Expr *CombinerInitializer, const VarDecl *In,
1119  const VarDecl *Out, bool IsCombiner) {
1120  // void .omp_combiner.(Ty *in, Ty *out);
1121  ASTContext &C = CGM.getContext();
1122  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1123  FunctionArgList Args;
1124  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1125  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1126  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1127  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1128  Args.push_back(&OmpOutParm);
1129  Args.push_back(&OmpInParm);
1130  const CGFunctionInfo &FnInfo =
1131  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1132  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1133  std::string Name = CGM.getOpenMPRuntime().getName(
1134  {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1135  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1136  Name, &CGM.getModule());
1137  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1138  if (CGM.getLangOpts().Optimize) {
1139  Fn->removeFnAttr(llvm::Attribute::NoInline);
1140  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1141  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1142  }
1143  CodeGenFunction CGF(CGM);
1144  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1145  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1146  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1147  Out->getLocation());
1149  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1150  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1151  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1152  .getAddress(CGF);
1153  });
1154  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1155  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1156  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1157  .getAddress(CGF);
1158  });
1159  (void)Scope.Privatize();
1160  if (!IsCombiner && Out->hasInit() &&
1161  !CGF.isTrivialInitializer(Out->getInit())) {
1162  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1163  Out->getType().getQualifiers(),
1164  /*IsInitializer=*/true);
1165  }
1166  if (CombinerInitializer)
1167  CGF.EmitIgnoredExpr(CombinerInitializer);
1168  Scope.ForceCleanup();
1169  CGF.FinishFunction();
1170  return Fn;
1171 }
1172 
1174  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1175  if (UDRMap.count(D) > 0)
1176  return;
1177  llvm::Function *Combiner = emitCombinerOrInitializer(
1178  CGM, D->getType(), D->getCombiner(),
1179  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1180  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1181  /*IsCombiner=*/true);
1182  llvm::Function *Initializer = nullptr;
1183  if (const Expr *Init = D->getInitializer()) {
1185  CGM, D->getType(),
1187  : nullptr,
1188  cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1189  cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1190  /*IsCombiner=*/false);
1191  }
1192  UDRMap.try_emplace(D, Combiner, Initializer);
1193  if (CGF) {
1194  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1195  Decls.second.push_back(D);
1196  }
1197 }
1198 
1199 std::pair<llvm::Function *, llvm::Function *>
1201  auto I = UDRMap.find(D);
1202  if (I != UDRMap.end())
1203  return I->second;
1204  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1205  return UDRMap.lookup(D);
1206 }
1207 
1208 namespace {
1209 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1210 // Builder if one is present.
1211 struct PushAndPopStackRAII {
1212  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1213  bool HasCancel, llvm::omp::Directive Kind)
1214  : OMPBuilder(OMPBuilder) {
1215  if (!OMPBuilder)
1216  return;
1217 
1218  // The following callback is the crucial part of clangs cleanup process.
1219  //
1220  // NOTE:
1221  // Once the OpenMPIRBuilder is used to create parallel regions (and
1222  // similar), the cancellation destination (Dest below) is determined via
1223  // IP. That means if we have variables to finalize we split the block at IP,
1224  // use the new block (=BB) as destination to build a JumpDest (via
1225  // getJumpDestInCurrentScope(BB)) which then is fed to
1226  // EmitBranchThroughCleanup. Furthermore, there will not be the need
1227  // to push & pop an FinalizationInfo object.
1228  // The FiniCB will still be needed but at the point where the
1229  // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1230  auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1231  assert(IP.getBlock()->end() == IP.getPoint() &&
1232  "Clang CG should cause non-terminated block!");
1233  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1234  CGF.Builder.restoreIP(IP);
1236  CGF.getOMPCancelDestination(OMPD_parallel);
1237  CGF.EmitBranchThroughCleanup(Dest);
1238  };
1239 
1240  // TODO: Remove this once we emit parallel regions through the
1241  // OpenMPIRBuilder as it can do this setup internally.
1242  llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1243  OMPBuilder->pushFinalizationCB(std::move(FI));
1244  }
1245  ~PushAndPopStackRAII() {
1246  if (OMPBuilder)
1247  OMPBuilder->popFinalizationCB();
1248  }
1249  llvm::OpenMPIRBuilder *OMPBuilder;
1250 };
1251 } // namespace
1252 
1254  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1255  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1256  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1257  assert(ThreadIDVar->getType()->isPointerType() &&
1258  "thread id variable must be of type kmp_int32 *");
1259  CodeGenFunction CGF(CGM, true);
1260  bool HasCancel = false;
1261  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1262  HasCancel = OPD->hasCancel();
1263  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1264  HasCancel = OPD->hasCancel();
1265  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1266  HasCancel = OPSD->hasCancel();
1267  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1268  HasCancel = OPFD->hasCancel();
1269  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1270  HasCancel = OPFD->hasCancel();
1271  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1272  HasCancel = OPFD->hasCancel();
1273  else if (const auto *OPFD =
1274  dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1275  HasCancel = OPFD->hasCancel();
1276  else if (const auto *OPFD =
1277  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1278  HasCancel = OPFD->hasCancel();
1279 
1280  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1281  // parallel region to make cancellation barriers work properly.
1282  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1283  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1284  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1285  HasCancel, OutlinedHelperName);
1286  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1287  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1288 }
1289 
1291  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1293  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1295  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1296 }
1297 
1299  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1300  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1301  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1303  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1304 }
1305 
1307  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1308  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1309  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1310  bool Tied, unsigned &NumberOfParts) {
1311  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1312  PrePostActionTy &) {
1313  llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1314  llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1315  llvm::Value *TaskArgs[] = {
1316  UpLoc, ThreadID,
1317  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1318  TaskTVar->getType()->castAs<PointerType>())
1319  .getPointer(CGF)};
1320  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1321  CGM.getModule(), OMPRTL___kmpc_omp_task),
1322  TaskArgs);
1323  };
1324  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1325  UntiedCodeGen);
1326  CodeGen.setAction(Action);
1327  assert(!ThreadIDVar->getType()->isPointerType() &&
1328  "thread id variable must be of type kmp_int32 for tasks");
1329  const OpenMPDirectiveKind Region =
1330  isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1331  : OMPD_task;
1332  const CapturedStmt *CS = D.getCapturedStmt(Region);
1333  bool HasCancel = false;
1334  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1335  HasCancel = TD->hasCancel();
1336  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1337  HasCancel = TD->hasCancel();
1338  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1339  HasCancel = TD->hasCancel();
1340  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1341  HasCancel = TD->hasCancel();
1342 
1343  CodeGenFunction CGF(CGM, true);
1344  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1345  InnermostKind, HasCancel, Action);
1346  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1347  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1348  if (!Tied)
1349  NumberOfParts = Action.getNumberOfParts();
1350  return Res;
1351 }
1352 
1354  const RecordDecl *RD, const CGRecordLayout &RL,
1356  llvm::StructType *StructTy = RL.getLLVMType();
1357  unsigned PrevIdx = 0;
1358  ConstantInitBuilder CIBuilder(CGM);
1359  auto DI = Data.begin();
1360  for (const FieldDecl *FD : RD->fields()) {
1361  unsigned Idx = RL.getLLVMFieldNo(FD);
1362  // Fill the alignment.
1363  for (unsigned I = PrevIdx; I < Idx; ++I)
1364  Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1365  PrevIdx = Idx + 1;
1366  Fields.add(*DI);
1367  ++DI;
1368  }
1369 }
1370 
1371 template <class... As>
1372 static llvm::GlobalVariable *
1373 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1374  ArrayRef<llvm::Constant *> Data, const Twine &Name,
1375  As &&... Args) {
1376  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1377  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1378  ConstantInitBuilder CIBuilder(CGM);
1379  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1380  buildStructValue(Fields, CGM, RD, RL, Data);
1381  return Fields.finishAndCreateGlobal(
1382  Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1383  std::forward<As>(Args)...);
1384 }
1385 
1386 template <typename T>
1387 static void
1390  T &Parent) {
1391  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1392  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1393  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1394  buildStructValue(Fields, CGM, RD, RL, Data);
1395  Fields.finishAndAddTo(Parent);
1396 }
1397 
1399  bool AtCurrentPoint) {
1400  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1401  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1402 
1403  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1404  if (AtCurrentPoint) {
1405  Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1406  Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1407  } else {
1408  Elem.second.ServiceInsertPt =
1409  new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1410  Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1411  }
1412 }
1413 
1415  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1416  if (Elem.second.ServiceInsertPt) {
1417  llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1418  Elem.second.ServiceInsertPt = nullptr;
1419  Ptr->eraseFromParent();
1420  }
1421 }
1422 
1424  SourceLocation Loc,
1425  SmallString<128> &Buffer) {
1426  llvm::raw_svector_ostream OS(Buffer);
1427  // Build debug location
1429  OS << ";" << PLoc.getFilename() << ";";
1430  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1431  OS << FD->getQualifiedNameAsString();
1432  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1433  return OS.str();
1434 }
1435 
1437  SourceLocation Loc,
1438  unsigned Flags) {
1439  llvm::Constant *SrcLocStr;
1440  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1441  Loc.isInvalid()) {
1442  SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1443  } else {
1444  std::string FunctionName = "";
1445  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1446  FunctionName = FD->getQualifiedNameAsString();
1448  const char *FileName = PLoc.getFilename();
1449  unsigned Line = PLoc.getLine();
1450  unsigned Column = PLoc.getColumn();
1451  SrcLocStr =
1452  OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
1453  }
1454  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1455  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1456  Reserved2Flags);
1457 }
1458 
1460  SourceLocation Loc) {
1461  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1462  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1463  // the clang invariants used below might be broken.
1464  if (CGM.getLangOpts().OpenMPIRBuilder) {
1465  SmallString<128> Buffer;
1466  OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1467  auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1468  getIdentStringFromSourceLocation(CGF, Loc, Buffer));
1469  return OMPBuilder.getOrCreateThreadID(
1470  OMPBuilder.getOrCreateIdent(SrcLocStr));
1471  }
1472 
1473  llvm::Value *ThreadID = nullptr;
1474  // Check whether we've already cached a load of the thread id in this
1475  // function.
1476  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1477  if (I != OpenMPLocThreadIDMap.end()) {
1478  ThreadID = I->second.ThreadID;
1479  if (ThreadID != nullptr)
1480  return ThreadID;
1481  }
1482  // If exceptions are enabled, do not use parameter to avoid possible crash.
1483  if (auto *OMPRegionInfo =
1484  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1485  if (OMPRegionInfo->getThreadIDVariable()) {
1486  // Check if this an outlined function with thread id passed as argument.
1487  LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1488  llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1489  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1490  !CGF.getLangOpts().CXXExceptions ||
1491  CGF.Builder.GetInsertBlock() == TopBlock ||
1492  !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1493  cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1494  TopBlock ||
1495  cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1496  CGF.Builder.GetInsertBlock()) {
1497  ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1498  // If value loaded in entry block, cache it and use it everywhere in
1499  // function.
1500  if (CGF.Builder.GetInsertBlock() == TopBlock) {
1501  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1502  Elem.second.ThreadID = ThreadID;
1503  }
1504  return ThreadID;
1505  }
1506  }
1507  }
1508 
1509  // This is not an outlined function region - need to call __kmpc_int32
1510  // kmpc_global_thread_num(ident_t *loc).
1511  // Generate thread id value and cache this value for use across the
1512  // function.
1513  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1514  if (!Elem.second.ServiceInsertPt)
1516  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1517  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1518  llvm::CallInst *Call = CGF.Builder.CreateCall(
1519  OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1520  OMPRTL___kmpc_global_thread_num),
1521  emitUpdateLocation(CGF, Loc));
1522  Call->setCallingConv(CGF.getRuntimeCC());
1523  Elem.second.ThreadID = Call;
1524  return Call;
1525 }
1526 
1528  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1531  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532  }
1533  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534  for(const auto *D : FunctionUDRMap[CGF.CurFn])
1535  UDRMap.erase(D);
1536  FunctionUDRMap.erase(CGF.CurFn);
1537  }
1538  auto I = FunctionUDMMap.find(CGF.CurFn);
1539  if (I != FunctionUDMMap.end()) {
1540  for(const auto *D : I->second)
1541  UDMMap.erase(D);
1542  FunctionUDMMap.erase(I);
1543  }
1544  LastprivateConditionalToTypes.erase(CGF.CurFn);
1545  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547 
1549  return OMPBuilder.IdentPtr;
1550 }
1551 
1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553  if (!Kmpc_MicroTy) {
1554  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558  }
1559  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561 
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1564  bool IsGPUDistribute) {
1565  assert((IVSize == 32 || IVSize == 64) &&
1566  "IV size is not compatible with the omp runtime");
1567  StringRef Name;
1568  if (IsGPUDistribute)
1569  Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1570  : "__kmpc_distribute_static_init_4u")
1571  : (IVSigned ? "__kmpc_distribute_static_init_8"
1572  : "__kmpc_distribute_static_init_8u");
1573  else
1574  Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1575  : "__kmpc_for_static_init_4u")
1576  : (IVSigned ? "__kmpc_for_static_init_8"
1577  : "__kmpc_for_static_init_8u");
1578 
1579  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1580  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1581  llvm::Type *TypeParams[] = {
1582  getIdentTyPointerTy(), // loc
1583  CGM.Int32Ty, // tid
1584  CGM.Int32Ty, // schedtype
1585  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586  PtrTy, // p_lower
1587  PtrTy, // p_upper
1588  PtrTy, // p_stride
1589  ITy, // incr
1590  ITy // chunk
1591  };
1592  auto *FnTy =
1593  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1594  return CGM.CreateRuntimeFunction(FnTy, Name);
1595 }
1596 
1597 llvm::FunctionCallee
1598 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1599  assert((IVSize == 32 || IVSize == 64) &&
1600  "IV size is not compatible with the omp runtime");
1601  StringRef Name =
1602  IVSize == 32
1603  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1604  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1605  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1606  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1607  CGM.Int32Ty, // tid
1608  CGM.Int32Ty, // schedtype
1609  ITy, // lower
1610  ITy, // upper
1611  ITy, // stride
1612  ITy // chunk
1613  };
1614  auto *FnTy =
1615  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1616  return CGM.CreateRuntimeFunction(FnTy, Name);
1617 }
1618 
1619 llvm::FunctionCallee
1620 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1621  assert((IVSize == 32 || IVSize == 64) &&
1622  "IV size is not compatible with the omp runtime");
1623  StringRef Name =
1624  IVSize == 32
1625  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1626  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1627  llvm::Type *TypeParams[] = {
1628  getIdentTyPointerTy(), // loc
1629  CGM.Int32Ty, // tid
1630  };
1631  auto *FnTy =
1632  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1633  return CGM.CreateRuntimeFunction(FnTy, Name);
1634 }
1635 
1636 llvm::FunctionCallee
1637 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1638  assert((IVSize == 32 || IVSize == 64) &&
1639  "IV size is not compatible with the omp runtime");
1640  StringRef Name =
1641  IVSize == 32
1642  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1643  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1644  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1645  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1646  llvm::Type *TypeParams[] = {
1647  getIdentTyPointerTy(), // loc
1648  CGM.Int32Ty, // tid
1649  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1650  PtrTy, // p_lower
1651  PtrTy, // p_upper
1652  PtrTy // p_stride
1653  };
1654  auto *FnTy =
1655  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1656  return CGM.CreateRuntimeFunction(FnTy, Name);
1657 }
1658 
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
/// NOTE(review): the signature line is not visible in this view; the
/// parameters below are out-parameters filled from Loc's presumed location.
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // The presumed file may come from a #line directive and not exist on
    // disk; retry with line directives ignored before diagnosing.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  // Device/file IDs come from the filesystem unique ID; the line number from
  // the (possibly re-resolved) presumed location.
  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
1688 
  // With -fopenmp-simd no device code is generated, so no declare-target
  // reference pointer is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // A reference pointer is used for "link" globals, and for "to" globals when
  // unified shared memory is required.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: mangled variable name, a file-unique hex
    // suffix for internal symbols, then a fixed "_decl_tgt_ref_ptr" tag.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
            VD->getCanonicalDecl()->getBeginLoc(),
            DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Reuse an already-emitted pointer if one exists under that name.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host, initialize the pointer with the variable's own address;
      // on the device it is left for the runtime/registration to fill in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1727 
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // The runtime cache is only used when the TLS-based lowering is not.
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}
1737 
                                                 const VarDecl *VD,
                                                 Address VDAddr,
                                                 SourceLocation Loc) {
  // If TLS-based lowering applies, the address is already per-thread.
  if (CGM.getLangOpts().OpenMPUseTLS &&
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Otherwise ask the runtime for the calling thread's private copy:
  // __kmpc_threadprivate_cached(loc, tid, &var, ..., &cache).
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}
1758 
1759 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1760  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1761  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1762  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1763  // library.
1764  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1765  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1766  CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1767  OMPLoc);
1768  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1769  // to register constructor/destructor for variable.
1770  llvm::Value *Args[] = {
1771  OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1772  Ctor, CopyCtor, Dtor};
1773  CGF.EmitRuntimeCall(
1774  OMPBuilder.getOrCreateRuntimeFunction(
1775  CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1776  Args);
1777 }
1778 
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when the variable is lowered through native TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit ctor/dtor registration at most once per threadprivate definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
      Args.push_back(&Dst);

      // Ctor signature: void *(*)(void *dst) — receives the per-thread
      // copy's address and returns it.
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination address argument.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer expression into the destination.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination address back to the runtime.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
      Args.push_back(&Dst);

      // Dtor signature: void (*)(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Destroy the per-thread copy in place.
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No ctor needed: pass a typed null function pointer to the runtime.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No dtor needed: pass a typed null function pointer to the runtime.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a global initializer that performs
      // the registration at startup and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1898 
                                                       llvm::GlobalVariable *Addr,
                                                       bool PerformInit) {
  // Only relevant when offloading: host builds with target triples, or
  // device-side compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // "Link" globals (and "to" globals under unified shared memory) do not get
  // ctor/dtor offload entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries at most once per variable definition.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Run the initializer expression into the device copy of the global.
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive; it is only referenced from offload metadata.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, only a placeholder is emitted so the host and device
      // entry tables stay in sync.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      // Destroy the device copy of the global in place.
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2013 
                                                           QualType VarType,
                                                           StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // Back the artificial threadprivate value with an internal global.
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // Prefer native TLS when usable: mark the global thread-local and return
  // it directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise fall back to the runtime's cached threadprivate mechanism.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2044 
// Emits either ThenGen or ElseGen guarded by the if-clause condition,
// constant-folding the branch away when possible.
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2083 
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // "Then" path: a real parallel region via __kmpc_fork_call.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // "Else" path: the if-clause is false, run the region serialized on the
  // current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    // Select between fork and serialized execution on the if-clause value.
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2154 
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Serial code: materialize the global thread number into a temporary and
  // hand back its address.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}
2177 
2178 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2179  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2180  SmallString<256> Buffer;
2181  llvm::raw_svector_ostream Out(Buffer);
2182  Out << Name;
2183  StringRef RuntimeName = Out.str();
2184  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2185  if (Elem.second) {
2186  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2187  "OMP internal variable has different type than requested");
2188  return &*Elem.second;
2189  }
2190 
2191  return Elem.second = new llvm::GlobalVariable(
2192  CGM.getModule(), Ty, /*IsConstant*/ false,
2193  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2194  Elem.first(), /*InsertBefore=*/nullptr,
2195  llvm::GlobalValue::NotThreadLocal, AddressSpace);
2196 }
2197 
  // One named lock variable per critical-section name, shared across the
  // translation unit via the internal-variable cache.
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}
2203 
2204 namespace {
2205 /// Common pre(post)-action for different OpenMP constructs.
2206 class CommonActionTy final : public PrePostActionTy {
2207  llvm::FunctionCallee EnterCallee;
2208  ArrayRef<llvm::Value *> EnterArgs;
2209  llvm::FunctionCallee ExitCallee;
2210  ArrayRef<llvm::Value *> ExitArgs;
2211  bool Conditional;
2212  llvm::BasicBlock *ContBlock = nullptr;
2213 
2214 public:
2215  CommonActionTy(llvm::FunctionCallee EnterCallee,
2216  ArrayRef<llvm::Value *> EnterArgs,
2217  llvm::FunctionCallee ExitCallee,
2218  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2219  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2220  ExitArgs(ExitArgs), Conditional(Conditional) {}
2221  void Enter(CodeGenFunction &CGF) override {
2222  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2223  if (Conditional) {
2224  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2225  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2226  ContBlock = CGF.createBasicBlock("omp_if.end");
2227  // Generate the branch (If-stmt)
2228  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2229  CGF.EmitBlock(ThenBlock);
2230  }
2231  }
2232  void Done(CodeGenFunction &CGF) {
2233  // Emit the rest of blocks/branches
2234  CGF.EmitBranch(ContBlock);
2235  CGF.EmitBlock(ContBlock, true);
2236  }
2237  void Exit(CodeGenFunction &CGF) override {
2238  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2239  }
2240 };
2241 } // anonymous namespace
2242 
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint value is an extra trailing argument, only used by the
    // __kmpc_critical_with_hint entry point.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2272 
                                        const RegionCodeGenTy &MasterOpGen,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional: only the thread for which __kmpc_master returns non-zero
  // executes the region body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}
2295 
                                        const RegionCodeGenTy &MaskedOpGen,
                                        SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  // Without a filter clause, thread 0 (the primary thread) is used.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  // Conditional: only the thread matching the filter executes the body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}
2324 
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Delegate to the OpenMPIRBuilder when it is enabled.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // Inside an untied task region, emit the switch so the task can resume
  // correctly after the yield point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2344 
                                           const RegionCodeGenTy &TaskgroupOpGen,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Unconditional enter/exit bracketing (no Conditional flag): every thread
  // runs the region body between the two runtime calls.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2364 
/// Given an array of pointers to variables, project the address of a
/// given variable.
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the loaded pointer to the variable's memory type, using the
  // variable's natural alignment.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}
2378 
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the two void* arguments as arrays of element pointers:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Copy each copyprivate variable from its source slot to the matching
    // destination slot using the variable's assignment expression.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2432 
// NOTE(review): the doxygen extraction lost this function's opening line
// (presumably `void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,`
// — confirm against the original CGOpenMPRuntime.cpp) and two lines inside
// the copyprivate-list loop and the copy-function creation below.
// Emits a 'single' region: __kmpc_single/__kmpc_end_single around the region
// body, plus an optional __kmpc_copyprivate broadcast when copyprivate
// clauses are present.
2434  const RegionCodeGenTy &SingleOpGen,
2435  SourceLocation Loc,
2436  ArrayRef<const Expr *> CopyprivateVars,
2437  ArrayRef<const Expr *> SrcExprs,
2438  ArrayRef<const Expr *> DstExprs,
2439  ArrayRef<const Expr *> AssignmentOps) {
2440  if (!CGF.HaveInsertPoint())
2441  return;
2442  assert(CopyprivateVars.size() == SrcExprs.size() &&
2443  CopyprivateVars.size() == DstExprs.size() &&
2444  CopyprivateVars.size() == AssignmentOps.size());
2445  ASTContext &C = CGM.getContext();
2446  // int32 did_it = 0;
2447  // if(__kmpc_single(ident_t *, gtid)) {
2448  // SingleOpGen();
2449  // __kmpc_end_single(ident_t *, gtid);
2450  // did_it = 1;
2451  // }
2452  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2453  // <copy_func>, did_it);
2454 
// DidIt records whether this thread executed the single region; it is only
// materialized when there are copyprivate clauses that need the flag.
2455  Address DidIt = Address::invalid();
2456  if (!CopyprivateVars.empty()) {
2457  // int32 did_it = 0;
2458  QualType KmpInt32Ty =
2459  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2460  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2461  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2462  }
2463  // Prepare arguments and build a call to __kmpc_single
2464  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2465  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2466  CGM.getModule(), OMPRTL___kmpc_single),
2467  Args,
2468  OMPBuilder.getOrCreateRuntimeFunction(
2469  CGM.getModule(), OMPRTL___kmpc_end_single),
2470  Args,
2471  /*Conditional=*/true);
2472  SingleOpGen.setAction(Action);
2473  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2474  if (DidIt.isValid()) {
2475  // did_it = 1;
2476  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2477  }
2478  Action.Done(CGF);
2479  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2480  // <copy_func>, did_it);
2481  if (DidIt.isValid()) {
2482  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2483  QualType CopyprivateArrayTy = C.getConstantArrayType(
2484  C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2485  /*IndexTypeQuals=*/0);
2486  // Create a list of all private variables for copyprivate.
2487  Address CopyprivateList =
2488  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2489  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2490  Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2491  CGF.Builder.CreateStore(
// NOTE(review): one line of the store call (original line 2492, presumably a
// pointer-cast wrapper) was dropped by the extraction.
2493  CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2494  CGF.VoidPtrTy),
2495  Elem);
2496  }
2497  // Build function that copies private values from single region to all other
2498  // threads in the corresponding parallel region.
// NOTE(review): the line declaring CpyFn (original line 2499, presumably
// `llvm::Function *CpyFn = emitCopyprivateCopyFunction(`) was dropped.
2500  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2501  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2502  llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2503  Address CL =
2504  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2505  CGF.VoidPtrTy);
2506  llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
// This inner `Args` intentionally shadows the __kmpc_single arguments above.
2507  llvm::Value *Args[] = {
2508  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2509  getThreadID(CGF, Loc), // i32 <gtid>
2510  BufSize, // size_t <buf_size>
2511  CL.getPointer(), // void *<copyprivate list>
2512  CpyFn, // void (*) (void *, void *) <copy_func>
2513  DidItVal // i32 did_it
2514  };
2515  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2516  CGM.getModule(), OMPRTL___kmpc_copyprivate),
2517  Args);
2518  }
2519 }
2520 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,`).
// Emits an 'ordered' region. With IsThreads the body is bracketed by
// __kmpc_ordered/__kmpc_end_ordered; otherwise it is emitted inline with no
// runtime calls.
2522  const RegionCodeGenTy &OrderedOpGen,
2523  SourceLocation Loc, bool IsThreads) {
2524  if (!CGF.HaveInsertPoint())
2525  return;
2526  // __kmpc_ordered(ident_t *, gtid);
2527  // OrderedOpGen();
2528  // __kmpc_end_ordered(ident_t *, gtid);
2529  // Prepare arguments and build a call to __kmpc_ordered
2530  if (IsThreads) {
2531  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2532  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2533  CGM.getModule(), OMPRTL___kmpc_ordered),
2534  Args,
2535  OMPBuilder.getOrCreateRuntimeFunction(
2536  CGM.getModule(), OMPRTL___kmpc_end_ordered),
2537  Args);
2538  OrderedOpGen.setAction(Action);
2539  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540  return;
2541  }
// Non-threads case: no runtime bracketing, just the inlined region body.
2542  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2543 }
2544 
// NOTE(review): the extraction dropped this function's signature line
// (presumably `static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {`).
// Maps a directive kind to the ident_t barrier flag: construct-specific
// implicit-barrier flavors for for/sections/single, explicit for 'barrier',
// and the generic implicit flag for everything else.
2546  unsigned Flags;
2547  if (Kind == OMPD_for)
2548  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2549  else if (Kind == OMPD_sections)
2550  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2551  else if (Kind == OMPD_single)
2552  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2553  else if (Kind == OMPD_barrier)
2554  Flags = OMP_IDENT_BARRIER_EXPL;
2555  else
2556  Flags = OMP_IDENT_BARRIER_IMPL;
2557  return Flags;
2558 }
2559 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::getDefaultScheduleAndChunk(`).
// For doacross loops (an ordered clause with a loop count) forces schedule
// static with chunk 1; otherwise leaves ScheduleKind/ChunkExpr untouched.
2561  CodeGenFunction &CGF, const OMPLoopDirective &S,
2562  OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2563  // Check if the loop directive is actually a doacross loop directive. In this
2564  // case choose static, 1 schedule.
2565  if (llvm::any_of(
2566  S.getClausesOfKind<OMPOrderedClause>(),
2567  [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2568  ScheduleKind = OMPC_SCHEDULE_static;
2569  // Chunk size is 1 in this case.
2570  llvm::APInt ChunkSize(32, 1);
// A synthetic unsigned 32-bit literal `1` is built as the chunk expression.
2571  ChunkExpr = IntegerLiteral::Create(
2572  CGF.getContext(), ChunkSize,
2573  CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2574  SourceLocation());
2575  }
2576 }
2577 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,`).
// Emits a barrier: delegates to the OpenMPIRBuilder when enabled; otherwise
// emits __kmpc_cancel_barrier (with an optional cancellation-exit check)
// inside cancellable regions, or a plain __kmpc_barrier.
2579  OpenMPDirectiveKind Kind, bool EmitChecks,
2580  bool ForceSimpleCall) {
2581  // Check if we should use the OMPBuilder
2582  auto *OMPRegionInfo =
2583  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2584  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2585  CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2586  CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2587  return;
2588  }
2589 
2590  if (!CGF.HaveInsertPoint())
2591  return;
2592  // Build call __kmpc_cancel_barrier(loc, thread_id);
2593  // Build call __kmpc_barrier(loc, thread_id);
2594  unsigned Flags = getDefaultFlagsForBarriers(Kind);
2595  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2596  // thread_id);
2597  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2598  getThreadID(CGF, Loc)};
2599  if (OMPRegionInfo) {
2600  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2601  llvm::Value *Result = CGF.EmitRuntimeCall(
2602  OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2603  OMPRTL___kmpc_cancel_barrier),
2604  Args);
2605  if (EmitChecks) {
2606  // if (__kmpc_cancel_barrier()) {
2607  // exit from construct;
2608  // }
2609  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2610  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2611  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2612  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2613  CGF.EmitBlock(ExitBB);
2614  // exit from construct;
2615  CodeGenFunction::JumpDest CancelDestination =
2616  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2617  CGF.EmitBranchThroughCleanup(CancelDestination);
2618  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2619  }
2620  return;
2621  }
2622  }
2623  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2624  CGM.getModule(), OMPRTL___kmpc_barrier),
2625  Args);
2626 }
2627 
2628 /// Map the OpenMP loop schedule to the runtime enumeration.
2629 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2630  bool Chunked, bool Ordered) {
2631  switch (ScheduleKind) {
2632  case OMPC_SCHEDULE_static:
2633  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2634  : (Ordered ? OMP_ord_static : OMP_sch_static);
2635  case OMPC_SCHEDULE_dynamic:
2636  return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2637  case OMPC_SCHEDULE_guided:
2638  return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2639  case OMPC_SCHEDULE_runtime:
2640  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2641  case OMPC_SCHEDULE_auto:
2642  return Ordered ? OMP_ord_auto : OMP_sch_auto;
2643  case OMPC_SCHEDULE_unknown:
2644  assert(!Chunked && "chunk was specified but schedule kind not known");
2645  return Ordered ? OMP_ord_static : OMP_sch_static;
2646  }
2647  llvm_unreachable("Unexpected runtime schedule");
2648 }
2649 
2650 /// Map the OpenMP distribute schedule to the runtime enumeration.
2651 static OpenMPSchedType
// NOTE(review): the extraction dropped the parameter line here (presumably
// `getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {`).
2653  // only static is allowed for dist_schedule
2654  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2655 }
2656 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,`).
// True iff the (unordered) runtime schedule is plain non-chunked static.
2658  bool Chunked) const {
2659  OpenMPSchedType Schedule =
2660  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2661  return Schedule == OMP_sch_static;
2662 }
2663 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `bool CGOpenMPRuntime::isStaticNonchunked(`).
// True iff the distribute runtime schedule is non-chunked static.
2665  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2666  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2667  return Schedule == OMP_dist_sch_static;
2668 }
2669 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,`).
// True iff the (unordered) runtime schedule is chunked static.
2671  bool Chunked) const {
2672  OpenMPSchedType Schedule =
2673  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2674  return Schedule == OMP_sch_static_chunked;
2675 }
2676 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `bool CGOpenMPRuntime::isStaticChunked(`).
// True iff the distribute runtime schedule is chunked static.
2678  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2679  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2680  return Schedule == OMP_dist_sch_static_chunked;
2681 }
2682 
// NOTE(review): the extraction dropped this function's signature line
// (presumably `bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {`).
// True for any non-static (i.e. dynamically dispatched) schedule kind.
2684  OpenMPSchedType Schedule =
2685  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2686  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2687  return Schedule != OMP_sch_static;
2688 }
2689 
// Combines the schedule with the monotonic/nonmonotonic modifier bits
// (and rewrites static_chunked to static_balanced_chunked for the `simd`
// modifier), applying the OpenMP 5.0 defaulting rule at the end.
2690 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
// NOTE(review): the extraction dropped the remaining parameter lines here
// (presumably the two OpenMPScheduleClauseModifier parameters M1 and M2).
2693  int Modifier = 0;
2694  switch (M1) {
2695  case OMPC_SCHEDULE_MODIFIER_monotonic:
2696  Modifier = OMP_sch_modifier_monotonic;
2697  break;
2698  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2699  Modifier = OMP_sch_modifier_nonmonotonic;
2700  break;
2701  case OMPC_SCHEDULE_MODIFIER_simd:
2702  if (Schedule == OMP_sch_static_chunked)
2703  Schedule = OMP_sch_static_balanced_chunked;
2704  break;
// NOTE(review): two case-label lines were dropped here (original 2705-2706,
// presumably the last/unknown modifier cases).
2707  break;
2708  }
2709  switch (M2) {
2710  case OMPC_SCHEDULE_MODIFIER_monotonic:
2711  Modifier = OMP_sch_modifier_monotonic;
2712  break;
2713  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2714  Modifier = OMP_sch_modifier_nonmonotonic;
2715  break;
2716  case OMPC_SCHEDULE_MODIFIER_simd:
2717  if (Schedule == OMP_sch_static_chunked)
2718  Schedule = OMP_sch_static_balanced_chunked;
2719  break;
// NOTE(review): two case-label lines were dropped here (original 2720-2721).
2722  break;
2723  }
2724  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2725  // If the static schedule kind is specified or if the ordered clause is
2726  // specified, and if the nonmonotonic modifier is not specified, the effect is
2727  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2728  // modifier is specified, the effect is as if the nonmonotonic modifier is
2729  // specified.
2730  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2731  if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2732  Schedule == OMP_sch_static_balanced_chunked ||
2733  Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2734  Schedule == OMP_dist_sch_static_chunked ||
2735  Schedule == OMP_dist_sch_static))
2736  Modifier = OMP_sch_modifier_nonmonotonic;
2737  }
2738  return Schedule | Modifier;
2739 }
2740 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::emitForDispatchInit(`).
// Emits the __kmpc_dispatch_init_* call that starts a dynamically scheduled
// worksharing loop.
2742  CodeGenFunction &CGF, SourceLocation Loc,
2743  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2744  bool Ordered, const DispatchRTInput &DispatchValues) {
2745  if (!CGF.HaveInsertPoint())
2746  return;
2747  OpenMPSchedType Schedule = getRuntimeSchedule(
2748  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
// Static schedules must not reach the dispatch path unless ordered.
2749  assert(Ordered ||
2750  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2751  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2752  Schedule != OMP_sch_static_balanced_chunked));
2753  // Call __kmpc_dispatch_init(
2754  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2755  // kmp_int[32|64] lower, kmp_int[32|64] upper,
2756  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2757 
2758  // If the Chunk was not specified in the clause - use default value 1.
2759  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2760  : CGF.Builder.getIntN(IVSize, 1);
2761  llvm::Value *Args[] = {
2762  emitUpdateLocation(CGF, Loc),
2763  getThreadID(CGF, Loc),
2764  CGF.Builder.getInt32(addMonoNonMonoModifier(
2765  CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2766  DispatchValues.LB, // Lower
2767  DispatchValues.UB, // Upper
2768  CGF.Builder.getIntN(IVSize, 1), // Stride
2769  Chunk // Chunk
2770  };
2771  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2772 }
2773 
// NOTE(review): the extraction dropped this helper's opening line
// (presumably `static void emitForStaticInitCall(`) and the line declaring
// the M1/M2 modifier parameters (original line 2777).
// Shared helper that emits the __kmpc_for_static_init_* call for both
// worksharing-loop and distribute codegen.
2775  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2776  llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2778  const CGOpenMPRuntime::StaticRTInput &Values) {
2779  if (!CGF.HaveInsertPoint())
2780  return;
2781 
2782  assert(!Values.Ordered);
2783  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2784  Schedule == OMP_sch_static_balanced_chunked ||
2785  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2786  Schedule == OMP_dist_sch_static ||
2787  Schedule == OMP_dist_sch_static_chunked);
2788 
2789  // Call __kmpc_for_static_init(
2790  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2791  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2792  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2793  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2794  llvm::Value *Chunk = Values.Chunk;
2795  if (Chunk == nullptr) {
2796  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2797  Schedule == OMP_dist_sch_static) &&
2798  "expected static non-chunked schedule");
2799  // If the Chunk was not specified in the clause - use default value 1.
2800  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2801  } else {
2802  assert((Schedule == OMP_sch_static_chunked ||
2803  Schedule == OMP_sch_static_balanced_chunked ||
2804  Schedule == OMP_ord_static_chunked ||
2805  Schedule == OMP_dist_sch_static_chunked) &&
2806  "expected static chunked schedule");
2807  }
2808  llvm::Value *Args[] = {
2809  UpdateLocation,
2810  ThreadId,
2811  CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2812  M2)), // Schedule type
2813  Values.IL.getPointer(), // &isLastIter
2814  Values.LB.getPointer(), // &LB
2815  Values.UB.getPointer(), // &UB
2816  Values.ST.getPointer(), // &Stride
2817  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2818  Chunk // Chunk
2819  };
2820  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2821 }
2822 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,`).
// Static-schedule initialization for worksharing loops/sections; picks the
// ident_t work flag from the directive kind and forwards to
// emitForStaticInitCall.
2824  SourceLocation Loc,
2825  OpenMPDirectiveKind DKind,
2826  const OpenMPScheduleTy &ScheduleKind,
2827  const StaticRTInput &Values) {
2828  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2829  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2830  assert(isOpenMPWorksharingDirective(DKind) &&
2831  "Expected loop-based or sections-based directive.");
2832  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2833  isOpenMPLoopDirective(DKind)
2834  ? OMP_IDENT_WORK_LOOP
2835  : OMP_IDENT_WORK_SECTIONS);
2836  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2837  llvm::FunctionCallee StaticInitFunction =
2838  createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2839  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2840  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2841  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2842 }
2843 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::emitDistributeStaticInit(`) and the last
// argument line of the final call (original line 2862).
// Static-schedule initialization for 'distribute'; on AMDGCN/NVPTX device
// code a GPU-specific init function is selected.
2845  CodeGenFunction &CGF, SourceLocation Loc,
2846  OpenMPDistScheduleClauseKind SchedKind,
2847  const CGOpenMPRuntime::StaticRTInput &Values) {
2848  OpenMPSchedType ScheduleNum =
2849  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2850  llvm::Value *UpdatedLocation =
2851  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2852  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2853  llvm::FunctionCallee StaticInitFunction;
2854  bool isGPUDistribute =
2855  CGM.getLangOpts().OpenMPIsDevice &&
2856  (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2857  StaticInitFunction = createForStaticInitFunction(
2858  Values.IVSize, Values.IVSigned, isGPUDistribute);
2859 
2860  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2861  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2863 }
2864 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,`)
// and the first operand of the conditional below (original line 2873,
// presumably `isOpenMPDistributeDirective(DKind)`).
// Emits the matching static-fini call; distribute directives on AMDGCN/NVPTX
// device code use __kmpc_distribute_static_fini instead.
2866  SourceLocation Loc,
2867  OpenMPDirectiveKind DKind) {
2868  if (!CGF.HaveInsertPoint())
2869  return;
2870  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2871  llvm::Value *Args[] = {
2872  emitUpdateLocation(CGF, Loc,
2874  ? OMP_IDENT_WORK_DISTRIBUTE
2875  : isOpenMPLoopDirective(DKind)
2876  ? OMP_IDENT_WORK_LOOP
2877  : OMP_IDENT_WORK_SECTIONS),
2878  getThreadID(CGF, Loc)};
2879  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2880  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2881  (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2882  CGF.EmitRuntimeCall(
2883  OMPBuilder.getOrCreateRuntimeFunction(
2884  CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2885  Args);
2886  else
2887  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2888  CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2889  Args);
2890 }
2891 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,`).
// Signals the end of an ordered iteration via __kmpc_dispatch_fini_*.
2893  SourceLocation Loc,
2894  unsigned IVSize,
2895  bool IVSigned) {
2896  if (!CGF.HaveInsertPoint())
2897  return;
2898  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2899  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2900  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2901 }
2902 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,`).
// Emits __kmpc_dispatch_next_* and converts its i32 result to a bool
// ("another chunk is available").
2904  SourceLocation Loc, unsigned IVSize,
2905  bool IVSigned, Address IL,
2906  Address LB, Address UB,
2907  Address ST) {
2908  // Call __kmpc_dispatch_next(
2909  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2910  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2911  // kmp_int[32|64] *p_stride);
2912  llvm::Value *Args[] = {
2913  emitUpdateLocation(CGF, Loc),
2914  getThreadID(CGF, Loc),
2915  IL.getPointer(), // &isLastIter
2916  LB.getPointer(), // &Lower
2917  UB.getPointer(), // &Upper
2918  ST.getPointer() // &Stride
2919  };
2920  llvm::Value *Call =
2921  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2922  return CGF.EmitScalarConversion(
2923  Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2924  CGF.getContext().BoolTy, Loc);
2925 }
2926 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,`).
// Emits __kmpc_push_num_threads with NumThreads truncated/extended to i32.
2928  llvm::Value *NumThreads,
2929  SourceLocation Loc) {
2930  if (!CGF.HaveInsertPoint())
2931  return;
2932  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2933  llvm::Value *Args[] = {
2934  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2935  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2936  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2937  CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2938  Args);
2939 }
2940 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,`).
// Emits __kmpc_push_proc_bind with the numeric proc_bind value.
2942  ProcBindKind ProcBind,
2943  SourceLocation Loc) {
2944  if (!CGF.HaveInsertPoint())
2945  return;
2946  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2947  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2948  llvm::Value *Args[] = {
2949  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2950  llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2951  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2952  CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2953  Args);
2954 }
2955 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,`).
// Emits a flush: via the OpenMPIRBuilder when enabled, otherwise as a direct
// __kmpc_flush call. The AtomicOrdering parameter is not consulted in either
// path here.
2957  SourceLocation Loc, llvm::AtomicOrdering AO) {
2958  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2959  OMPBuilder.createFlush(CGF.Builder);
2960  } else {
2961  if (!CGF.HaveInsertPoint())
2962  return;
2963  // Build call void __kmpc_flush(ident_t *loc)
2964  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2965  CGM.getModule(), OMPRTL___kmpc_flush),
2966  emitUpdateLocation(CGF, Loc));
2967  }
2968 }
2969 
2970 namespace {
2971 /// Indexes of fields for type kmp_task_t.
// NOTE: the enumerator order defines the field indexes used for GEPs into
// kmp_task_t; do not reorder.
2972 enum KmpTaskTFields {
2973  /// List of shared variables.
2974  KmpTaskTShareds,
2975  /// Task routine.
2976  KmpTaskTRoutine,
2977  /// Partition id for the untied tasks.
2978  KmpTaskTPartId,
2979  /// Function with call of destructors for private variables.
2980  Data1,
2981  /// Task priority.
2982  Data2,
2983  /// (Taskloops only) Lower bound.
2984  KmpTaskTLowerBound,
2985  /// (Taskloops only) Upper bound.
2986  KmpTaskTUpperBound,
2987  /// (Taskloops only) Stride.
2988  KmpTaskTStride,
2989  /// (Taskloops only) Is last iteration flag.
2990  KmpTaskTLastIter,
2991  /// (Taskloops only) Reduction data.
2992  KmpTaskTReductions,
2993 };
2994 } // anonymous namespace
2995 
2996 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2997  return OffloadEntriesTargetRegion.empty() &&
2998  OffloadEntriesDeviceGlobalVar.empty();
2999 }
3000 
3001 /// Initialize target region entry.
3002 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3003  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3004  StringRef ParentName, unsigned LineNum,
3005  unsigned Order) {
3006  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3007  "only required for the device "
3008  "code generation.");
3009  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3010  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3011  OMPTargetRegionEntryTargetRegion);
3012  ++OffloadingEntriesNum;
3013 }
3014 
3015 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3016  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3017  StringRef ParentName, unsigned LineNum,
3018  llvm::Constant *Addr, llvm::Constant *ID,
3019  OMPTargetRegionEntryKind Flags) {
3020  // If we are emitting code for a target, the entry is already initialized,
3021  // only has to be registered.
3022  if (CGM.getLangOpts().OpenMPIsDevice) {
3023  // This could happen if the device compilation is invoked standalone.
3024  if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3025  return;
3026  auto &Entry =
3027  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3028  Entry.setAddress(Addr);
3029  Entry.setID(ID);
3030  Entry.setFlags(Flags);
3031  } else {
3032  if (Flags ==
3033  OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3034  hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3035  /*IgnoreAddressId*/ true))
3036  return;
3037  assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3038  "Target region entry already registered!");
3039  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3040  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3041  ++OffloadingEntriesNum;
3042  }
3043 }
3044 
3045 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3046  unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3047  bool IgnoreAddressId) const {
3048  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3049  if (PerDevice == OffloadEntriesTargetRegion.end())
3050  return false;
3051  auto PerFile = PerDevice->second.find(FileID);
3052  if (PerFile == PerDevice->second.end())
3053  return false;
3054  auto PerParentName = PerFile->second.find(ParentName);
3055  if (PerParentName == PerFile->second.end())
3056  return false;
3057  auto PerLine = PerParentName->second.find(LineNum);
3058  if (PerLine == PerParentName->second.end())
3059  return false;
3060  // Fail if this entry is already registered.
3061  if (!IgnoreAddressId &&
3062  (PerLine->second.getAddress() || PerLine->second.getID()))
3063  return false;
3064  return true;
3065 }
3066 
3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3068  const OffloadTargetRegionEntryInfoActTy &Action) {
3069  // Scan all target region entries and perform the provided action.
3070  for (const auto &D : OffloadEntriesTargetRegion)
3071  for (const auto &F : D.second)
3072  for (const auto &P : F.second)
3073  for (const auto &L : P.second)
3074  Action(D.first, F.first, P.first(), L.first, L.second);
3075 }
3076 
3077 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3078  initializeDeviceGlobalVarEntryInfo(StringRef Name,
3079  OMPTargetGlobalVarEntryKind Flags,
3080  unsigned Order) {
3081  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3082  "only required for the device "
3083  "code generation.");
3084  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3085  ++OffloadingEntriesNum;
3086 }
3087 
3088 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3089  registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3090  CharUnits VarSize,
3091  OMPTargetGlobalVarEntryKind Flags,
3092  llvm::GlobalValue::LinkageTypes Linkage) {
3093  if (CGM.getLangOpts().OpenMPIsDevice) {
3094  // This could happen if the device compilation is invoked standalone.
3095  if (!hasDeviceGlobalVarEntryInfo(VarName))
3096  return;
3097  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3098  if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3099  if (Entry.getVarSize().isZero()) {
3100  Entry.setVarSize(VarSize);
3101  Entry.setLinkage(Linkage);
3102  }
3103  return;
3104  }
3105  Entry.setVarSize(VarSize);
3106  Entry.setLinkage(Linkage);
3107  Entry.setAddress(Addr);
3108  } else {
3109  if (hasDeviceGlobalVarEntryInfo(VarName)) {
3110  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3111  assert(Entry.isValid() && Entry.getFlags() == Flags &&
3112  "Entry not initialized!");
3113  if (Entry.getVarSize().isZero()) {
3114  Entry.setVarSize(VarSize);
3115  Entry.setLinkage(Linkage);
3116  }
3117  return;
3118  }
3119  OffloadEntriesDeviceGlobalVar.try_emplace(
3120  VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3121  ++OffloadingEntriesNum;
3122  }
3123 }
3124 
3125 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3126  actOnDeviceGlobalVarEntriesInfo(
3127  const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3128  // Scan all target region entries and perform the provided action.
3129  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3130  Action(E.getKey(), E.getValue());
3131 }
3132 
// NOTE(review): the extraction dropped this function's opening line
// (presumably `void CGOpenMPRuntime::createOffloadEntry(`).
// Emits one __tgt_offload_entry-style descriptor {ID, name, size, flags, 0}
// plus the internal string global holding the entry name, and places the
// descriptor in the "omp_offloading_entries" section for the linker.
3134  llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3135  llvm::GlobalValue::LinkageTypes Linkage) {
3136  StringRef Name = Addr->getName();
3137  llvm::Module &M = CGM.getModule();
3138  llvm::LLVMContext &C = M.getContext();
3139 
3140  // Create constant string with the name.
3141  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3142 
3143  std::string StringName = getName({"omp_offloading", "entry_name"});
3144  auto *Str = new llvm::GlobalVariable(
3145  M, StrPtrInit->getType(), /*isConstant=*/true,
3146  llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3147  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3148 
3149  llvm::Constant *Data[] = {
3150  llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3151  llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3152  llvm::ConstantInt::get(CGM.SizeTy, Size),
3153  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3154  llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3155  std::string EntryName = getName({"omp_offloading", "entry", ""});
3156  llvm::GlobalVariable *Entry = createGlobalStruct(
3157  CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3158  Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3159 
3160  // The entry has to be created in the section the linker expects it to be.
3161  Entry->setSection("omp_offloading_entries");
3162 }
3163 
// NOTE(review): the extraction dropped this function's signature line
// (presumably `void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {`)
// as well as several interior lines below (originals 3181, 3205, 3222, 3226,
// 3246, 3281, 3298, 3321) — mostly type names and diagnostic-level arguments.
3165  // Emit the offloading entries and metadata so that the device codegen side
3166  // can easily figure out what to emit. The produced metadata looks like
3167  // this:
3168  //
3169  // !omp_offload.info = !{!1, ...}
3170  //
3171  // Right now we only generate metadata for function that contain target
3172  // regions.
3173 
3174  // If we are in simd mode or there are no entries, we don't need to do
3175  // anything.
3176  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3177  return;
3178 
3179  llvm::Module &M = CGM.getModule();
3180  llvm::LLVMContext &C = M.getContext();
// NOTE(review): the SmallVector declaration's first line was dropped here.
3182  SourceLocation, StringRef>,
3183  16>
3184  OrderedEntries(OffloadEntriesInfoManager.size());
3185  llvm::SmallVector<StringRef, 16> ParentFunctions(
3186  OffloadEntriesInfoManager.size());
3187 
3188  // Auxiliary methods to create metadata values and strings.
3189  auto &&GetMDInt = [this](unsigned V) {
3190  return llvm::ConstantAsMetadata::get(
3191  llvm::ConstantInt::get(CGM.Int32Ty, V));
3192  };
3193 
3194  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3195 
3196  // Create the offloading info metadata node.
3197  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3198 
3199  // Create function that emits metadata for each target region entry;
3200  auto &&TargetRegionMetadataEmitter =
3201  [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3202  &GetMDString](
3203  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3204  unsigned Line,
3206  // Generate metadata for target regions. Each entry of this metadata
3207  // contains:
3208  // - Entry 0 -> Kind of this type of metadata (0).
3209  // - Entry 1 -> Device ID of the file where the entry was identified.
3210  // - Entry 2 -> File ID of the file where the entry was identified.
3211  // - Entry 3 -> Mangled name of the function where the entry was
3212  // identified.
3213  // - Entry 4 -> Line in the file where the entry was identified.
3214  // - Entry 5 -> Order the entry was created.
3215  // The first element of the metadata node is the kind.
3216  llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3217  GetMDInt(FileID), GetMDString(ParentName),
3218  GetMDInt(Line), GetMDInt(E.getOrder())};
3219 
// Recover a SourceLocation for diagnostics by matching the entry's
// device/file unique ID against the source manager's file table.
3220  SourceLocation Loc;
3221  for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3223  I != E; ++I) {
3224  if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3225  I->getFirst()->getUniqueID().getFile() == FileID) {
3227  I->getFirst(), Line, 1);
3228  break;
3229  }
3230  }
3231  // Save this entry in the right position of the ordered entries array.
3232  OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3233  ParentFunctions[E.getOrder()] = ParentName;
3234 
3235  // Add metadata to the named metadata node.
3236  MD->addOperand(llvm::MDNode::get(C, Ops));
3237  };
3238 
3239  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3240  TargetRegionMetadataEmitter);
3241 
3242  // Create function that emits metadata for each device global variable entry;
3243  auto &&DeviceGlobalVarMetadataEmitter =
3244  [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3245  MD](StringRef MangledName,
3247  &E) {
3248  // Generate metadata for global variables. Each entry of this metadata
3249  // contains:
3250  // - Entry 0 -> Kind of this type of metadata (1).
3251  // - Entry 1 -> Mangled name of the variable.
3252  // - Entry 2 -> Declare target kind.
3253  // - Entry 3 -> Order the entry was created.
3254  // The first element of the metadata node is the kind.
3255  llvm::Metadata *Ops[] = {
3256  GetMDInt(E.getKind()), GetMDString(MangledName),
3257  GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3258 
3259  // Save this entry in the right position of the ordered entries array.
3260  OrderedEntries[E.getOrder()] =
3261  std::make_tuple(&E, SourceLocation(), MangledName);
3262 
3263  // Add metadata to the named metadata node.
3264  MD->addOperand(llvm::MDNode::get(C, Ops));
3265  };
3266 
3267  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3268  DeviceGlobalVarMetadataEmitter);
3269 
// Second pass: emit the actual offload entry globals (or diagnostics for
// invalid entries), in registration order.
3270  for (const auto &E : OrderedEntries) {
3271  assert(std::get<0>(E) && "All ordered entries must exist!");
3272  if (const auto *CE =
3273  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3274  std::get<0>(E))) {
3275  if (!CE->getID() || !CE->getAddress()) {
3276  // Do not blame the entry if the parent function is not emitted.
3277  StringRef FnName = ParentFunctions[CE->getOrder()];
3278  if (!CGM.GetGlobalValue(FnName))
3279  continue;
3280  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3282  "Offloading entry for target region in %0 is incorrect: either the "
3283  "address or the ID is invalid.");
3284  CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3285  continue;
3286  }
3287  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3288  CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3289  } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3290  OffloadEntryInfoDeviceGlobalVar>(
3291  std::get<0>(E))) {
3292  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3293  static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3294  CE->getFlags());
3295  switch (Flags) {
3296  case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3297  if (CGM.getLangOpts().OpenMPIsDevice &&
3299  continue;
3300  if (!CE->getAddress()) {
3301  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3302  DiagnosticsEngine::Error, "Offloading entry for declare target "
3303  "variable %0 is incorrect: the "
3304  "address is invalid.");
3305  CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3306  continue;
3307  }
3308  // The variable has no definition - no need to add the entry.
3309  if (CE->getVarSize().isZero())
3310  continue;
3311  break;
3312  }
3313  case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3314  assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3315  (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3316  "Declaret target link address is set.");
3317  if (CGM.getLangOpts().OpenMPIsDevice)
3318  continue;
3319  if (!CE->getAddress()) {
3320  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3322  "Offloading entry for declare target variable is incorrect: the "
3323  "address is invalid.");
3324  CGM.getDiags().Report(DiagID);
3325  continue;
3326  }
3327  break;
3328  }
3329  createOffloadEntry(CE->getAddress(), CE->getAddress(),
3330  CE->getVarSize().getQuantity(), Flags,
3331  CE->getLinkage());
3332  } else {
3333  llvm_unreachable("Unsupported entry kind.");
3334  }
3335  }
3336 }
3337 
3338 /// Loads all the offload entries information from the host IR
3339 /// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  // Only device compilations consume host-side offloading metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Without a host IR file there is nothing to load.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a local throwaway context: only the named
  // metadata node is inspected and the module is discarded on return.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Read operand Idx of the current node as an integer constant.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Read operand Idx of the current node as a string.
    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the kind of the entry (mirrors the emitter side).
    // NOTE(review): the 'case' labels for the target-region and the
    // device-global-var entry kinds appear to be missing from this copy of
    // the file; as written, both initialize* calls below are unreachable.
    // Confirm against the upstream sources before editing.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3406 
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  // Lazily build the cached routine-entry type; later calls reuse it.
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    // NOTE(review): the declaration of 'EPI' (presumably a
    // FunctionProtoType::ExtProtoInfo) is missing from this copy of the
    // file — confirm against upstream.
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}
3418 
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void    *addr;     // Pointer to the offload entry info.
  //                      // (function or global)
  //   char    *name;     // Name of the function or global.
  //   size_t   size;     // Size of the entry info (0 if it a function).
  //   int32_t  flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t  reserved; // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);                // addr
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); // name
    addFieldToRecordDecl(C, RD, C.getSizeType());            // size
    // NOTE(review): the 'addFieldToRecordDecl(' heads of the next two calls
    // (the 32-bit 'flags' and 'reserved' fields) are missing from this copy
    // of the file — confirm against upstream.
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // The record is packed so its layout matches the runtime's expectation.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}
3447 
3448 namespace {
3449 struct PrivateHelpersTy {
3450  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3451  const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3452  : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3453  PrivateElemInit(PrivateElemInit) {}
3454  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3455  const Expr *OriginalRef = nullptr;
3456  const VarDecl *Original = nullptr;
3457  const VarDecl *PrivateCopy = nullptr;
3458  const VarDecl *PrivateElemInit = nullptr;
3459  bool isLocalPrivate() const {
3460  return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3461  }
3462 };
3463 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3464 } // anonymous namespace
3465 
3466 static bool isAllocatableDecl(const VarDecl *VD) {
3467  const VarDecl *CVD = VD->getCanonicalDecl();
3468  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3469  return false;
3470  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3471  // Use the default allocation.
3472  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3473  AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3474  !AA->getAllocator());
3475 }
3476 
static RecordDecl *
// NOTE(review): the parameter-list line of this function (it clearly takes
// the CodeGenModule 'CGM' and the ArrayRef<PrivateDataTy> 'Privates' used
// below) is missing from this copy of the file — confirm against upstream.
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      // NOTE(review): the line declaring 'Type' (presumably derived from
      // VD's type) is missing from this copy of the file.
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        // Allocatable declarations are likewise stored indirectly.
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Propagate any alignment attributes from the variable onto the field
      // so the privates record honors the requested alignment.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                                                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  // No privates -> no record needed.
  return nullptr;
}
3510 
static RecordDecl *
// NOTE(review): the first signature line (the function name plus the
// parameters referenced below, at least 'CGM' and the directive 'Kind') is
// missing from this copy of the file — confirm against upstream.
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a kmp_int32 and a routine-entry pointer.
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);               // shareds
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); // routine
  addFieldToRecordDecl(C, RD, KmpInt32Ty);                // part_id
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);            // data1
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);            // data2
  // NOTE(review): the guard opening the taskloop-only block (matching the
  // stray closing brace below) is missing from this copy of the file.
  QualType KmpUInt64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  QualType KmpInt64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  addFieldToRecordDecl(C, RD, KmpUInt64Ty); // lb
  addFieldToRecordDecl(C, RD, KmpUInt64Ty); // ub
  addFieldToRecordDecl(C, RD, KmpInt64Ty);  // st
  addFieldToRecordDecl(C, RD, KmpInt32Ty);  // liter
  addFieldToRecordDecl(C, RD, C.VoidPtrTy); // reductions
  }
  RD->completeDefinition();
  return RD;
}
3556 
static RecordDecl *
// NOTE(review): the signature line (function name plus the parameters
// referenced below, at least 'CGM' and 'KmpTaskTQTy') is missing from this
// copy of the file — confirm against upstream.
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // The privates record is only appended when there is at least one private
  // to store (createPrivatesRecordDecl returns null otherwise).
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
3573 
3574 /// Emit a proxy function which accepts kmp_task_t as the second
3575 /// argument.
3576 /// \code
3577 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3578 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3579 /// For taskloops:
3580 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3581 /// tt->reductions, tt->shareds);
3582 /// return 0;
3583 /// }
3584 /// \endcode
static llvm::Function *
// NOTE(review): several lines are missing from this copy of the function:
// the first signature line, two 'ImplicitParamDecl' argument tails, the head
// of the SharedsParam pointer cast, the head of the TDBase cast inside
// CommonArgs, and the taskloop guard before the lb/ub/st/liter/reductions
// block (matching the stray closing brace below). Confirm against upstream
// before editing.
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Proxy signature: kmp_int32 (.omp_task_entry.)(kmp_int32 gtid,
  //                                               kmp_task_t_with_privates *tt)
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // 'Base' is the embedded kmp_task_t — the first field of the record.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address (the pointer, not the loaded value).
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates block, if present, is the second field of the record; pass
  // a null pointer when the task has no privates.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally receive the bounds, stride, last-iteration
  // flag and reductions pointer loaded from the task descriptor.
  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
  LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
  llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
  LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
  llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
  LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
  llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
  llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
  LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
  llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
  CallArgs.push_back(LBParam);
  CallArgs.push_back(UBParam);
  CallArgs.push_back(StParam);
  CallArgs.push_back(LIParam);
  CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  // Forward to the outlined task body, then return 0 to the runtime.
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3688 
// NOTE(review): the signature head of this function, two 'ImplicitParamDecl'
// argument tails and the line declaring 'Base' (a pointer-load of the task
// argument) are missing from this copy of the file — confirm against
// upstream before editing.
                                    SourceLocation Loc,
                                    QualType KmpInt32Ty,
                                    QualType KmpTaskTWithPrivatesPtrQTy,
                                    QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Destructor thunk signature mirrors the task entry:
  //   kmp_int32 (.omp_task_destructor.)(kmp_int32 gtid, task_t *tt)
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Walk every field of the privates record (the second field of the
  // with-privates task type) and push a destroy cleanup for each field whose
  // type requires destruction.
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3737 
3738 /// Emit a privates mapping function for correct handling of private and
3739 /// firstprivate variables.
3740 /// \code
3741 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3742 /// **noalias priv1,..., <tyn> **noalias privn) {
3743 /// *priv1 = &.privates.priv1;
3744 /// ...;
3745 /// *privn = &.privates.privn;
3746 /// }
3747 /// \endcode
static llvm::Value *
// NOTE(review): the signature head (the function name line declaring at
// least 'CGM' and 'Loc'), several 'ImplicitParamDecl' argument tails and the
// line declaring 'Base' are missing from this copy of the file — confirm
// against upstream before editing.
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First argument: const restrict pointer to the privates record.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
  Args.push_back(&TaskPrivatesArg);
  // One <ty>** out-parameter per privatized variable; remember each
  // variable's argument position so record fields can be matched below.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Task-local privates: lvalue references and allocatable declarations are
  // stored indirectly, so the argument type gains an extra pointer level
  // (mirrors createPrivatesRecordDecl).
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // In optimized builds force-inline the mapper — it only forwards field
  // addresses through the out-parameters.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  // Store each privates-record field address through the matching
  // out-parameter, found via the positions recorded above.
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
3846 
3847 /// Emit initialization for private variables in task-based directives.
// NOTE(review): the signature head (declaring the return type and 'CGF') and
// a few interior lines (the condition that picks the captured-statement
// kind, the 'IsTargetTask' initializer, two cast heads, and the head of the
// element-wise array-init call) are missing from this copy of the file —
// confirm against upstream before editing.
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // PrivatesBase points at the privates record — the second field of the
  // with-privates task type.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI walks the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup, i.e. from the task_dup function) only
    // non-trivial constructor initializations are re-emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Artificial target data variable: not captured, its address is
          // taken directly from the local variable.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value out of the shareds block of the source
          // task, re-aligned to the original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted in the current region.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record case: privatize the init element to the shared
          // address, then emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No init element: plain (last)private initialization.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3968 
3969 /// Check if duplication function is required for taskloops.
// NOTE(review): the signature head (the function name line declaring the
// 'CGF' parameter used below) is missing from this copy of the file —
// confirm against upstream.
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Local privates never need an explicit initializer here.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Only a non-trivial constructor-call initializer makes init required.
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    // Stop as soon as one such private is found.
    if (InitRequired)
      break;
  }
  return InitRequired;
}
3985 
3986 
3987 /// Emit task_dup function (for initialization of
3988 /// private/firstprivate/lastprivate vars and last_iter flag)
3989 /// \code
3990 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3991 /// lastpriv) {
3992 /// // setup lastprivate flag
3993 /// task_dst->last = lastpriv;
3994 /// // could be constructor calls here...
3995 /// }
3996 /// \endcode
static llvm::Value *
// NOTE(review): the signature head (the function name line declaring at
// least 'CGM' and 'Loc'), three 'ImplicitParamDecl' argument tails and a few
// 'EmitLValueForField(' / 'EmitLoadOfScalar(' heads are missing from this
// copy of the file — confirm against upstream before editing.
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src,
  //                  int lastpriv)
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the SOURCE task's shareds block, so load
  // its address only when there are firstprivate variables.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
            Base, *std::next(KmpTaskTQTyRD->field_begin(),
                             KmpTaskTShareds)),
        Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4065 
4066 /// Checks if destructor function is required to be generated.
4067 /// \return true if cleanups are required, false otherwise.
4068 static bool
4069 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4070  ArrayRef<PrivateDataTy> Privates) {
4071  for (const PrivateDataTy &P : Privates) {
4072  if (P.second.isLocalPrivate())
4073  continue;
4074  QualType Ty = P.second.Original->getType().getNonReferenceType();
4075  if (Ty.isDestructedType())
4076  return true;
4077  }
4078  return false;
4079 }
4080 
namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
// NOTE(review): the base-clause line (the ctor init list below shows the
// base is CodeGenFunction::OMPPrivateScope) and the member declarations for
// 'Uppers', 'ContDests' and 'ExitDests' used below are missing from this
// copy of the file — confirm against upstream.
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop heads for every iterator in \p E (if non-null):
  /// privatizes the iterator and counter variables, zero-initializes the
  /// counters and opens one condition/body block pair per iterator.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Privatize each iterator variable and its helper counter.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit counter init, the bounds test and the per-iteration update for
    // each iterator.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // NOTE(review): the condition selecting the signed vs unsigned
      // comparison below is missing from this copy of the file.
      llvm::Value *Cmp =
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops in reverse order: emits the counter increment, the
  /// back-branch to the condition block, and the exit block of each loop.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4159 
/// Returns, for expression \p E, the pair (base address, size in bytes).
/// Array-shaping expressions multiply the pointee size by each dimension
/// extent; array sections compute the byte distance from the section's lower
/// address to one past its upper address; any other expression uses the size
/// of its type.
static std::pair<llvm::Value *, llvm::Value *>
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // For shaping expressions the base is evaluated as a scalar pointer.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(pointee) * dim0 * dim1 * ... (no-overflow multiplies).
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Size = (&upper + 1) - &lower, computed on pointers cast to size_t.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
4195 
/// Builds the kmp_task_affinity_info_t record type, if it is not built yet,
/// and builds the flags type. (The record built here is the affinity record,
/// not kmp_depend_info — the previous comment was a copy-paste slip.)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  // Flags are a 32-bit unsigned integer.
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    // struct kmp_task_affinity_info_t {
    //   intptr_t base_addr; size_t len; <u32> flags;
    // };
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4210 
/// Emits the runtime call that allocates a kmp_task_t object for directive
/// \p D and fills in its fields: shareds copy, private-copy initializers,
/// destructor thunk, priority, affinity registration and detach-event setup.
/// Returns the handles callers need to emit the task body and enqueue call.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the variable used for element-wise
  // initialization from the original value.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Allocatable task-local privates are stored through a pointer, so they
  // only need pointer alignment in the privates record.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment order (stable, so source order breaks ties).
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // record cached separately from plain task/target directives.
  if (SavedKmpTaskloopTQTy.isNull()) {
    SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
  assert((D.getDirectiveKind() == OMPD_task ||
         "Expected taskloop, task or target directive");
  if (SavedKmpTaskTQTy.isNull()) {
    SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
  }
  KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Ask the runtime to run destructors only when some private actually
    // needs one.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
    Flags = Flags | DetachableFlag;
  // 'final' may be a runtime condition (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    // Nowait target tasks are allocated via the target-specific entry point,
    // which additionally takes the device ID.
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Store the returned event handle into the user's event variable.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator-modified clauses contribute a runtime-computed count:
        // product of the iterator upper bounds.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: a constant array temporary suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified clauses need a runtime position counter, seeded with
    // the number of statically-filled elements.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        CGF.EmitLValueForField(
            TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                               KmpTaskTShareds)),
        Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      // A task-duplication helper is needed when lastprivates must be tracked
      // or the privates require (re)initialization in duplicated tasks.
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
        DestructorFn, KmpRoutineEntryPtrTy),
        DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  // Package everything callers need to emit the task body and enqueue call.
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4601 
namespace {
/// Dependence kind for RTL. These are the bit patterns stored into the
/// flags field of kmp_depend_info (see emitDependData below).
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record, in declaration order
/// (base_addr, len, flags).
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4612 
4613 /// Translates internal dependency kind into the runtime kind.
4614 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4615  RTLDependenceKindTy DepKind;
4616  switch (K) {
4617  case OMPC_DEPEND_in:
4618  DepKind = DepIn;
4619  break;
4620  // Out and InOut dependencies must use the same code.
4621  case OMPC_DEPEND_out:
4622  case OMPC_DEPEND_inout:
4623  DepKind = DepInOut;
4624  break;
4625  case OMPC_DEPEND_mutexinoutset:
4626  DepKind = DepMutexInOutSet;
4627  break;
4628  case OMPC_DEPEND_source:
4629  case OMPC_DEPEND_sink:
4630  case OMPC_DEPEND_depobj:
4631  case OMPC_DEPEND_unknown:
4632  llvm_unreachable("Unknown task dependence type");
4633  }
4634  return DepKind;
4635 }
4636 
4637 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4638 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4639  QualType &FlagsTy) {
4640  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4641  if (KmpDependInfoTy.isNull()) {
4642  RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4643  KmpDependInfoRD->startDefinition();
4644  addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4645  addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4646  addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4647  KmpDependInfoRD->completeDefinition();
4648  KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4649  }
4650 }
4651 
/// Returns, for a depobj lvalue, the pair (number of dependencies, lvalue of
/// the first kmp_depend_info element). The count is read from the element
/// stored immediately before the dependency array (index -1), whose
/// base_addr field holds it.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    DepobjLVal.getAddress(CGF),
    C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
    Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the size-carrying record.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4680 
/// Fills \p DependenciesArray with one kmp_depend_info record per dependency
/// expression in \p Data, starting at \p Pos — either a compile-time index or
/// a runtime counter lvalue — and advancing the position as records are
/// written.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Open iterator loops when the clause carries an iterator modifier; the
  // scope closes the loops when it is destroyed at end of function.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static position: constant GEP into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Dynamic position: load the current index from the counter lvalue.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the static or runtime position by one record.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4740 
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Open iterator loops if the clause has an iterator modifier; the scope
    // emits the loop latches when it closes at the end of this block.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
        DepobjLVal.getAddress(CGF),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element at index -1 stores the dependency count in its
      // base_addr field.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count into a fresh uintptr temporary so the value
      // remains addressable after the iterator scope ends.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated per-expression sizes after the iterator loops have
  // been closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4798 
/// Copies the kmp_depend_info records stored in each depobj of \p Data into
/// \p DependenciesArray at the runtime position \p PosLVal, advancing the
/// position by the number of records copied from each depobj.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Byte size of one kmp_depend_info record, used to scale the memcpy.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Open iterator loops if the clause has an iterator modifier.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
        DepobjLVal.getAddress(CGF),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4860 
4861 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4863  SourceLocation Loc) {
4864  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4865  return D.DepExprs.empty();
4866  }))
4867  return std::make_pair(nullptr, Address::invalid());
4868  // Process list of dependencies.
4869  ASTContext &C = CGM.getContext();
4870  Address DependenciesArray = Address::invalid();
4871  llvm::Value *NumOfElements = nullptr;
4872  unsigned NumDependencies = std::accumulate(
4873  Dependencies.begin(), Dependencies.end(), 0,
4874  [](unsigned V, const OMPTaskDataTy::DependData &D) {
4875  return D.DepKind == OMPC_DEPEND_depobj
4876  ? V
4877  : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4878  });
4879  QualType FlagsTy;
4880  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4881  bool HasDepobjDeps = false;
4882  bool HasRegularWithIterators = false;
4883  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4884  llvm::Value *NumOfRegularWithIterators =
4885  llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4886  // Calculate number of depobj dependecies and regular deps with the iterators.
4887  for (const OMPTaskDataTy::DependData &D : Dependencies) {
4888  if (D.DepKind == OMPC_DEPEND_depobj) {
4890  emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4891  for (llvm::Value *Size : Sizes) {
4892  NumOfDepobjElements =
4893  CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4894  }
4895  HasDepobjDeps = true;
4896  continue;
4897  }
4898  // Include number of iterations, if any.
4899 
4900  if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4901  for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4902  llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4903  Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4904  llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4905  Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4906  NumOfRegularWithIterators =
4907  CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4908  }
4909  HasRegularWithIterators = true;
4910  continue;
4911  }
4912  }
4913 
4914  QualType KmpDependInfoArrayTy;
4915  if (HasDepobjDeps || HasRegularWithIterators) {
4916  NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4917  /*isSigned=*/false);
4918  if (HasDepobjDeps) {