//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
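
// Illustrative note (added in this write-up, not in the upstream source):
// OMPLexicalScope is what makes captured clause expressions visible inside a
// directive. Given user code such as
//
//   int N = compute();
//   #pragma omp parallel if(N > 1) num_threads(N)
//   { ... }
//
// Sema pre-computes 'N > 1' and 'N' into pre-init temporaries attached to the
// clauses; emitPreInitStmt() above emits those temporaries, and the
// privatization loop remaps captured variables like 'N' to their addresses in
// the enclosing frame before the region body is emitted.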

/// Lexical scope for OpenMP parallel construct that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(
                                OrigVD->getType().getNonReferenceType()))),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
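
// Illustrative note (added here, not in the upstream source): OMPLoopScope
// pre-materializes everything the loop pre-condition needs. For a collapsed
// C++ range-for such as
//
//   #pragma omp for collapse(2)
//   for (int i = 0; i < n; ++i)
//     for (auto v : vec) { ... }
//
// the scope creates temporaries for the loop counters, emits the implicit
// __range and __end variables of the inner range loop, and emits any pre-init
// declarations computed by Sema, so the trip count can be evaluated before
// the directive body is generated.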

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd-only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size =
          Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}
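
// Illustrative note (added here, not in the upstream source): for a VLA the
// size is rebuilt at runtime from the saved extent expressions. Given
//
//   void f(int n, int m) {
//     double a[n][m];           // getTypeSizeInChars(a's type) == 0
//     #pragma omp parallel firstprivate(a)
//     ...
//   }
//
// getTypeSize() multiplies the element counts n and m (with NUW arithmetic)
// and scales by sizeof(double) to produce the byte size handed to the OpenMP
// runtime.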

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}
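
// Illustrative note (added here, not in the upstream source; names below are
// hypothetical): the by-copy path smuggles small scalars through a
// uintptr-sized slot because the runtime only forwards pointer-sized
// arguments. Conceptually:
//
//   float x = 1.0f;               // captured by copy
//   uintptr_t slot;               // the ".casted" temporary
//   *(float *)&slot = x;          // store through source-typed pointer
//   pass_to_runtime(slot);        // reloaded as uintptr_t
//
// The outlined function performs the inverse cast (see castValueFromUintptr()
// below) to recover the original value.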

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0)
    F->addFnAttr(llvm::Attribute::AlwaysInline);

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace())));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
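
// Illustrative note (added here, not in the upstream source): when debug info
// is enabled, two functions are emitted. The "_debug__"-suffixed body keeps
// the variables' original types so the debugger can inspect them, while the
// wrapper keeps the uintptr-based ABI the OpenMP runtime expects and simply
// forwards its arguments:
//
//   helper(uintptr args...)            // runtime-facing wrapper
//     -> helper_debug__(typed args...) // real body, debuggable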

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
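
// Illustrative note (added here, not in the upstream source): the emitted
// control flow corresponds to this C-level sketch of the while-do loop:
//
//   T *src = srcBegin, *dst = destBegin;
//   if (dst == destEnd) goto done;      // omp.arraycpy.isempty
//   do {                                // omp.arraycpy.body
//     copy(*dst, *src);                 // CopyGen callback per element
//     ++dst; ++src;
//   } while (dst != destEnd);
//   done:;                              // omp.arraycpy.done
//
// The two PHI nodes carry 'src' and 'dst' across the back edge.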

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
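
// Illustrative note (added here, not in the upstream source): the two paths
// can be seen with
//
//   struct POD { int x; };     // trivial assignment -> one aggregate copy
//   struct Obj { Obj &operator=(const Obj &); };
//   POD a[8]; Obj b[8];
//   #pragma omp parallel copyin(a, b)  // assuming a, b are threadprivate
//
// 'a' is copied with a single aggregate assignment, while 'b' goes through
// EmitOMPAggregateAssign(), invoking the user-defined operator= once per
// element via the remapped Copy expression.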

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
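
// Illustrative note (added here, not in the upstream source): the array path
// vs. the scalar path can be seen with
//
//   int n = 10;
//   std::string s[4];
//   #pragma omp task firstprivate(n, s)
//   { ... }
//
// 'n' becomes a private alloca initialized by copy from the original, while
// 's' takes the aggregate branch: because std::string has a nontrivial copy
// constructor, each element is constructed through EmitOMPAggregateAssign()
// rather than memcpy'd.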

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
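
// Illustrative note (added here, not in the upstream source): copyin applies
// to threadprivate variables, e.g.
//
//   static int cfg;
//   #pragma omp threadprivate(cfg)
//   ...
//   cfg = read_config();
//   #pragma omp parallel copyin(cfg)
//   { /* every thread starts with the master's cfg */ }
//
// The master-thread check above compares the master and per-thread addresses;
// when they are equal (i.e. in the master itself), the copy is skipped.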

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VD, C, OrigVD]() {
                if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
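
// Illustrative note (added here, not in the upstream source): for
//
//   int last;
//   #pragma omp for lastprivate(last)
//   for (int i = 0; i < n; ++i) last = f(i);
//
// each thread works on a private 'last'; the guard emitted above
// (".omp.lastprivate.then") runs only in the thread that executed the
// sequentially last iteration and copies its private value back into the
// original variable.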

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
        return IsArray ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
      });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}
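
// Illustrative note (added here, not in the upstream source): the task
// modifier handled above corresponds to source such as
//
//   int sum = 0;
//   #pragma omp parallel reduction(task, +: sum)
//   { ... code that may spawn tasks contributing to sum ... }
//
// For that form the runtime needs a task-reduction descriptor
// (emitTaskReductionInit); the switch merely fetches the directive-specific
// expression that receives the descriptor.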

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}
1460 
1462  CodeGenFunction &CGF, const OMPExecutableDirective &D,
1463  const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1464  if (!CGF.HaveInsertPoint())
1465  return;
1466  llvm::BasicBlock *DoneBB = nullptr;
1467  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1468  if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1469  if (!DoneBB) {
1470  if (llvm::Value *Cond = CondGen(CGF)) {
1471  // If the first post-update expression is found, emit conditional
1472  // block if it was requested.
1473  llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1474  DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1475  CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1476  CGF.EmitBlock(ThenBB);
1477  }
1478  }
1479  CGF.EmitIgnoredExpr(PostUpdate);
1480  }
1481  }
1482  if (DoneBB)
1483  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1484 }
1485 
1486 namespace {
1487 /// Codegen lambda for appending distribute lower and upper bounds to the
1488 /// outlined parallel function. This is necessary for combined constructs
1489 /// such as 'distribute parallel for'.
1490 typedef llvm::function_ref<void(CodeGenFunction &,
1491  const OMPExecutableDirective &,
1492  llvm::SmallVectorImpl<llvm::Value *> &)>
1493  CodeGenBoundParametersTy;
1494 } // anonymous namespace
1495 
1496 static void
1497 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1498  const OMPExecutableDirective &S) {
1499  if (CGF.getLangOpts().OpenMP < 50)
1500  return;
1501  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1502  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1503  for (const Expr *Ref : C->varlists()) {
1504  if (!Ref->getType()->isScalarType())
1505  continue;
1506  const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1507  if (!DRE)
1508  continue;
1509  PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1510  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1511  }
1512  }
1513  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1514  for (const Expr *Ref : C->varlists()) {
1515  if (!Ref->getType()->isScalarType())
1516  continue;
1517  const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1518  if (!DRE)
1519  continue;
1520  PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1521  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1522  }
1523  }
1524  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1525  for (const Expr *Ref : C->varlists()) {
1526  if (!Ref->getType()->isScalarType())
1527  continue;
1528  const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1529  if (!DRE)
1530  continue;
1531  PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1532  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1533  }
1534  }
1535  // Privates should not be analyzed since they are not captured at all.
1536  // Task reductions may be skipped - tasks are ignored.
1537  // Firstprivates do not return a value but may be passed by reference - no
1538  // need to check for updated lastprivate conditional.
1539  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1540  for (const Expr *Ref : C->varlists()) {
1541  if (!Ref->getType()->isScalarType())
1542  continue;
1543  const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1544  if (!DRE)
1545  continue;
1546  PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1547  }
1548  }
1549  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1550  CGF, S, PrivateDecls);
1551 }
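// Illustrative example (sketch): for
//   #pragma omp parallel for lastprivate(conditional: x)
// the scalar reduction/lastprivate/linear/firstprivate variables collected
// above are excluded from the check, and the runtime helper invoked here
// verifies whether any remaining shared-variable update requires refreshing
// the conditional lastprivate value of 'x'.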
1552 
1553 static void emitCommonOMPParallelDirective(
1554  CodeGenFunction &CGF, const OMPExecutableDirective &S,
1555  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1556  const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1557  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1558  llvm::Function *OutlinedFn =
1559  CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1560  S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1561  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1562  CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1563  llvm::Value *NumThreads =
1564  CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1565  /*IgnoreResultAssign=*/true);
1566  CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1567  CGF, NumThreads, NumThreadsClause->getBeginLoc());
1568  }
1569  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1570  CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1571  CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1572  CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1573  }
1574  const Expr *IfCond = nullptr;
1575  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1576  if (C->getNameModifier() == OMPD_unknown ||
1577  C->getNameModifier() == OMPD_parallel) {
1578  IfCond = C->getCondition();
1579  break;
1580  }
1581  }
1582 
1583  OMPParallelScope Scope(CGF, S);
1584  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1585  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1586  // lower and upper bounds with the pragma 'for' chunking mechanism.
1587  // The following lambda takes care of appending the lower and upper bound
1588  // parameters when necessary.
1589  CodeGenBoundParameters(CGF, S, CapturedVars);
1590  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1591  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1592  CapturedVars, IfCond);
1593 }
1594 
1595 static bool isAllocatableDecl(const VarDecl *VD) {
1596  const VarDecl *CVD = VD->getCanonicalDecl();
1597  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1598  return false;
1599  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1600  // Use the default allocation.
1601  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1602  AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1603  !AA->getAllocator());
1604 }
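// Illustrative example: given
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
// isAllocatableDecl(a) returns true and 'a' goes through the OpenMP allocator
// API below, while the default/null allocators keep the ordinary alloca path.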
1605 
1606 static void emitEmptyBoundParameters(CodeGenFunction &,
1607  const OMPExecutableDirective &,
1608  llvm::SmallVectorImpl<llvm::Value *> &) {}
1609 
1610 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1611  CodeGenFunction &CGF, const VarDecl *VD) {
1612  CodeGenModule &CGM = CGF.CGM;
1613  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1614 
1615  if (!VD)
1616  return Address::invalid();
1617  const VarDecl *CVD = VD->getCanonicalDecl();
1618  if (!isAllocatableDecl(CVD))
1619  return Address::invalid();
1620  llvm::Value *Size;
1621  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1622  if (CVD->getType()->isVariablyModifiedType()) {
1623  Size = CGF.getTypeSize(CVD->getType());
1624  // Align the size: ((size + align - 1) / align) * align
1625  Size = CGF.Builder.CreateNUWAdd(
1626  Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1627  Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1628  Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
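  // Worked example of the align-up above (illustrative): for Size = 10 and
  // Align = 8, ((10 + 7) / 8) * 8 = 16, the smallest multiple of 8 >= 10.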
1629  } else {
1630  CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1631  Size = CGM.getSize(Sz.alignTo(Align));
1632  }
1633 
1634  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1635  assert(AA->getAllocator() &&
1636  "Expected allocator expression for non-default allocator.");
1637  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1638  // According to the standard, the original allocator type is an enum (integer).
1639  // Convert to pointer type, if required.
1640  if (Allocator->getType()->isIntegerTy())
1641  Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1642  else if (Allocator->getType()->isPointerTy())
1643  Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1644  CGM.VoidPtrTy);
1645 
1646  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1647  CGF.Builder, Size, Allocator,
1648  getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1649  llvm::CallInst *FreeCI =
1650  OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1651 
1652  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1653  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1654  Addr,
1655  CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1656  getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1657  return Address(Addr, Align);
1658 }
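// The net effect is roughly the following IR (a sketch, assuming the
// OMPIRBuilder lowers createOMPAlloc/createOMPFree to the usual kmpc entry
// points; value names are illustrative):
//   %x.void.addr = call i8* @__kmpc_alloc(i32 %gtid, i64 %size, i8* %allocator)
//   ...
//   call void @__kmpc_free(i32 %gtid, i8* %x.void.addr, i8* %allocator)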
1659 
1660 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1661  CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1662  SourceLocation Loc) {
1663  CodeGenModule &CGM = CGF.CGM;
1664  if (CGM.getLangOpts().OpenMPUseTLS &&
1665  CGM.getContext().getTargetInfo().isTLSSupported())
1666  return VDAddr;
1667 
1668  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1669 
1670  llvm::Type *VarTy = VDAddr.getElementType();
1671  llvm::Value *Data =
1672  CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1673  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1674  std::string Suffix = getNameWithSeparators({"cache", ""});
1675  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1676 
1677  llvm::CallInst *ThreadPrivateCacheCall =
1678  OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1679 
1680  return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1681 }
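// Sketch of the emitted call (assuming createCachedThreadPrivate lowers to
// the usual kmpc entry point; operand names are illustrative):
//   %addr = call i8* @__kmpc_threadprivate_cached(%ident_t* @loc, i32 %gtid,
//                      i8* %var, i64 %size, i8*** @var.cache)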
1682 
1683 std::string CodeGenFunction::getNameWithSeparators(
1684  ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1685  SmallString<128> Buffer;
1686  llvm::raw_svector_ostream OS(Buffer);
1687  StringRef Sep = FirstSeparator;
1688  for (StringRef Part : Parts) {
1689  OS << Sep << Part;
1690  Sep = Separator;
1691  }
1692  return OS.str().str();
1693 }
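// Usage example (illustrative): getNameWithSeparators({"x", "addr"}, "$", ".")
// yields "$x.addr" - the first separator prefixes the first part and the
// regular separator joins the remaining ones.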
1694 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1695  if (CGM.getLangOpts().OpenMPIRBuilder) {
1696  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1697  // Check if we have any if clause associated with the directive.
1698  llvm::Value *IfCond = nullptr;
1699  if (const auto *C = S.getSingleClause<OMPIfClause>())
1700  IfCond = EmitScalarExpr(C->getCondition(),
1701  /*IgnoreResultAssign=*/true);
1702 
1703  llvm::Value *NumThreads = nullptr;
1704  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1705  NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1706  /*IgnoreResultAssign=*/true);
1707 
1708  ProcBindKind ProcBind = OMP_PROC_BIND_default;
1709  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1710  ProcBind = ProcBindClause->getProcBindKind();
1711 
1712  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1713 
1714  // The cleanup callback that finalizes all variables at the given location,
1715  // thus calls destructors etc.
1716  auto FiniCB = [this](InsertPointTy IP) {
1717  OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1718  };
1719 
1720  // Privatization callback that performs appropriate action for
1721  // shared/private/firstprivate/lastprivate/copyin/... variables.
1722  //
1723  // TODO: This defaults to shared right now.
1724  auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1725  llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1726  // The next line is appropriate only for variables (Val) with the
1727  // data-sharing attribute "shared".
1728  ReplVal = &Val;
1729 
1730  return CodeGenIP;
1731  };
1732 
1733  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1734  const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1735 
1736  auto BodyGenCB = [ParallelRegionBodyStmt,
1737  this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1738  llvm::BasicBlock &ContinuationBB) {
1739  OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1740  ContinuationBB);
1741  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1742  CodeGenIP, ContinuationBB);
1743  };
1744 
1745  CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1746  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1747  llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1748  AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1749  Builder.restoreIP(
1750  OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1751  IfCond, NumThreads, ProcBind, S.hasCancel()));
1752  return;
1753  }
1754 
1755  // Emit parallel region as a standalone region.
1756  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1757  Action.Enter(CGF);
1758  OMPPrivateScope PrivateScope(CGF);
1759  bool Copyins = CGF.EmitOMPCopyinClause(S);
1760  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1761  if (Copyins) {
1762  // Emit an implicit barrier to synchronize threads and avoid data races on
1763  // propagation of master's thread values of threadprivate variables to local
1764  // instances of those variables in all other implicit threads.
1765  CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1766  CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1767  /*ForceSimpleCall=*/true);
1768  }
1769  CGF.EmitOMPPrivateClause(S, PrivateScope);
1770  CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1771  (void)PrivateScope.Privatize();
1772  CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1773  CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1774  };
1775  {
1776  auto LPCRegion =
1777  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1778  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1779  emitEmptyBoundParameters);
1780  emitPostUpdateForReductionClause(*this, S,
1781  [](CodeGenFunction &) { return nullptr; });
1782  }
1783  // Check for outer lastprivate conditional update.
1784  checkForLastprivateConditionalUpdate(*this, S);
1785 }
1786 
1787 void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
1788  EmitStmt(S.getIfStmt());
1789 }
1790 
1791 namespace {
1792 /// RAII to handle scopes for loop transformation directives.
1793 class OMPTransformDirectiveScopeRAII {
1794  OMPLoopScope *Scope = nullptr;
1795  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1796  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1797 
1798 public:
1799  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1800  if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1801  Scope = new OMPLoopScope(CGF, *Dir);
1802  CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1803  CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1804  }
1805  }
1806  ~OMPTransformDirectiveScopeRAII() {
1807  if (!Scope)
1808  return;
1809  delete CapInfoRAII;
1810  delete CGSI;
1811  delete Scope;
1812  }
1813 };
1814 } // namespace
1815 
1816 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1817  int MaxLevel, int Level = 0) {
1818  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1819  const Stmt *SimplifiedS = S->IgnoreContainers();
1820  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1821  PrettyStackTraceLoc CrashInfo(
1822  CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1823  "LLVM IR generation of compound statement ('{}')");
1824 
1825  // Keep track of the current cleanup stack depth, including debug scopes.
1826  CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1827  for (const Stmt *CurStmt : CS->body())
1828  emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1829  return;
1830  }
1831  if (SimplifiedS == NextLoop) {
1832  if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
1833  SimplifiedS = Dir->getTransformedStmt();
1834  if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS))
1835  SimplifiedS = Dir->getTransformedStmt();
1836  if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1837  SimplifiedS = CanonLoop->getLoopStmt();
1838  if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1839  S = For->getBody();
1840  } else {
1841  assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1842  "Expected canonical for loop or range-based for loop.");
1843  const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1844  CGF.EmitStmt(CXXFor->getLoopVarStmt());
1845  S = CXXFor->getBody();
1846  }
1847  if (Level + 1 < MaxLevel) {
1848  NextLoop = OMPLoopBasedDirective::tryToFindNextInnerLoop(
1849  S, /*TryImperfectlyNestedLoops=*/true);
1850  emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1851  return;
1852  }
1853  }
1854  CGF.EmitStmt(S);
1855 }
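// Illustrative walk-through: for '#pragma omp for collapse(2)' applied to
//   for (int i = 0; i < N; ++i)
//     for (int j = 0; j < M; ++j)
//       body(i, j);
// emitBody() starts with MaxLevel == 2; level 0 descends into the body of the
// 'i' loop, level 1 reaches the 'j' loop, and the innermost body is emitted by
// the final EmitStmt(S).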
1856 
1857 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1858  JumpDest LoopExit) {
1859  RunCleanupsScope BodyScope(*this);
1860  // Update counter values on the current iteration.
1861  for (const Expr *UE : D.updates())
1862  EmitIgnoredExpr(UE);
1863  // Update the linear variables.
1864  // In distribute directives only loop counters may be marked as linear; no
1865  // need to generate the code for them.
1866  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1867  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1868  for (const Expr *UE : C->updates())
1869  EmitIgnoredExpr(UE);
1870  }
1871  }
1872 
1873  // On a continue in the body, jump to the end.
1874  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1875  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1876  for (const Expr *E : D.finals_conditions()) {
1877  if (!E)
1878  continue;
1879  // Check that the loop counter in a non-rectangular nest fits into the
1880  // iteration space.
1881  llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1882  EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1883  getProfileCount(D.getBody()));
1884  EmitBlock(NextBB);
1885  }
1886 
1887  OMPPrivateScope InscanScope(*this);
1888  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1889  bool IsInscanRegion = InscanScope.Privatize();
1890  if (IsInscanRegion) {
1891  // Need to remember the blocks before and after the scan directive
1892  // to dispatch them correctly depending on the clause used in
1893  // this directive, inclusive or exclusive. For the inclusive clause the
1894  // natural order of the blocks is used; for the exclusive clause the
1895  // blocks must be executed in reverse order.
1896  OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1897  OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1898  // No need to allocate the inscan exit block; in simd mode it is selected
1899  // in the codegen for the scan directive.
1900  if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1901  OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1902  OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1903  EmitBranch(OMPScanDispatch);
1904  EmitBlock(OMPBeforeScanBlock);
1905  }
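  // Illustrative example (sketch): for
  //   #pragma omp simd reduction(inscan, +: x)
  //   for (...) { x += a[i]; #pragma omp scan inclusive(x) b[i] = x; }
  // the statements before the scan directive land in omp.before.scan.bb, the
  // ones after it in omp.after.scan.bb, and the dispatcher block orders their
  // execution depending on whether the clause is inclusive or exclusive.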
1906 
1907  // Emit loop variables for C++ range loops.
1908  const Stmt *Body =
1909  D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1910  // Emit loop body.
1911  emitBody(*this, Body,
1912  OMPLoopBasedDirective::tryToFindNextInnerLoop(
1913  Body, /*TryImperfectlyNestedLoops=*/true),
1914  D.getLoopsNumber());
1915 
1916  // Jump to the dispatcher at the end of the loop body.
1917  if (IsInscanRegion)
1918  EmitBranch(OMPScanExitBlock);
1919 
1920  // The end (updates/cleanups).
1921  EmitBlock(Continue.getBlock());
1922  BreakContinueStack.pop_back();
1923 }
1924 
1925 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1926 
1927 /// Emit a captured statement and return the function as well as its captured
1928 /// closure context.
1929 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1930  const CapturedStmt *S) {
1931  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1932  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1933  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1934  std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1935  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1936  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1937 
1938  return {F, CapStruct.getPointer(ParentCGF)};
1939 }
1940 
1941 /// Emit a call to a previously captured closure.
1942 static llvm::CallInst *
1943 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1944  llvm::ArrayRef<llvm::Value *> Args) {
1945  // Append the closure context to the argument list.
1946  SmallVector<llvm::Value *> EffectiveArgs;
1947  EffectiveArgs.reserve(Args.size() + 1);
1948  llvm::append_range(EffectiveArgs, Args);
1949  EffectiveArgs.push_back(Cap.second);
1950 
1951  return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1952 }
1953 
1954 llvm::CanonicalLoopInfo *
1955 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1956  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1957 
1958  // The caller is processing the loop-associated directive containing the \p
1959  // Depth loops nested in \p S. Put the previous pending loop-associated
1960  // directive onto the stack. If the current loop-associated directive is a
1961  // loop transformation directive, it will push its generated loops onto the
1962  // stack such that together with the loops left here they form the combined
1963  // loop nest for the parent loop-associated directive.
1964  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
1965  ExpectedOMPLoopDepth = Depth;
1966 
1967  EmitStmt(S);
1968  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1969 
1970  // The last added loop is the outermost one.
1971  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
1972 
1973  // Pop the \p Depth loops requested by the call from that stack and restore
1974  // the previous context.
1975  OMPLoopNestStack.set_size(OMPLoopNestStack.size() - Depth);
1976  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
1977 
1978  return Result;
1979 }
1980 
1981 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
1982  const Stmt *SyntacticalLoop = S->getLoopStmt();
1983  if (!getLangOpts().OpenMPIRBuilder) {
1984  // Ignore if OpenMPIRBuilder is not enabled.
1985  EmitStmt(SyntacticalLoop);
1986  return;
1987  }
1988 
1989  LexicalScope ForScope(*this, S->getSourceRange());
1990 
1991  // Emit init statements. The Distance/LoopVar funcs may reference variable
1992  // declarations they contain.
1993  const Stmt *BodyStmt;
1994  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
1995  if (const Stmt *InitStmt = For->getInit())
1996  EmitStmt(InitStmt);
1997  BodyStmt = For->getBody();
1998  } else if (const auto *RangeFor =
1999  dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
2000  if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2001  EmitStmt(RangeStmt);
2002  if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2003  EmitStmt(BeginStmt);
2004  if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2005  EmitStmt(EndStmt);
2006  if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2007  EmitStmt(LoopVarStmt);
2008  BodyStmt = RangeFor->getBody();
2009  } else
2010  llvm_unreachable("Expected for-stmt or range-based for-stmt");
2011 
2012  // Emit closure for later use. By-value captures will be captured here.
2013  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2014  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
2015  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2016  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
2017 
2018  // Call the distance function to get the number of iterations of the loop to
2019  // come.
2020  QualType LogicalTy = DistanceFunc->getCapturedDecl()
2021  ->getParam(0)
2022  ->getType()
2023  .getNonReferenceType();
2024  Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
2025  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
2026  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
2027 
2028  // Emit the loop structure.
2029  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2030  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2031  llvm::Value *IndVar) {
2032  Builder.restoreIP(CodeGenIP);
2033 
2034  // Emit the loop body: Convert the logical iteration number to the loop
2035  // variable and emit the body.
2036  const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2037  LValue LCVal = EmitLValue(LoopVarRef);
2038  Address LoopVarAddress = LCVal.getAddress(*this);
2039  emitCapturedStmtCall(*this, LoopVarClosure,
2040  {LoopVarAddress.getPointer(), IndVar});
2041 
2042  RunCleanupsScope BodyScope(*this);
2043  EmitStmt(BodyStmt);
2044  };
2045  llvm::CanonicalLoopInfo *CL =
2046  OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
2047 
2048  // Finish up the loop.
2049  Builder.restoreIP(CL->getAfterIP());
2050  ForScope.ForceCleanup();
2051 
2052  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2053  OMPLoopNestStack.push_back(CL);
2054 }
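// Illustrative semantics (a sketch of what the two closures compute): for
// 'for (int i = a; i < b; i += c)' the distance closure yields the trip
// count, roughly (b - a + (c - 1)) / c, and the loop-variable closure maps a
// logical iteration number iv back to the user's variable, i = a + iv * c.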
2055 
2056 void CodeGenFunction::EmitOMPInnerLoop(
2057  const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2058  const Expr *IncExpr,
2059  const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2060  const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2061  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2062 
2063  // Start the loop with a block that tests the condition.
2064  auto CondBlock = createBasicBlock("omp.inner.for.cond");
2065  EmitBlock(CondBlock);
2066  const SourceRange R = S.getSourceRange();
2067 
2068  // If attributes are attached, push to the basic block with them.
2069  const auto &OMPED = cast<OMPExecutableDirective>(S);
2070  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2071  const Stmt *SS = ICS->getCapturedStmt();
2072  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2073  OMPLoopNestStack.clear();
2074  if (AS)
2075  LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2076  AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2077  SourceLocToDebugLoc(R.getEnd()));
2078  else
2079  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2080  SourceLocToDebugLoc(R.getEnd()));
2081 
2082  // If there are any cleanups between here and the loop-exit scope,
2083  // create a block to stage a loop exit along.
2084  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2085  if (RequiresCleanup)
2086  ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2087 
2088  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2089 
2090  // Emit condition.
2091  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
2092  if (ExitBlock != LoopExit.getBlock()) {
2093  EmitBlock(ExitBlock);
2094  EmitBranchThroughCleanup(LoopExit);
2095  }
2096 
2097  EmitBlock(LoopBody);
2098  incrementProfileCounter(&S);
2099 
2100  // Create a block for the increment.
2101  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2102  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2103 
2104  BodyGen(*this);
2105 
2106  // Emit "IV = IV + 1" and a back-edge to the condition block.
2107  EmitBlock(Continue.getBlock());
2108  EmitIgnoredExpr(IncExpr);
2109  PostIncGen(*this);
2110  BreakContinueStack.pop_back();
2111  EmitBranch(CondBlock);
2112  LoopStack.pop();
2113  // Emit the fall-through block.
2114  EmitBlock(LoopExit.getBlock());
2115 }
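// Sketch of the control flow emitted above:
//   omp.inner.for.cond --> omp.inner.for.body --> omp.inner.for.inc
//          ^                                             |
//          +---------------------------------------------+
// with the exit edge from the condition leading to omp.inner.for.end,
// possibly via an omp.inner.for.cond.cleanup block.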
2116 
2117 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2118  if (!HaveInsertPoint())
2119  return false;
2120  // Emit inits for the linear variables.
2121  bool HasLinears = false;
2122  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2123  for (const Expr *Init : C->inits()) {
2124  HasLinears = true;
2125  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
2126  if (const auto *Ref =
2127  dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2128  AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2129  const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2130  DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2131  CapturedStmtInfo->lookup(OrigVD) != nullptr,
2132  VD->getInit()->getType(), VK_LValue,
2133  VD->getInit()->getExprLoc());
2134  EmitExprAsInit(
2135  &DRE, VD,
2136  MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
2137  /*capturedByInit=*/false);
2138  EmitAutoVarCleanups(Emission);
2139  } else {
2140  EmitVarDecl(*VD);
2141  }
2142  }
2143  // Emit the linear steps for the linear clauses.
2144  // If a step is not constant, it is pre-calculated before the loop.
2145  if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2146  if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2147  EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2148  // Emit calculation of the linear step.
2149  EmitIgnoredExpr(CS);
2150  }
2151  }
2152  return HasLinears;
2153 }
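// Illustrative example: for '#pragma omp simd linear(k : step)' with a
// non-constant 'step', the private copy of 'k' is initialized here and the
// step is evaluated once into a helper variable before the loop, as handled
// by the getCalcStep() branch above.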
2154 
2155 void CodeGenFunction::EmitOMPLinearClauseFinal(
2156  const OMPLoopDirective &D,
2157  const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2158  if (!HaveInsertPoint())
2159  return;
2160  llvm::BasicBlock *DoneBB = nullptr;
2161  // Emit the final values of the linear variables.
2162  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2163  auto IC = C->varlist_begin();
2164  for (const Expr *F : C->finals()) {
2165  if (!DoneBB) {
2166  if (llvm::Value *Cond = CondGen(*this)) {
2167  // If the first post-update expression is found, emit conditional
2168  // block if it was requested.
2169  llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2170  DoneBB = createBasicBlock(".omp.linear.pu.done");
2171  Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2172  EmitBlock(ThenBB);
2173  }
2174  }
2175  const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2176  DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2177  CapturedStmtInfo->lookup(OrigVD) != nullptr,
2178  (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2179  Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
2180  CodeGenFunction::OMPPrivateScope VarScope(*this);
2181  VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2182  (void)VarScope.Privatize();
2183  EmitIgnoredExpr(F);
2184  ++IC;
2185  }
2186  if (const Expr *PostUpdate = C->getPostUpdateExpr())
2187  EmitIgnoredExpr(PostUpdate);
2188  }
2189  if (DoneBB)
2190  EmitBlock(DoneBB, /*IsFinished=*/true);
2191 }
2192 
2193 static void emitAlignedClause(CodeGenFunction &CGF,
2194  const OMPExecutableDirective &D) {
2195  if (!CGF.HaveInsertPoint())
2196  return;
2197  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2198  llvm::APInt ClauseAlignment(64, 0);
2199  if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2200  auto *AlignmentCI =
2201  cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2202  ClauseAlignment = AlignmentCI->getValue();
2203  }
2204  for (const Expr *E : Clause->varlists()) {
2205  llvm::APInt Alignment(ClauseAlignment);
2206  if (Alignment == 0) {
2207  // OpenMP [2.8.1, Description]
2208  // If no optional parameter is specified, implementation-defined default
2209  // alignments for SIMD instructions on the target platforms are assumed.
2210  Alignment =
2211  CGF.getContext()
2212  .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2213  E->getType()->getPointeeType()))
2214  .getQuantity();
2215  }
2216  assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2217  "alignment is not power of 2");
2218  if (Alignment != 0) {
2219  llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2220  CGF.emitAlignmentAssumption(
2221  PtrValue, E, /*No second loc needed*/ SourceLocation(),
2222  llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2223  }
2224  }
2225  }
2226 }
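// Illustrative example: '#pragma omp simd aligned(p : 64)' emits an alignment
// assumption that 'p' is 64-byte aligned at the entry of the SIMD region;
// without an explicit alignment the target's default SIMD alignment for the
// pointee type is assumed instead.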
2227 
2228 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2229  const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2230  if (!HaveInsertPoint())
2231  return;
2232  auto I = S.private_counters().begin();
2233  for (const Expr *E : S.counters()) {
2234  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2235  const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2236  // Emit var without initialization.
2237  AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2238  EmitAutoVarCleanups(VarEmission);
2239  LocalDeclMap.erase(PrivateVD);
2240  (void)LoopScope.addPrivate(
2241  VD, [&VarEmission]() { return VarEmission.getAllocatedAddress(); });
2242  if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2243  VD->hasGlobalStorage()) {
2244  (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
2245  DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2246  LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2247  E->getType(), VK_LValue, E->getExprLoc());
2248  return EmitLValue(&DRE).getAddress(*this);
2249  });
2250  } else {
2251  (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
2252  return VarEmission.getAllocatedAddress();
2253  });
2254  }
2255  ++I;
2256  }
2257  // Privatize extra loop counters used in loops for ordered(n) clauses.
2258  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2259  if (!C->getNumForLoops())
2260  continue;
2261  for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2262  I < E; ++I) {
2263  const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2264  const auto *VD = cast<VarDecl>(DRE->getDecl());
2265  // Override only those variables that can be captured to avoid re-emission
2266  // of the variables declared within the loops.
2267  if (DRE->refersToEnclosingVariableOrCapture()) {
2268  (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
2269  return CreateMemTemp(DRE->getType(), VD->getName());
2270  });
2271  }
2272  }
2273  }
2274 }
2275 
2276 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2277  const Expr *Cond, llvm::BasicBlock *TrueBlock,
2278  llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2279  if (!CGF.HaveInsertPoint())
2280  return;
2281  {
2282  CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2283  CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2284  (void)PreCondScope.Privatize();
2285  // Get initial values of real counters.
2286  for (const Expr *I : S.inits()) {
2287  CGF.EmitIgnoredExpr(I);
2288  }
2289  }
2290  // Create temp loop control variables with their init values to support
2291  // non-rectangular loops.
2292  CodeGenFunction::OMPMapVars PreCondVars;
2293  for (const Expr *E : S.dependent_counters()) {
2294  if (!E)
2295  continue;
2296  assert(!E->getType().getNonReferenceType()->isRecordType() &&
2297  "dependent counter must not be an iterator.");
2298  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2299  Address CounterAddr =
2300  CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2301  (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2302  }
2303  (void)PreCondVars.apply(CGF);
2304  for (const Expr *E : S.dependent_inits()) {
2305  if (!E)
2306  continue;
2307  CGF.EmitIgnoredExpr(E);
2308  }
2309  // Check that the loop is executed at least once.
2310  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2311  PreCondVars.restore(CGF);
2312 }
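// Illustrative example: for 'for (int i = 0; i < n; ++i)' the emitted
// precondition is essentially '0 < n', so the whole loop construct is skipped
// when not even a single iteration would execute.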
2313 
2314 void CodeGenFunction::EmitOMPLinearClause(
2315  const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2316  if (!HaveInsertPoint())
2317  return;
2318  llvm::DenseSet<const VarDecl *> SIMDLCVs;
2319  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2320  const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2321  for (const Expr *C : LoopDirective->counters()) {
2322  SIMDLCVs.insert(
2323  cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2324  }
2325  }
2326  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2327  auto CurPrivate = C->privates().begin();
2328  for (const Expr *E : C->varlists()) {
2329  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2330  const auto *PrivateVD =
2331  cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2332  if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2333  bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
2334  // Emit private VarDecl with copy init.
2335  EmitVarDecl(*PrivateVD);
2336  return GetAddrOfLocalVar(PrivateVD);
2337  });
2338  assert(IsRegistered && "linear var already registered as private");
2339  // Silence the warning about unused variable.
2340  (void)IsRegistered;
2341  } else {
2342  EmitVarDecl(*PrivateVD);
2343  }
2344  ++CurPrivate;
2345  }
2346  }
2347 }
2348 
2349 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2350  const OMPExecutableDirective &D) {
2351  if (!CGF.HaveInsertPoint())
2352  return;
2353  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2354  RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2355  /*ignoreResult=*/true);
2356  auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2357  CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2358  // In presence of finite 'safelen', it may be unsafe to mark all
2359  // the memory instructions parallel, because loop-carried
2360  // dependences of 'safelen' iterations are possible.
2361  CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2362  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2363  RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2364  /*ignoreResult=*/true);
2365  auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2366  CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2367  // In presence of finite 'safelen', it may be unsafe to mark all
2368  // the memory instructions parallel, because loop-carried
2369  // dependences of 'safelen' iterations are possible.
2370  CGF.LoopStack.setParallel(/*Enable=*/false);
2371  }
2372 }
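// Illustrative example: '#pragma omp simd simdlen(8)' sets the preferred
// vectorization width to 8, while '#pragma omp simd safelen(8)' does the same
// but also keeps the memory instructions from being marked parallel, since
// iterations more than 8 apart may carry dependences.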
2373 
2374 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2375  // Walk clauses and process safelen/lastprivate.
2376  LoopStack.setParallel(/*Enable=*/true);
2377  LoopStack.setVectorizeEnable();
2378  emitSimdlenSafelenClause(*this, D);
2379  if (const auto *C = D.getSingleClause<OMPOrderClause>())
2380  if (C->getKind() == OMPC_ORDER_concurrent)
2381  LoopStack.setParallel(/*Enable=*/true);
2382  if ((D.getDirectiveKind() == OMPD_simd ||
2383  (getLangOpts().OpenMPSimd &&
2384  isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2385  llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2386  [](const OMPReductionClause *C) {
2387  return C->getModifier() == OMPC_REDUCTION_inscan;
2388  }))
2389  // Disable parallel access in case of prefix sum.
2390  LoopStack.setParallel(/*Enable=*/false);
2391 }
2392 
2393 void CodeGenFunction::EmitOMPSimdFinal(
2394  const OMPLoopDirective &D,
2395  const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2396  if (!HaveInsertPoint())
2397  return;
2398  llvm::BasicBlock *DoneBB = nullptr;
2399  auto IC = D.counters().begin();
2400  auto IPC = D.private_counters().begin();
2401  for (const Expr *F : D.finals()) {
2402  const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2403  const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2404  const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2405  if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2406  OrigVD->hasGlobalStorage() || CED) {
2407  if (!DoneBB) {
2408  if (llvm::Value *Cond = CondGen(*this)) {
2409  // If the first post-update expression is found, emit conditional
2410  // block if it was requested.
2411  llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2412  DoneBB = createBasicBlock(".omp.final.done");
2413  Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2414  EmitBlock(ThenBB);
2415  }
2416  }
2417  Address OrigAddr = Address::invalid();
2418  if (CED) {
2419  OrigAddr =
2420  EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2421  } else {
2422  DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2423  /*RefersToEnclosingVariableOrCapture=*/false,
2424  (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2425  OrigAddr = EmitLValue(&DRE).getAddress(*this);
2426  }
2427  OMPPrivateScope VarScope(*this);
2428  VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2429  (void)VarScope.Privatize();
2430  EmitIgnoredExpr(F);
2431  }
2432  ++IC;
2433  ++IPC;
2434  }
2435  if (DoneBB)
2436  EmitBlock(DoneBB, /*IsFinished=*/true);
2437 }
2438 
2439 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2440  const OMPLoopDirective &S,
2441  CodeGenFunction::JumpDest LoopExit) {
2442  CGF.EmitOMPLoopBody(S, LoopExit);
2443  CGF.EmitStopPoint(&S);
2444 }
2445 
2446 /// Emit a helper variable and return corresponding lvalue.
2447 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2448  const DeclRefExpr *Helper) {
2449  auto VDecl = cast<VarDecl>(Helper->getDecl());
2450  CGF.EmitVarDecl(*VDecl);
2451  return CGF.EmitLValue(Helper);
2452 }
2453 
2454 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2455  const RegionCodeGenTy &SimdInitGen,
2456  const RegionCodeGenTy &BodyCodeGen) {
2457  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2458  PrePostActionTy &) {
2459  CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2460  CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2461  SimdInitGen(CGF);
2462 
2463  BodyCodeGen(CGF);
2464  };
2465  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2466  CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2467  CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2468 
2469  BodyCodeGen(CGF);
2470  };
2471  const Expr *IfCond = nullptr;
2472  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2473  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2474  if (CGF.getLangOpts().OpenMP >= 50 &&
2475  (C->getNameModifier() == OMPD_unknown ||
2476  C->getNameModifier() == OMPD_simd)) {
2477  IfCond = C->getCondition();
2478  break;
2479  }
2480  }
2481  }
2482  if (IfCond) {
2483  CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2484  } else {
2485  RegionCodeGenTy ThenRCG(ThenGen);
2486  ThenRCG(CGF);
2487  }
2488 }
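// Illustrative example: for '#pragma omp simd if(simd: cond)' (OpenMP 5.0)
// both versions of the body are emitted - a vectorized one under 'cond' and a
// non-vectorized fallback - and the emitted branch selects between them at
// run time.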
2489 
2490 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPSimdDirective &S,
2491  PrePostActionTy &Action) {
2492  Action.Enter(CGF);
2493  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2494  "Expected simd directive");
2495  OMPLoopScope PreInitScope(CGF, S);
2496  // if (PreCond) {
2497  // for (IV in 0..LastIteration) BODY;
2498  // <Final counter/linear vars updates>;
2499  // }
2500  //
2501  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2502  isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2503  isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2504  (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2505  (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2506  }
2507 
2508  // Emit: if (PreCond) - begin.
2509  // If the condition constant folds and can be elided, avoid emitting the
2510  // whole loop.
2511  bool CondConstant;
2512  llvm::BasicBlock *ContBlock = nullptr;
2513  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2514  if (!CondConstant)
2515  return;
2516  } else {
2517  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2518  ContBlock = CGF.createBasicBlock("simd.if.end");
2519  emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2520  CGF.getProfileCount(&S));
2521  CGF.EmitBlock(ThenBlock);
2522  CGF.incrementProfileCounter(&S);
2523  }
2524 
2525  // Emit the loop iteration variable.
2526  const Expr *IVExpr = S.getIterationVariable();
2527  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2528  CGF.EmitVarDecl(*IVDecl);
2529  CGF.EmitIgnoredExpr(S.getInit());
2530 
2531  // Emit the iterations count variable.
2532  // If it is not a variable, Sema decided to calculate the iteration count on
2533  // each iteration (e.g., it is foldable into a constant).
2534  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2535  CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2536  // Emit calculation of the iterations count.
2537  CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2538  }
2539 
2540  emitAlignedClause(CGF, S);
2541  (void)CGF.EmitOMPLinearClauseInit(S);
2542  {
2543  CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2544  CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2545  CGF.EmitOMPLinearClause(S, LoopScope);
2546  CGF.EmitOMPPrivateClause(S, LoopScope);
2547  CGF.EmitOMPReductionClauseInit(S, LoopScope);
2548  CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2549  CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2550  bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2551  (void)LoopScope.Privatize();
2552  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2553  CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2554 
2555  emitCommonSimdLoop(
2556  CGF, S,
2557  [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2558  CGF.EmitOMPSimdInit(S);
2559  },
2560  [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2561  CGF.EmitOMPInnerLoop(
2562  S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2563  [&S](CodeGenFunction &CGF) {
2564  emitOMPLoopBodyWithStopPoint(CGF, S,
2565  CodeGenFunction::JumpDest());
2566  },
2567  [](CodeGenFunction &) {});
2568  });
2569  CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2570  // Emit final copy of the lastprivate variables at the end of loops.
2571  if (HasLastprivateClause)
2572  CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2573  CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2574  emitPostUpdateForReductionClause(CGF, S,
2575  [](CodeGenFunction &) { return nullptr; });
2576  }
2577  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2578  // Emit: if (PreCond) - end.
2579  if (ContBlock) {
2580  CGF.EmitBranch(ContBlock);
2581  CGF.EmitBlock(ContBlock, true);
2582  }
2583 }
2584 
2585 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2586  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2587  OMPFirstScanLoop = true;
2588  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2589  emitOMPSimdRegion(CGF, S, Action);
2590  };
2591  {
2592  auto LPCRegion =
2593  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2594  OMPLexicalScope Scope(*this, S, OMPD_unknown);
2595  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2596  }
2597  // Check for outer lastprivate conditional update.
2598  checkForLastprivateConditionalUpdate(*this, S);
2599 }
2600 
2601 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2602  // Emit the de-sugared statement.
2603  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2604  EmitStmt(S.getTransformedStmt());
2605 }
2606 
2607 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2608  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
2609 
2610  if (UseOMPIRBuilder) {
2611  auto DL = SourceLocToDebugLoc(S.getBeginLoc());
2612  const Stmt *Inner = S.getRawStmt();
2613 
2614  // Consume nested loop. Clear the entire remaining loop stack because a
2615  // fully unrolled loop is non-transformable. For partial unrolling the
2616  // generated outer loop is pushed back to the stack.
2617  llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2618  OMPLoopNestStack.clear();
2619 
2620  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2621 
2622  bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
2623  llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
2624 
2625  if (S.hasClausesOfKind<OMPFullClause>()) {
2626  assert(ExpectedOMPLoopDepth == 0);
2627  OMPBuilder.unrollLoopFull(DL, CLI);
2628  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2629  uint64_t Factor = 0;
2630  if (Expr *FactorExpr = PartialClause->getFactor()) {
2631  Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2632  assert(Factor >= 1 && "Only positive factors are valid");
2633  }
2634  OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
2635  NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
2636  } else {
2637  OMPBuilder.unrollLoopHeuristic(DL, CLI);
2638  }
2639 
2640  assert((!NeedsUnrolledCLI || UnrolledCLI) &&
2641  "NeedsUnrolledCLI implies UnrolledCLI to be set");
2642  if (UnrolledCLI)
2643  OMPLoopNestStack.push_back(UnrolledCLI);
2644 
2645  return;
2646  }
2647 
2648  // This function is only called if the unrolled loop is not consumed by any
2649  // other loop-associated construct. Such a loop-associated construct will have
2650  // used the transformed AST.
2651 
2652  // Set the unroll metadata for the next emitted loop.
2653  LoopStack.setUnrollState(LoopAttributes::Enable);
2654 
2655  if (S.hasClausesOfKind<OMPFullClause>()) {
2656  LoopStack.setUnrollState(LoopAttributes::Full);
2657  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2658  if (Expr *FactorExpr = PartialClause->getFactor()) {
2659  uint64_t Factor =
2660  FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2661  assert(Factor >= 1 && "Only positive factors are valid");
2662  LoopStack.setUnrollCount(Factor);
2663  }
2664  }
2665 
2666  EmitStmt(S.getAssociatedStmt());
2667 }
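// Illustrative examples: '#pragma omp unroll full' requests complete
// unrolling, '#pragma omp unroll partial(4)' requests an unroll factor of 4,
// and a bare '#pragma omp unroll' leaves the decision to the LLVM heuristics.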
2668 
2669 void CodeGenFunction::EmitOMPOuterLoop(
2670  bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2671  CodeGenFunction::OMPPrivateScope &LoopScope,
2672  const CodeGenFunction::OMPLoopArguments &LoopArgs,
2673  const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2674  const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2675  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2676 
2677  const Expr *IVExpr = S.getIterationVariable();
2678  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2679  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2680 
2681  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2682 
2683  // Start the loop with a block that tests the condition.
2684  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2685  EmitBlock(CondBlock);
2686  const SourceRange R = S.getSourceRange();
2687  OMPLoopNestStack.clear();
2688  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2689  SourceLocToDebugLoc(R.getEnd()));
2690 
2691  llvm::Value *BoolCondVal = nullptr;
2692  if (!DynamicOrOrdered) {
2693  // UB = min(UB, GlobalUB) or
2694  // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2695  // 'distribute parallel for')
2696  EmitIgnoredExpr(LoopArgs.EUB);
2697  // IV = LB
2698  EmitIgnoredExpr(LoopArgs.Init);
2699  // IV < UB
2700  BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2701  } else {
2702  BoolCondVal =
2703  RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2704  LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2705  }
2706 
2707  // If there are any cleanups between here and the loop-exit scope,
2708  // create a block to stage a loop exit along.
2709  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2710  if (LoopScope.requiresCleanups())
2711  ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2712 
2713  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2714  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2715  if (ExitBlock != LoopExit.getBlock()) {
2716  EmitBlock(ExitBlock);
2717  EmitBranchThroughCleanup(LoopExit);
2718  }
2719  EmitBlock(LoopBody);
2720 
2721  // Emit "IV = LB" (in case of static schedule, we have already calculated new
2722  // LB for loop condition and emitted it above).
2723  if (DynamicOrOrdered)
2724  EmitIgnoredExpr(LoopArgs.Init);
2725 
2726  // Create a block for the increment.
2727  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2728  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2729 
2730  emitCommonSimdLoop(
2731  *this, S,
2732  [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2733  // Generate !llvm.loop.parallel metadata for loads and stores for loops
2734  // with dynamic/guided scheduling and without an ordered clause.
2735  if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2736  CGF.LoopStack.setParallel(!IsMonotonic);
2737  if (const auto *C = S.getSingleClause<OMPOrderClause>())
2738  if (C->getKind() == OMPC_ORDER_concurrent)
2739  CGF.LoopStack.setParallel(/*Enable=*/true);
2740  } else {
2741  CGF.EmitOMPSimdInit(S);
2742  }
2743  },
2744  [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2745  &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2746  SourceLocation Loc = S.getBeginLoc();
2747  // when 'distribute' is not combined with a 'for':
2748  // while (idx <= UB) { BODY; ++idx; }
2749  // when 'distribute' is combined with a 'for'
2750  // (e.g. 'distribute parallel for')
2751  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2752  CGF.EmitOMPInnerLoop(
2753  S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2754  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2755  CodeGenLoop(CGF, S, LoopExit);
2756  },
2757  [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2758  CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2759  });
2760  });
2761 
2762  EmitBlock(Continue.getBlock());
2763  BreakContinueStack.pop_back();
2764  if (!DynamicOrOrdered) {
2765  // Emit "LB = LB + Stride", "UB = UB + Stride".
2766  EmitIgnoredExpr(LoopArgs.NextLB);
2767  EmitIgnoredExpr(LoopArgs.NextUB);
2768  }
2769 
2770  EmitBranch(CondBlock);
2771  OMPLoopNestStack.clear();
2772  LoopStack.pop();
2773  // Emit the fall-through block.
2774  EmitBlock(LoopExit.getBlock());
2775 
2776  // Tell the runtime we are done.
2777  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2778  if (!DynamicOrOrdered)
2779  CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2780  S.getDirectiveKind());
2781  };
2782  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2783 }
2784 
2785 void CodeGenFunction::EmitOMPForOuterLoop(
2786  const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2787  const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2788  const OMPLoopArguments &LoopArgs,
2789  const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2790  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2791 
2792  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2793  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);
2794 
2795  assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2796  LoopArgs.Chunk != nullptr)) &&
2797  "static non-chunked schedule does not need outer loop");
2798 
2799  // Emit outer loop.
2800  //
2801  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2802  // When schedule(dynamic,chunk_size) is specified, the iterations are
2803  // distributed to threads in the team in chunks as the threads request them.
2804  // Each thread executes a chunk of iterations, then requests another chunk,
2805  // until no chunks remain to be distributed. Each chunk contains chunk_size
2806  // iterations, except for the last chunk to be distributed, which may have
2807  // fewer iterations. When no chunk_size is specified, it defaults to 1.
2808  //
2809  // When schedule(guided,chunk_size) is specified, the iterations are assigned
2810  // to threads in the team in chunks as the executing threads request them.
2811  // Each thread executes a chunk of iterations, then requests another chunk,
2812  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2813  // each chunk is proportional to the number of unassigned iterations divided
2814  // by the number of threads in the team, decreasing to 1. For a chunk_size
2815  // with value k (greater than 1), the size of each chunk is determined in the
2816  // same way, with the restriction that the chunks do not contain fewer than k
2817  // iterations (except for the last chunk to be assigned, which may have fewer
2818  // than k iterations).
2819  //
2820  // When schedule(auto) is specified, the decision regarding scheduling is
2821  // delegated to the compiler and/or runtime system. The programmer gives the
2822  // implementation the freedom to choose any possible mapping of iterations to
2823  // threads in the team.
2824  //
2825  // When schedule(runtime) is specified, the decision regarding scheduling is
2826  // deferred until run time, and the schedule and chunk size are taken from the
2827  // run-sched-var ICV. If the ICV is set to auto, the schedule is
2828  // implementation defined.
2829  //
2830  // while(__kmpc_dispatch_next(&LB, &UB)) {
2831  // idx = LB;
2832  // while (idx <= UB) { BODY; ++idx;
2833  // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2834  // } // inner loop
2835  // }
2836  //
2837  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2838  // When schedule(static, chunk_size) is specified, iterations are divided into
2839  // chunks of size chunk_size, and the chunks are assigned to the threads in
2840  // the team in a round-robin fashion in the order of the thread number.
2841  //
2842  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2843  // while (idx <= UB) { BODY; ++idx; } // inner loop
2844  // LB = LB + ST;
2845  // UB = UB + ST;
2846  // }
2847  //
2848 
2849  const Expr *IVExpr = S.getIterationVariable();
2850  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2851  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2852 
2853  if (DynamicOrOrdered) {
2854  const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2855  CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2856  llvm::Value *LBVal = DispatchBounds.first;
2857  llvm::Value *UBVal = DispatchBounds.second;
2858  CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
2859  LoopArgs.Chunk};
2860  RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2861  IVSigned, Ordered, DispatchRTInputValues);
2862  } else {
2863  CGOpenMPRuntime::StaticRTInput StaticInit(
2864  IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2865  LoopArgs.ST, LoopArgs.Chunk);
2866  RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2867  ScheduleKind, StaticInit);
2868  }
2869 
2870  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2871  const unsigned IVSize,
2872  const bool IVSigned) {
2873  if (Ordered) {
2874  CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2875  IVSigned);
2876  }
2877  };
2878 
2879  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2880  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2881  OuterLoopArgs.IncExpr = S.getInc();
2882  OuterLoopArgs.Init = S.getInit();
2883  OuterLoopArgs.Cond = S.getCond();
2884  OuterLoopArgs.NextLB = S.getNextLowerBound();
2885  OuterLoopArgs.NextUB = S.getNextUpperBound();
2886  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2887  emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2888 }
2889 
2890 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2891  const unsigned IVSize, const bool IVSigned) {}
2892 
2893 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2894  OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2895  OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2896  const CodeGenLoopTy &CodeGenLoopContent) {
2897 
2898  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2899 
2900  // Emit outer loop.
2901  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
2902  // dynamic.
2903  //
2904 
2905  const Expr *IVExpr = S.getIterationVariable();
2906  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2907  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2908 
2909  CGOpenMPRuntime::StaticRTInput StaticInit(
2910  IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2911  LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2912  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2913 
2914  // For combined 'distribute' and 'for', the increment expression of the
2915  // distribute loop is stored in DistInc; for 'distribute' alone, it is in Inc.
2916  Expr *IncExpr;
2917  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2918  IncExpr = S.getDistInc();
2919  else
2920  IncExpr = S.getInc();
2921 
2922  // This routine is shared by 'omp distribute parallel for' and
2923  // 'omp distribute': select the right EUB expression depending on the
2924  // directive.
2925  OMPLoopArguments OuterLoopArgs;
2926  OuterLoopArgs.LB = LoopArgs.LB;
2927  OuterLoopArgs.UB = LoopArgs.UB;
2928  OuterLoopArgs.ST = LoopArgs.ST;
2929  OuterLoopArgs.IL = LoopArgs.IL;
2930  OuterLoopArgs.Chunk = LoopArgs.Chunk;
2931  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2932  ? S.getCombinedEnsureUpperBound()
2933  : S.getEnsureUpperBound();
2934  OuterLoopArgs.IncExpr = IncExpr;
2935  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2936  ? S.getCombinedInit()
2937  : S.getInit();
2938  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2939  ? S.getCombinedCond()
2940  : S.getCond();
2941  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2942  ? S.getCombinedNextLowerBound()
2943  : S.getNextLowerBound();
2944  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2945  ? S.getCombinedNextUpperBound()
2946  : S.getNextUpperBound();
2947 
2948  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2949  LoopScope, OuterLoopArgs, CodeGenLoopContent,
2950  emitEmptyOrdered);
2951 }
2952 
2953 static std::pair<LValue, LValue>
2954 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2955  const OMPExecutableDirective &S) {
2956  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2957  LValue LB =
2958  EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2959  LValue UB =
2960  EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2961 
2962  // When composing 'distribute' with 'for' (e.g. as in 'distribute
2963  // parallel for') we need to use the 'distribute'
2964  // chunk lower and upper bounds rather than the whole loop iteration
2965  // space. These are parameters to the outlined function for 'parallel'
2966  // and we copy the bounds of the previous schedule into
2967  // the current ones.
2968  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2969  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2970  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2971  PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2972  PrevLBVal = CGF.EmitScalarConversion(
2973  PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2974  LS.getIterationVariable()->getType(),
2975  LS.getPrevLowerBoundVariable()->getExprLoc());
2976  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2977  PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2978  PrevUBVal = CGF.EmitScalarConversion(
2979  PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2980  LS.getIterationVariable()->getType(),
2981  LS.getPrevUpperBoundVariable()->getExprLoc());
2982 
2983  CGF.EmitStoreOfScalar(PrevLBVal, LB);
2984  CGF.EmitStoreOfScalar(PrevUBVal, UB);
2985 
2986  return {LB, UB};
2987 }
2988 
2989 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2990 /// we need to use the LB and UB expressions generated by the worksharing
2991 /// code generation support, whereas in non-combined situations we would
2992 /// just emit 0 and the LastIteration expression.
2993 /// This function is necessary due to the difference of the LB and UB
2994 /// types for the RT emission routines for 'for_static_init' and
2995 /// 'for_dispatch_init'.
2996 static std::pair<llvm::Value *, llvm::Value *>
2997 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2998  const OMPExecutableDirective &S,
2999  Address LB, Address UB) {
3000  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3001  const Expr *IVExpr = LS.getIterationVariable();
3002  // When implementing a dynamic schedule for a 'for' combined with a
3003  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is
3004  // not normalized, as each team only executes its own assigned
3005  // distribute chunk.
3006  QualType IteratorTy = IVExpr->getType();
3007  llvm::Value *LBVal =
3008  CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3009  llvm::Value *UBVal =
3010  CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3011  return {LBVal, UBVal};
3012 }
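// For illustration (hypothetical user code): in a combined construct such as
// \code
// #pragma omp distribute parallel for schedule(guided)
// for (int i = 0; i < N; ++i)
//   a[i] += b[i];
// \endcode
// each team's inner 'for' loop runs over its own distribute chunk, so the
// dispatch bounds above are loaded from LB/UB rather than the normalized
// 0..LastIteration range.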
3013 
3014 static void emitDistributeParallelForDistributeInnerBoundParams(
3015  CodeGenFunction &CGF, const OMPExecutableDirective &S,
3016  llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3017  const auto &Dir = cast<OMPLoopDirective>(S);
3018  LValue LB =
3019  CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3020  llvm::Value *LBCast =
3021  CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
3022  CGF.SizeTy, /*isSigned=*/false);
3023  CapturedVars.push_back(LBCast);
3024  LValue UB =
3025  CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3026 
3027  llvm::Value *UBCast =
3028  CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
3029  CGF.SizeTy, /*isSigned=*/false);
3030  CapturedVars.push_back(UBCast);
3031 }
3032 
3033 static void
3034 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3035  const OMPLoopDirective &S,
3036  CodeGenFunction::JumpDest LoopExit) {
3037  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
3038  PrePostActionTy &Action) {
3039  Action.Enter(CGF);
3040  bool HasCancel = false;
3041  if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
3042  if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
3043  HasCancel = D->hasCancel();
3044  else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
3045  HasCancel = D->hasCancel();
3046  else if (const auto *D =
3047  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
3048  HasCancel = D->hasCancel();
3049  }
3050  CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3051  HasCancel);
3052  CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
3053  emitDistributeParallelForInnerBounds,
3054  emitDistributeParallelForDispatchBounds);
3055  };
3056 
3057  emitCommonOMPParallelDirective(
3058  CGF, S,
3059  isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
3060  CGInlinedWorksharingLoop,
3061  emitDistributeParallelForDistributeInnerBoundParams);
3062 }
3063 
3064 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3065  const OMPDistributeParallelForDirective &S) {
3066  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3067  CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3068  S.getDistInc());
3069  };
3070  OMPLexicalScope Scope(*this, S, OMPD_parallel);
3071  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3072 }
3073 
3074 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3075  const OMPDistributeParallelForSimdDirective &S) {
3076  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3077  CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3078  S.getDistInc());
3079  };
3080  OMPLexicalScope Scope(*this, S, OMPD_parallel);
3081  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3082 }
3083 
3084 void CodeGenFunction::EmitOMPDistributeSimdDirective(
3085  const OMPDistributeSimdDirective &S) {
3086  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3087  CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3088  };
3089  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3090  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3091 }
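// For illustration (hypothetical user code): a directive lowered by the
// routine above, where the distribute loop body is emitted with simd
// annotations:
// \code
// #pragma omp distribute simd
// for (int i = 0; i < N; ++i)
//   a[i] = b[i] * s;
// \endcode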
3092 
3093 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3094  CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3095  // Emit SPMD target parallel for region as a standalone region.
3096  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3097  emitOMPSimdRegion(CGF, S, Action);
3098  };
3099  llvm::Function *Fn;
3100  llvm::Constant *Addr;
3101  // Emit target region as a standalone region.
3102  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3103  S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3104  assert(Fn && Addr && "Target device function emission failed.");
3105 }
3106 
3107 void CodeGenFunction::EmitOMPTargetSimdDirective(
3108  const OMPTargetSimdDirective &S) {
3109  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3110  emitOMPSimdRegion(CGF, S, Action);
3111  };
3112  emitCommonOMPTargetDirective(*this, S, CodeGen);
3113 }
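// For illustration (hypothetical user code): a target region whose body is
// emitted through emitOMPSimdRegion by the two functions above:
// \code
// #pragma omp target simd map(tofrom: a[0:N])
// for (int i = 0; i < N; ++i)
//   a[i] *= 2;
// \endcode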
3114 
3115 namespace {
3116 struct ScheduleKindModifiersTy {
3117  OpenMPScheduleClauseKind Kind;
3118  OpenMPScheduleClauseModifier M1;
3119  OpenMPScheduleClauseModifier M2;
3120  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3121  OpenMPScheduleClauseModifier M1,
3122  OpenMPScheduleClauseModifier M2)
3123  : Kind(Kind), M1(M1), M2(M2) {}
3124 };
3125 } // namespace
3126 
3127 bool CodeGenFunction::EmitOMPWorksharingLoop(
3128  const OMPLoopDirective &S, Expr *EUB,
3129  const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3130  const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3131  // Emit the loop iteration variable.
3132  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3133  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3134  EmitVarDecl(*IVDecl);
3135 
3136  // Emit the iterations count variable.
3137  // If it is not a variable, Sema decided to calculate iterations count on each
3138  // iteration (e.g., it is foldable into a constant).
3139  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3140  EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3141  // Emit calculation of the iterations count.
3142  EmitIgnoredExpr(S.getCalcLastIteration());
3143  }
3144 
3145  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3146 
3147  bool HasLastprivateClause;
3148  // Check pre-condition.
3149  {
3150  OMPLoopScope PreInitScope(*this, S);
3151  // Skip the entire loop if we don't meet the precondition.
3152  // If the condition constant folds and can be elided, avoid emitting the
3153  // whole loop.
3154  bool CondConstant;
3155  llvm::BasicBlock *ContBlock = nullptr;
3156  if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3157  if (!CondConstant)
3158  return false;
3159  } else {
3160  llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3161  ContBlock = createBasicBlock("omp.precond.end");
3162  emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3163  getProfileCount(&S));
3164  EmitBlock(ThenBlock);
3165  incrementProfileCounter(&S);
3166  }
3167 
3168  RunCleanupsScope DoacrossCleanupScope(*this);
3169  bool Ordered = false;
3170  if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3171  if (OrderedClause->getNumForLoops())
3172  RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3173  else
3174  Ordered = true;
3175  }
3176 
3177  llvm::DenseSet<const Expr *> EmittedFinals;
3178  emitAlignedClause(*this, S);
3179  bool HasLinears = EmitOMPLinearClauseInit(S);
3180  // Emit helper vars inits.
3181 
3182  std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3183  LValue LB = Bounds.first;
3184  LValue UB = Bounds.second;
3185  LValue ST =
3186  EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3187  LValue IL =
3188  EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3189 
3190  // Emit 'then' code.
3191  {
3192  OMPPrivateScope LoopScope(*this);
3193  if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
3194  // Emit implicit barrier to synchronize threads and avoid data races on
3195  // initialization of firstprivate variables and post-update of
3196  // lastprivate variables.
3197  CGM.getOpenMPRuntime().emitBarrierCall(
3198  *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3199  /*ForceSimpleCall=*/true);
3200  }
3201  EmitOMPPrivateClause(S, LoopScope);
3202  CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3203  *this, S, EmitLValue(S.getIterationVariable()));
3204  HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3205  EmitOMPReductionClauseInit(S, LoopScope);
3206  EmitOMPPrivateLoopCounters(S, LoopScope);
3207  EmitOMPLinearClause(S, LoopScope);
3208  (void)LoopScope.Privatize();
3209  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3210  CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3211 
3212  // Detect the loop schedule kind and chunk.
3213  const Expr *ChunkExpr = nullptr;
3214  OpenMPScheduleTy ScheduleKind;
3215  if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3216  ScheduleKind.Schedule = C->getScheduleKind();
3217  ScheduleKind.M1 = C->getFirstScheduleModifier();
3218  ScheduleKind.M2 = C->getSecondScheduleModifier();
3219  ChunkExpr = C->getChunkSize();
3220  } else {
3221  // Default behaviour for schedule clause.
3222  CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3223  *this, S, ScheduleKind.Schedule, ChunkExpr);
3224  }
3225  bool HasChunkSizeOne = false;
3226  llvm::Value *Chunk = nullptr;
3227  if (ChunkExpr) {
3228  Chunk = EmitScalarExpr(ChunkExpr);
3229  Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3230  S.getIterationVariable()->getType(),
3231  S.getBeginLoc());
3232  Expr::EvalResult Result;
3233  if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3234  llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3235  HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3236  }
3237  }
3238  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3239  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3240  // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3241  // If the static schedule kind is specified or if the ordered clause is
3242  // specified, and if no monotonic modifier is specified, the effect will
3243  // be as if the monotonic modifier was specified.
3244  bool StaticChunkedOne =
3245  RT.isStaticChunked(ScheduleKind.Schedule,
3246  /* Chunked */ Chunk != nullptr) &&
3247  HasChunkSizeOne &&
3248  isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3249  bool IsMonotonic =
3250  Ordered ||
3251  (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3252  !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3253  ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3254  ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3255  ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
3256  if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3257  /* Chunked */ Chunk != nullptr) ||
3258  StaticChunkedOne) &&
3259  !Ordered) {
3260  JumpDest LoopExit =
3261  getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3262  emitCommonSimdLoop(
3263  *this, S,
3264  [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3265  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3266  CGF.EmitOMPSimdInit(S);
3267  } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3268  if (C->getKind() == OMPC_ORDER_concurrent)
3269  CGF.LoopStack.setParallel(/*Enable=*/true);
3270  }
3271  },
3272  [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3273  &S, ScheduleKind, LoopExit,
3274  &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3275  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3276  // When no chunk_size is specified, the iteration space is divided
3277  // into chunks that are approximately equal in size, and at most
3278  // one chunk is distributed to each thread. Note that the size of
3279  // the chunks is unspecified in this case.
3280  CGOpenMPRuntime::StaticRTInput StaticInit(
3281  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3282  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3283  StaticChunkedOne ? Chunk : nullptr);
3284  CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3285  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3286  StaticInit);
3287  // UB = min(UB, GlobalUB);
3288  if (!StaticChunkedOne)
3289  CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3290  // IV = LB;
3291  CGF.EmitIgnoredExpr(S.getInit());
3292  // For unchunked static schedule generate:
3293  //
3294  // while (idx <= UB) {
3295  // BODY;
3296  // ++idx;
3297  // }
3298  //
3299  // For static schedule with chunk one:
3300  //
3301  // while (IV <= PrevUB) {
3302  // BODY;
3303  // IV += ST;
3304  // }
3305  CGF.EmitOMPInnerLoop(
3306  S, LoopScope.requiresCleanups(),
3307  StaticChunkedOne ? S.getCombinedParForInDistCond()
3308  : S.getCond(),
3309  StaticChunkedOne ? S.getDistInc() : S.getInc(),
3310  [&S, LoopExit](CodeGenFunction &CGF) {
3311  emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3312  },
3313  [](CodeGenFunction &) {});
3314  });
3315  EmitBlock(LoopExit.getBlock());
3316  // Tell the runtime we are done.
3317  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3318  CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3319  S.getDirectiveKind());
3320  };
3321  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3322  } else {
3323  // Emit the outer loop, which requests its work chunk [LB..UB] from
3324  // runtime and runs the inner loop to process it.
3325  const OMPLoopArguments LoopArguments(
3326  LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3327  IL.getAddress(*this), Chunk, EUB);
3328  EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3329  LoopArguments, CGDispatchBounds);
3330  }
3331  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3332  EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3333  return CGF.Builder.CreateIsNotNull(
3334  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3335  });
3336  }
3337  EmitOMPReductionClauseFinal(
3338  S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3339  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3340  : /*Parallel only*/ OMPD_parallel);
3341  // Emit post-update of the reduction variables if IsLastIter != 0.
3342  emitPostUpdateForReductionClause(
3343  *this, S, [IL, &S](CodeGenFunction &CGF) {
3344  return CGF.Builder.CreateIsNotNull(
3345  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3346  });
3347  // Emit final copy of the lastprivate variables if IsLastIter != 0.
3348  if (HasLastprivateClause)
3349  EmitOMPLastprivateClauseFinal(
3350  S, isOpenMPSimdDirective(S.getDirectiveKind()),
3351  Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3352  }
3353  EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3354  return CGF.Builder.CreateIsNotNull(
3355  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3356  });
3357  DoacrossCleanupScope.ForceCleanup();
3358  // We're now done with the loop, so jump to the continuation block.
3359  if (ContBlock) {
3360  EmitBranch(ContBlock);
3361  EmitBlock(ContBlock, /*IsFinished=*/true);
3362  }
3363  }
3364  return HasLastprivateClause;
3365 }
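// For illustration (hypothetical user code): per the OpenMP 4.5 rule quoted
// above, the first loop below is treated as monotonic, while the second
// explicitly opts out of the monotonic ordering guarantee:
// \code
// #pragma omp for schedule(static)                 // implies monotonic
// for (int i = 0; i < N; ++i)
//   a[i] = f(i);
// #pragma omp for schedule(nonmonotonic: dynamic)  // nonmonotonic dispatch
// for (int i = 0; i < N; ++i)
//   a[i] = f(i);
// \endcode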
3366 
3367 /// The following two functions generate expressions for the loop lower
3368 /// and upper bounds in case of static and dynamic (dispatch) schedule
3369 /// of the associated 'for' or 'distribute' loop.
3370 static std::pair<LValue, LValue>
3371 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3372  const auto &LS = cast<OMPLoopDirective>(S);
3373  LValue LB =
3374  EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3375  LValue UB =
3376  EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3377  return {LB, UB};
3378 }
3379 
3380 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3381 /// consider the lower and upper bound expressions generated by the
3382 /// worksharing loop support, but we use 0 and the iteration space size as
3383 /// constants.
3384 static std::pair<llvm::Value *, llvm::Value *>
3385 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3386  Address LB, Address UB) {
3387  const auto &LS = cast<OMPLoopDirective>(S);
3388  const Expr *IVExpr = LS.getIterationVariable();
3389  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3390  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3391  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3392  return {LBVal, UBVal};
3393 }
3394 
3395 /// Emits internal temp array declarations for the directive with inscan
3396 /// reductions.
3397 /// The code is the following:
3398 /// \code
3399 /// size num_iters = <num_iters>;
3400 /// <type> buffer[num_iters];
3401 /// \endcode
3402 static void emitScanBasedDirectiveDecls(
3403  CodeGenFunction &CGF, const OMPLoopDirective &S,
3404  llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3405  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3406  NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3407  SmallVector<const Expr *, 4> Shareds;
3408  SmallVector<const Expr *, 4> Privates;
3409  SmallVector<const Expr *, 4> ReductionOps;
3410  SmallVector<const Expr *, 4> CopyArrayTemps;
3411  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3412  assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3413  "Only inscan reductions are expected.");
3414  Shareds.append(C->varlist_begin(), C->varlist_end());
3415  Privates.append(C->privates().begin(), C->privates().end());
3416  ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3417  CopyArrayTemps.append(C->copy_array_temps().begin(),
3418  C->copy_array_temps().end());
3419  }
3420  {
3421  // Emit buffers for each reduction variable.
3422  // ReductionCodeGen is required to correctly emit the code for array
3423  // reductions.
3424  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3425  unsigned Count = 0;
3426  auto *ITA = CopyArrayTemps.begin();
3427  for (const Expr *IRef : Privates) {
3428  const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3429  // Emit variably modified arrays, used for arrays/array sections
3430  // reductions.
3431  if (PrivateVD->getType()->isVariablyModifiedType()) {
3432  RedCG.emitSharedOrigLValue(CGF, Count);
3433  RedCG.emitAggregateType(CGF, Count);
3434  }
3435  CodeGenFunction::OpaqueValueMapping DimMapping(
3436  CGF,
3437  cast<OpaqueValueExpr>(
3438  cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3439  ->getSizeExpr()),
3440  RValue::get(OMPScanNumIterations));
3441  // Emit temp buffer.
3442  CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3443  ++ITA;
3444  ++Count;
3445  }
3446  }
3447 }
3448 
3449 /// Emits the code for the directive with inscan reductions.
3450 /// The code is the following:
3451 /// \code
3452 /// #pragma omp ...
3453 /// for (i: 0..<num_iters>) {
3454 /// <input phase>;
3455 /// buffer[i] = red;
3456 /// }
3457 /// #pragma omp master // in parallel region
3458 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3459 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3460 /// buffer[cnt] op= buffer[cnt-pow(2,k)];
3461 /// #pragma omp barrier // in parallel region
3462 /// #pragma omp ...
3463 /// for (0..<num_iters>) {
3464 /// red = InclusiveScan ? buffer[i] : buffer[i-1];
3465 /// <scan phase>;
3466 /// }
3467 /// \endcode
3468 static void emitScanBasedDirective(
3469  CodeGenFunction &CGF, const OMPLoopDirective &S,
3470  llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3471  llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3472  llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3473  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3474  NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3475  SmallVector<const Expr *, 4> Privates;
3476  SmallVector<const Expr *, 4> ReductionOps;
3477  SmallVector<const Expr *, 4> LHSs;
3478  SmallVector<const Expr *, 4> RHSs;
3479  SmallVector<const Expr *, 4> CopyArrayElems;
3480  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3481  assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3482  "Only inscan reductions are expected.");
3483  Privates.append(C->privates().begin(), C->privates().end());
3484  ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3485  LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3486  RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3487  CopyArrayElems.append(C->copy_array_elems().begin(),
3488  C->copy_array_elems().end());
3489  }
3490  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3491  {
3492  // Emit loop with input phase:
3493  // #pragma omp ...
3494  // for (i: 0..<num_iters>) {
3495  // <input phase>;
3496  // buffer[i] = red;
3497  // }
3498  CGF.OMPFirstScanLoop = true;
3499  CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3500  FirstGen(CGF);
3501  }
3502  // #pragma omp barrier // in parallel region
3503  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3504  &ReductionOps,
3505  &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3506  Action.Enter(CGF);
3507  // Emit prefix reduction:
3508  // #pragma omp master // in parallel region
3509  // for (int k = 0; k != ceil(log2(n)); ++k)
3510  llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3511  llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3512  llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3513  llvm::Function *F =
3514  CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3515  llvm::Value *Arg =
3516  CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3517  llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3518  F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3519  LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3520  LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3521  llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3522  OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3523  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3524  CGF.EmitBlock(LoopBB);
3525  auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3526  // size pow2k = 1;
3527  auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3528  Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3529  Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3530  // for (size i = n - 1; i >= 2 ^ k; --i)
3531  // tmp[i] op= tmp[i-pow2k];
3532  llvm::BasicBlock *InnerLoopBB =
3533  CGF.createBasicBlock("omp.inner.log.scan.body");
3534  llvm::BasicBlock *InnerExitBB =
3535  CGF.createBasicBlock("omp.inner.log.scan.exit");
3536  llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3537  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3538  CGF.EmitBlock(InnerLoopBB);
3539  auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3540  IVal->addIncoming(NMin1, LoopBB);
3541  {
3542  CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3543  auto *ILHS = LHSs.begin();
3544  auto *IRHS = RHSs.begin();
3545  for (const Expr *CopyArrayElem : CopyArrayElems) {
3546  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3547  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3548  Address LHSAddr = Address::invalid();
3549  {
3550  CodeGenFunction::OpaqueValueMapping IdxMapping(
3551  CGF,
3552  cast<OpaqueValueExpr>(
3553  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3554  RValue::get(IVal));
3555  LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3556  }
3557  PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
3558  Address RHSAddr = Address::invalid();
3559  {
3560  llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3561  CodeGenFunction::OpaqueValueMapping IdxMapping(
3562  CGF,
3563  cast<OpaqueValueExpr>(
3564  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3565  RValue::get(OffsetIVal));
3566  RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3567  }
3568  PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
3569  ++ILHS;
3570  ++IRHS;
3571  }
3572  PrivScope.Privatize();
3573  CGF.CGM.getOpenMPRuntime().emitReduction(
3574  CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3575  {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3576  }
3577  llvm::Value *NextIVal =
3578  CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3579  IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3580  CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3581  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3582  CGF.EmitBlock(InnerExitBB);
3583  llvm::Value *Next =
3584  CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3585  Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3586  // pow2k <<= 1;
3587  llvm::Value *NextPow2K =
3588  CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3589  Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3590  llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3591  CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3592  auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3593  CGF.EmitBlock(ExitBB);
3594  };
3595  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3596  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3597  CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3598  CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3599  /*ForceSimpleCall=*/true);
3600  } else {
3601  RegionCodeGenTy RCG(CodeGen);
3602  RCG(CGF);
3603  }
3604 
3605  CGF.OMPFirstScanLoop = false;
3606  SecondGen(CGF);
3607 }
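// For illustration (hypothetical user code): an inscan reduction lowered by
// the two-pass scheme above, with a buffered input phase, the logarithmic
// prefix-reduction step in the master region, and a scan phase:
// \code
// int sum = 0;
// #pragma omp parallel for reduction(inscan, +: sum)
// for (int i = 0; i < N; ++i) {
//   sum += a[i];               // input phase, buffered per iteration
//   #pragma omp scan inclusive(sum)
//   b[i] = sum;                // scan phase, reads the prefix result
// }
// \endcode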
3608 
3609 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3610  const OMPLoopDirective &S,
3611  bool HasCancel) {
3612  bool HasLastprivates;
3613  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3614  [](const OMPReductionClause *C) {
3615  return C->getModifier() == OMPC_REDUCTION_inscan;
3616  })) {
3617  const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3618  CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3619  OMPLoopScope LoopScope(CGF, S);
3620  return CGF.EmitScalarExpr(S.getNumIterations());
3621  };
3622  const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3623  CodeGenFunction::OMPCancelStackRAII CancelRegion(
3624  CGF, S.getDirectiveKind(), HasCancel);
3625  (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3626  emitForLoopBounds,
3627  emitDispatchForLoopBounds);
3628  // Emit an implicit barrier at the end.
3629  CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3630  OMPD_for);
3631  };
3632  const auto &&SecondGen = [&S, HasCancel,
3633  &HasLastprivates](CodeGenFunction &CGF) {
3634  CodeGenFunction::OMPCancelStackRAII CancelRegion(
3635  CGF, S.getDirectiveKind(), HasCancel);
3636  HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3637  emitForLoopBounds,
3638  emitDispatchForLoopBounds);
3639  };
3640  if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3641  emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3642  emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3643  } else {
3644  CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3645  HasCancel);
3646  HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3647  emitForLoopBounds,
3648  emitDispatchForLoopBounds);
3649  }
3650  return HasLastprivates;
3651 }
3652 
3653 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3654  if (S.hasCancel())
3655  return false;
3656  for (OMPClause *C : S.clauses())
3657  if (!isa<OMPNowaitClause>(C))
3658  return false;
3659 
3660  return true;
3661 }
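// For illustration (hypothetical user code): a loop that satisfies the check
// above, having no cancellation and no clauses other than 'nowait', and so
// may be lowered through the OpenMPIRBuilder when it is enabled:
// \code
// #pragma omp for nowait
// for (int i = 0; i < N; ++i)
//   a[i] = f(i);
// \endcode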
3662 
3663 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3664  bool HasLastprivates = false;
3665  bool UseOMPIRBuilder =
3666  CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
3667  auto &&CodeGen = [this, &S, &HasLastprivates,
3668  UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3669  // Use the OpenMPIRBuilder if enabled.
3670  if (UseOMPIRBuilder) {
3671  // Emit the associated statement and get its loop representation.
3672  const Stmt *Inner = S.getRawStmt();
3673  llvm::CanonicalLoopInfo *CLI =
3674  EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3675 
3676  bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3677  llvm::OpenMPIRBuilder &OMPBuilder =
3678  CGM.getOpenMPRuntime().getOMPBuilder();
3679  llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3680  AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3681  OMPBuilder.applyWorkshareLoop(Builder.getCurrentDebugLocation(), CLI,
3682  AllocaIP, NeedsBarrier);
3683  return;
3684  }
3685 
3686  HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3687  };
3688  {
3689  auto LPCRegion =
3690  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3691  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3692  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3693  S.hasCancel());
3694  }
3695 
3696  if (!UseOMPIRBuilder) {
3697  // Emit an implicit barrier at the end.
3698  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3699  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3700  }
3701  // Check for outer lastprivate conditional update.
3702  checkForLastprivateConditionalUpdate(*this, S);
3703 }
3704 
3705 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3706  bool HasLastprivates = false;
3707  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3708  PrePostActionTy &) {
3709  HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3710  };
3711  {
3712  auto LPCRegion =
3713  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3714  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3715  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3716  }
3717 
3718  // Emit an implicit barrier at the end.
3719  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3720  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3721  // Check for outer lastprivate conditional update.
3722  checkForLastprivateConditionalUpdate(*this, S);
3723 }
3724 
3725 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3726  const Twine &Name,
3727  llvm::Value *Init = nullptr) {
3728  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3729  if (Init)
3730  CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3731  return LVal;
3732 }
3733 
3734 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3735  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3736  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3737  bool HasLastprivates = false;
3738  auto &&CodeGen = [&S, CapturedStmt, CS,
3739  &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3740  const ASTContext &C = CGF.getContext();
3741  QualType KmpInt32Ty =
3742  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3743  // Emit helper vars inits.
3744  LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3745  CGF.Builder.getInt32(0));
3746  llvm::ConstantInt *GlobalUBVal = CS != nullptr
3747  ? CGF.Builder.getInt32(CS->size() - 1)
3748  : CGF.Builder.getInt32(0);
3749  LValue UB =
3750  createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3751  LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3752  CGF.Builder.getInt32(1));
3753  LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
3754  CGF.Builder.getInt32(0));
3755  // Loop counter.
3756  LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
3757  OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3758  CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
3759  OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3760  CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3761  // Generate condition for loop.
3762  BinaryOperator *Cond = BinaryOperator::Create(
3763  C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
3764  S.getBeginLoc(), FPOptionsOverride());
3765  // Increment for loop counter.
3766  UnaryOperator *Inc = UnaryOperator::Create(
3767  C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
3768  S.getBeginLoc(), true, FPOptionsOverride());
3769  auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3770  // Iterate through all sections and emit a switch construct:
3771  // switch (IV) {
3772  // case 0:
3773  // <SectionStmt[0]>;
3774  // break;
3775  // ...
3776  // case <NumSection> - 1:
3777  // <SectionStmt[<NumSection> - 1]>;
3778  // break;
3779  // }
3780  // .omp.sections.exit:
3781  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
3782  llvm::SwitchInst *SwitchStmt =
3783  CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
3784  ExitBB, CS == nullptr ? 1 : CS->size());
3785  if (CS) {
3786  unsigned CaseNumber = 0;
3787  for (const Stmt *SubStmt : CS->children()) {
3788  auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
3789  CGF.EmitBlock(CaseBB);
3790  SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
3791  CGF.EmitStmt(SubStmt);
3792  CGF.EmitBranch(ExitBB);
3793  ++CaseNumber;
3794  }
3795  } else {
3796  llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
3797  CGF.EmitBlock(CaseBB);
3798  SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
3799  CGF.EmitStmt(CapturedStmt);
3800  CGF.EmitBranch(ExitBB);
3801  }
3802  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
3803  };
3804 
3805  CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3806  if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
3807  // Emit implicit barrier to synchronize threads and avoid data races on
3808  // initialization of firstprivate variables and post-update of lastprivate
3809  // variables.
3810  CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3811  CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3812  /*ForceSimpleCall=*/true);
3813  }
3814  CGF.EmitOMPPrivateClause(S, LoopScope);
3815  CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
3816  HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3817  CGF.EmitOMPReductionClauseInit(S, LoopScope);
3818  (void)LoopScope.Privatize();
3819  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3820  CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
3821 
3822  // Emit static non-chunked loop.
3823  OpenMPScheduleTy ScheduleKind;
3824  ScheduleKind.Schedule = OMPC_SCHEDULE_static;
3825  CGOpenMPRuntime::StaticRTInput StaticInit(
3826  /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
3827  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
3828  CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3829  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
3830  // UB = min(UB, GlobalUB);
3831  llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
3832  llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
3833  CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
3834  CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
3835  // IV = LB;
3836  CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
3837  // while (idx <= UB) { BODY; ++idx; }
3838  CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
3839  [](CodeGenFunction &) {});
3840  // Tell the runtime we are done.
3841  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3842  CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3843  S.getDirectiveKind());
3844  };
3845  CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
3846  CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3847  // Emit post-update of the reduction variables if IsLastIter != 0.
3848  emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
3849  return CGF.Builder.CreateIsNotNull(
3850  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3851  });
3852 
3853  // Emit final copy of the lastprivate variables if IsLastIter != 0.
3854  if (HasLastprivates)
3855  CGF.EmitOMPLastprivateClauseFinal(
3856  S, /*NoFinals=*/false,
3857  CGF.Builder.CreateIsNotNull(
3858  CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
3859  };
3860 
3861  bool HasCancel = false;
3862  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
3863  HasCancel = OSD->hasCancel();
3864  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
3865  HasCancel = OPSD->hasCancel();
3866  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
3867  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
3868  HasCancel);
3869  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
3870  // clause. Otherwise the barrier will be generated by the codegen for the
3871  // directive.
3872  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
3873  // Emit implicit barrier to synchronize threads and avoid data races on
3874  // initialization of firstprivate variables.
3875  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3876  OMPD_unknown);
3877  }
3878 }
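// For illustration (hypothetical user code): for
// \code
// #pragma omp sections
// {
//   #pragma omp section
//   foo();
//   #pragma omp section
//   bar();
// }
// \endcode
// the BodyGen above emits roughly 'switch (IV) { case 0: foo(); break;
// case 1: bar(); break; }', driven as a static non-chunked loop over the
// section indices.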
3879 
3880 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3881  if (CGM.getLangOpts().OpenMPIRBuilder) {
3882  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3883  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3884  using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
3885 
3886  auto FiniCB = [this](InsertPointTy IP) {
3887  OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3888  };
3889 
3890  const CapturedStmt *ICS = S.getInnermostCapturedStmt();
3891  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3892  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3893  SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
3894  if (CS) {
3895  for (const Stmt *SubStmt : CS->children()) {
3896  auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
3897  InsertPointTy CodeGenIP,
3898  llvm::BasicBlock &FiniBB) {
3899  OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
3900  FiniBB);
3901  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
3902  FiniBB);
3903  };
3904  SectionCBVector.push_back(SectionCB);
3905  }
3906  } else {
3907  auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
3908  InsertPointTy CodeGenIP,
3909  llvm::BasicBlock &FiniBB) {
3910  OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3911  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
3912  FiniBB);
3913  };
3914  SectionCBVector.push_back(SectionCB);
3915  }
3916 
3917  // Privatization callback that performs appropriate action for
3918  // shared/private/firstprivate/lastprivate/copyin/... variables.
3919  //
3920  // TODO: This defaults to shared right now.
3921  auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3922  llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
3923  // The next line is appropriate only for variables (Val) with the
3924  // data-sharing attribute "shared".
3925  ReplVal = &Val;
3926 
3927  return CodeGenIP;
3928  };
3929 
3930  CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
3931  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
3932  llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3933  AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3934  Builder.restoreIP(OMPBuilder.createSections(
3935  Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
3936  S.getSingleClause<OMPNowaitClause>()));
3937  return;
3938  }
3939  {
3940  auto LPCRegion =
3941  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3942  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3943  EmitSections(S);
3944  }
3945  // Emit an implicit barrier at the end.
3946  if (!S.getSingleClause<OMPNowaitClause>()) {
3947  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3948  OMPD_sections);
3949  }
3950  // Check for outer lastprivate conditional update.
3951  checkForLastprivateConditionalUpdate(*this, S);
3952 }
3953 
3954 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3955  if (CGM.getLangOpts().OpenMPIRBuilder) {
3956  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3957  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3958 
3959  const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
3960  auto FiniCB = [this](InsertPointTy IP) {
3961  OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3962  };
3963 
3964  auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
3965  InsertPointTy CodeGenIP,
3966  llvm::BasicBlock &FiniBB) {
3967  OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3968  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
3969  CodeGenIP, FiniBB);
3970  };
3971 
3972  LexicalScope Scope(*this, S.getSourceRange());
3973  EmitStopPoint(&S);
3974  Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
3975 
3976  return;
3977  }
3978  LexicalScope Scope(*this, S.getSourceRange());
3979  EmitStopPoint(&S);
3980  EmitStmt(S.getAssociatedStmt());
3981 }
3982 
3983 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
3984  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
3985  llvm::SmallVector<const Expr *, 8> DestExprs;
3986  llvm::SmallVector<const Expr *, 8> SrcExprs;
3987  llvm::SmallVector<const Expr *, 8> AssignmentOps;
3988  // Check if there are any 'copyprivate' clauses associated with this
3989  // 'single' construct.
3990  // Build a list of copyprivate variables along with helper expressions
3991  // (<source>, <destination>, <destination>=<source> expressions)
3992  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
3993  CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
3994  DestExprs.append(C->destination_exprs().begin(),
3995  C->destination_exprs().end());
3996  SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
3997  AssignmentOps.append(C->assignment_ops().begin(),
3998  C->assignment_ops().end());
3999  }
4000  // Emit code for 'single' region along with 'copyprivate' clauses
4001  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4002  Action.Enter(CGF);
4003  OMPPrivateScope SingleScope(CGF);
4004  (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
4005  CGF.EmitOMPPrivateClause(S, SingleScope);
4006  (void)SingleScope.Privatize();
4007  CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4008  };
4009  {
4010  auto LPCRegion =
4012  OMPLexicalScope Scope(*this, S, OMPD_unknown);
4013  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4014  CopyprivateVars, DestExprs,
4015  SrcExprs, AssignmentOps);
4016  }
4017  // Emit an implicit barrier at the end (to avoid data races on firstprivate
4018  // init, or if no 'nowait' clause was specified and no 'copyprivate' clause).
4019  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4020  CGM.getOpenMPRuntime().emitBarrierCall(
4021  *this, S.getBeginLoc(),
4022  S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4023  }
4024  // Check for outer lastprivate conditional update.
4025  checkForLastprivateConditionalUpdate(*this, S);
4026 }
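// For illustration (hypothetical user code): a 'single' region whose result
// is broadcast to the rest of the team via the copyprivate machinery above:
// \code
// int x;
// #pragma omp single copyprivate(x)
// x = read_config();   // executed by one thread; x is copied to all others
// \endcode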
4027 
4028 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4029  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4030  Action.Enter(CGF);
4031  CGF.EmitStmt(S.getRawStmt());
4032  };
4033  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4034 }
4035 
4036 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4037  if (CGM.getLangOpts().OpenMPIRBuilder) {
4038  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4039  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4040 
4041  const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4042 
4043  auto FiniCB = [this](InsertPointTy IP) {
4044  OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4045  };
4046 
4047  auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4048  InsertPointTy CodeGenIP,
4049  llvm::BasicBlock &FiniBB) {
4050  OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4051  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
4052  CodeGenIP, FiniBB);
4053  };
4054 
4055  LexicalScope Scope(*this, S.getSourceRange());
4056  EmitStopPoint(&S);
4057  Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
4058 
4059  return;
4060  }
4061  LexicalScope Scope(*this, S.getSourceRange());
4062  EmitStopPoint(&S);
4063  emitMaster(*this, S);
4064 }
4065 
4066 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4067  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4068  Action.Enter(CGF);
4069  CGF.EmitStmt(S.getRawStmt());
4070  };
4071  Expr *Filter = nullptr;
4072  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4073  Filter = FilterClause->getThreadID();
4074  CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4075  Filter);
4076 }
4077 
4078 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4079  if (CGM.getLangOpts().OpenMPIRBuilder) {
4080  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4081  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4082 
4083  const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4084  const Expr *Filter = nullptr;
4085  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4086  Filter = FilterClause->getThreadID();
4087  llvm::Value *FilterVal = Filter
4088  ? EmitScalarExpr(Filter, CGM.Int32Ty)
4089  : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
4090 
4091  auto FiniCB = [this](InsertPointTy IP) {
4092  OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4093  };
4094 
4095  auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4096  InsertPointTy CodeGenIP,
4097  llvm::BasicBlock &FiniBB) {
4098  OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4099  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
4100  CodeGenIP, FiniBB);
4101  };
4102 
4103  LexicalScope Scope(*this, S.getSourceRange());
4104  EmitStopPoint(&S);
4105  Builder.restoreIP(
4106  OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4107 
4108  return;
4109  }
4110  LexicalScope Scope(*this, S.getSourceRange());
4111  EmitStopPoint(&S);
4112  emitMasked(*this, S);
4113 }
4114 
4115 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4116  if (CGM.getLangOpts().OpenMPIRBuilder) {
4117  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4118  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4119 
4120  const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4121  const Expr *Hint = nullptr;
4122  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4123  Hint = HintClause->getHint();
4124 
4125  // TODO: This is slightly different from what's currently being done in
4126  // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4127  // about typing is final.
4128  llvm::Value *HintInst = nullptr;
4129  if (Hint)
4130  HintInst =
4131  Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
4132 
4133  auto FiniCB = [this](InsertPointTy IP) {
4134  OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4135  };
4136 
4137  auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4138  InsertPointTy CodeGenIP,
4139  llvm::BasicBlock &FiniBB) {
4140  OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4141  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
4142  CodeGenIP, FiniBB);
4143  };
4144 
4145  LexicalScope Scope(*this, S.getSourceRange());
4146  EmitStopPoint(&S);
4147  Builder.restoreIP(OMPBuilder.createCritical(
4148  Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
4149  HintInst));
4150 
4151  return;
4152  }
4153 
4154  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4155  Action.Enter(CGF);
4156  CGF.EmitStmt(S.getAssociatedStmt());
4157  };
4158  const Expr *Hint = nullptr;
4159  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4160  Hint = HintClause->getHint();
4161  LexicalScope Scope(*this, S.getSourceRange());
4162  EmitStopPoint(&S);
4163  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
4164  S.getDirectiveName().getAsString(),
4165  CodeGen, S.getBeginLoc(), Hint);
4166 }
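// For illustration (hypothetical user code): a named critical region with a
// hint expression, which reaches the emitCriticalRegion call above (the hint
// constant comes from <omp.h>):
// \code
// #pragma omp critical(update) hint(omp_sync_hint_contended)
// ++counter;
// \endcode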
4167 
4168 void CodeGenFunction::EmitOMPParallelForDirective(
4169  const OMPParallelForDirective &S) {
4170  // Emit directive as a combined directive that consists of two implicit
4171  // directives: 'parallel' with 'for' directive.
4172  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4173  Action.Enter(CGF);
4174  (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4175  };
4176  {
4177  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4178  [](const OMPReductionClause *C) {
4179  return C->getModifier() == OMPC_REDUCTION_inscan;
4180  })) {
4181  const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4182  CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4183  CGCapturedStmtInfo CGSI(CR_OpenMP);
4184  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4185  OMPLoopScope LoopScope(CGF, S);
4186  return CGF.EmitScalarExpr(S.getNumIterations());
4187  };
4188  emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4189  }
4190  auto LPCRegion =
4191  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4192  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4193  emitEmptyBoundParameters);
4194  }
4195  // Check for outer lastprivate conditional update.
4196  checkForLastprivateConditionalUpdate(*this, S);
4197 }
4198 
4199 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4200  const OMPParallelForSimdDirective &S) {
4201  // Emit directive as a combined directive that consists of two implicit
4202  // directives: 'parallel' with 'for' directive.
4203  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4204  Action.Enter(CGF);
4205  (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4206  };
4207  {
4208  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4209  [](const OMPReductionClause *C) {
4210  return C->getModifier() == OMPC_REDUCTION_inscan;
4211  })) {
4212  const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4213  CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4214  CGCapturedStmtInfo CGSI(CR_OpenMP);
4215  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4216  OMPLoopScope LoopScope(CGF, S);
4217  return CGF.EmitScalarExpr(S.getNumIterations());
4218  };
4219  emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4220  }
4221  auto LPCRegion =
4222  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4223  emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4224  emitEmptyBoundParameters);
4225  }
4226  // Check for outer lastprivate conditional update.
4227  checkForLastprivateConditionalUpdate(*this, S);
4228 }
4229 
4230 void CodeGenFunction::EmitOMPParallelMasterDirective(
4231  const OMPParallelMasterDirective &S) {
4232  // Emit directive as a combined directive that consists of two implicit
4233  // directives: 'parallel' with 'master' directive.
4234  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4235  Action.Enter(CGF);
4236  OMPPrivateScope PrivateScope(CGF);
4237  bool Copyins = CGF.EmitOMPCopyinClause(S);
4238  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4239  if (Copyins) {
4240  // Emit implicit barrier to synchronize threads and avoid data races on
4241  // propagation of the master thread's values of threadprivate variables
4242  // to local instances of those variables in all other implicit threads.
4243  CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4244  CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4245  /*ForceSimpleCall=*/true);
4246  }
4247  CGF.EmitOMPPrivateClause(S, PrivateScope);
4248  CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4249  (void)PrivateScope.Privatize();
4250  emitMaster(CGF, S);
4251  CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4252  };
4253  {
4254  auto LPCRegion =
4255  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4256  emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4257  emitEmptyBoundParameters);
4258  emitPostUpdateForReductionClause(*this, S,
4259  [](CodeGenFunction &) { return nullptr; });
4260  }
4261  // Check for outer lastprivate conditional update.
4262  checkForLastprivateConditionalUpdate(*this, S);
4263 }
4264 
4265 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4266  const OMPParallelSectionsDirective &S) {
4267  // Emit directive as a combined directive that consists of two implicit
4268  // directives: 'parallel' with 'sections' directive.
4269  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4270  Action.Enter(CGF);
4271  CGF.EmitSections(S);
4272  };
4273  {
4274  auto LPCRegion =
4275  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4276  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4277  emitEmptyBoundParameters);
4278  }
4279  // Check for outer lastprivate conditional update.
4280  checkForLastprivateConditionalUpdate(*this, S);
4281 }
4282 
4283 namespace {
4284 /// Get the list of variables declared in the context of the untied tasks.
4285 class CheckVarsEscapingUntiedTaskDeclContext final
4286  : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4287  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4288 
4289 public:
4290  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4291  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4292  void VisitDeclStmt(const DeclStmt *S) {
4293  if (!S)
4294  return;
4295  // Need to privatize only local vars; static locals can be processed as is.
4296  for (const Decl *D : S->decls()) {
4297  if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4298  if (VD->hasLocalStorage())
4299  PrivateDecls.push_back(VD);
4300  }
4301  }
4302  void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; }
4303  void VisitCapturedStmt(const CapturedStmt *) { return; }
4304  void VisitLambdaExpr(const LambdaExpr *) { return; }
4305  void VisitBlockExpr(const BlockExpr *) { return; }
4306  void VisitStmt(const Stmt *S) {
4307  if (!S)
4308  return;
4309  for (const Stmt *Child : S->children())
4310  if (Child)
4311  Visit(Child);
4312  }
4313 
4314  /// Returns the list of local variables collected for privatization.
4315  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4316 };
4317 } // anonymous namespace
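// For illustration (hypothetical user code): in
// \code
// #pragma omp task untied
// {
//   int local = compute();   // local storage: collected into PrivateDecls
//   use(local);
// }
// \endcode
// the visitor above records 'local' for privatization, while declarations
// inside nested OpenMP directives, captured statements, lambdas, and blocks
// are deliberately skipped.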
4318 
4319 void CodeGenFunction::EmitOMPTaskBasedDirective(
4320  const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4321  const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4322  OMPTaskDataTy &Data) {
4323  // Emit outlined function for task construct.
4324  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4325  auto I = CS->getCapturedDecl()->param_begin();
4326  auto PartId = std::next(I);
4327  auto TaskT = std::next(I, 4);
4328  // Check if the task is final
4329  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4330  // If the condition constant folds and can be elided, try to avoid emitting
4331  // the condition and the dead arm of the if/else.
4332  const Expr *Cond = Clause->getCondition();
4333  bool CondConstant;
4334  if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4335  Data.Final.setInt(CondConstant);
4336  else
4337  Data.Final.setPointer(EvaluateExprAsBool(Cond));
4338  } else {
4339  // By default the task is not final.
4340  Data.Final.setInt(/*IntVal=*/false);
4341  }
4342  // Check if the task has 'priority' clause.
4343  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4344  const Expr *Prio = Clause->getPriority();
4345  Data.Priority.setInt(/*IntVal=*/true);
4346  Data.Priority.setPointer(EmitScalarConversion(
4347  EmitScalarExpr(Prio), Prio->getType(),
4348  getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4349  Prio->getExprLoc()));
4350  }
4351  // The first function argument for tasks is a thread id, the second one is a
4352  // part id (0 for tied tasks, >=0 for untied task).
4353  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4354  // Get list of private variables.
4355  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4356  auto IRef = C->varlist_begin();
4357  for (const Expr *IInit : C->private_copies()) {
4358  const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4359  if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4360  Data.PrivateVars.push_back(*IRef);
4361  Data.PrivateCopies.push_back(IInit);
4362  }
4363  ++IRef;
4364  }
4365  }
4366  EmittedAsPrivate.clear();
4367  // Get list of firstprivate variables.
4368  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4369  auto IRef = C->varlist_begin();
4370  auto IElemInitRef = C->inits().begin();
4371  for (const Expr *IInit : C->private_copies()) {
4372  const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4373  if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4374  Data.FirstprivateVars.push_back(*IRef);
4375  Data.FirstprivateCopies.push_back(IInit);
4376  Data.FirstprivateInits.push_back(*IElemInitRef);
4377  }
4378  ++IRef;
4379  ++IElemInitRef;
4380  }
4381  }
4382  // Get list of lastprivate variables (for taskloops).
4383  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4384  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4385  auto IRef = C->varlist_begin();
4386  auto ID = C->destination_exprs().begin();
4387  for (const Expr *IInit : C->private_copies()) {
4388  const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4389  if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4390  Data.LastprivateVars.push_back(*IRef);
4391  Data.LastprivateCopies.push_back(IInit);
4392  }
4393  LastprivateDstsOrigs.insert(
4394  std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
4395  cast<DeclRefExpr>(*IRef)));
4396  ++IRef;
4397  ++ID;
4398  }
4399  }
4400  SmallVector<const Expr *, 4> LHSs;
4401  SmallVector<const Expr *, 4> RHSs;
4402  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4403  Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4404  Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4405  Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4406  Data.ReductionOps.append(C->reduction_ops().begin(),
4407  C->reduction_ops().end());
4408  LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4409  RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4410  }
4411  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4412  *this, S.getBeginLoc(), LHSs, RHSs, Data);
4413  // Build list of dependences.
4414  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4415  OMPTaskDataTy::DependData &DD =
4416  Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4417  DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4418  }
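// Illustrative sketch (not part of the original file): each 'depend' clause
// on the task, e.g.
//
//   #pragma omp task depend(in : a) depend(out : b)
//   b = f(a);
//
// contributes one DependData entry whose DepExprs lists the clause's
// variables.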
4419  // Get list of local vars for untied tasks.
4420  if (!Data.Tied) {
4421  CheckVarsEscapingUntiedTaskDeclContext Checker;
4422  Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4423  Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4424  Checker.getPrivateDecls().end());
4425  }
4426  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4427  CapturedRegion](CodeGenFunction &CGF,
4428  PrePostActionTy &Action) {
4429  llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4430  std::pair<Address, Address>>
4431  UntiedLocalVars;
4432  // Set proper addresses for generated private copies.
4433  OMPPrivateScope Scope(CGF);
4434  llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
4435  if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
4436  !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
4437  enum { PrivatesParam = 2, CopyFnParam = 3 };
4438  llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4439  CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4440  llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4441  CS->getCapturedDecl()->getParam(PrivatesParam)));
4442  // Map privates.
4443  llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4444  llvm::SmallVector<llvm::Value *, 16> CallArgs;
4445  llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4446  CallArgs.push_back(PrivatesPtr);
4447  ParamTypes.push_back(PrivatesPtr->getType());
4448  for (const Expr *E : Data.PrivateVars) {
4449  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4450  Address PrivatePtr = CGF.CreateMemTemp(
4451  CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
4452  PrivatePtrs.emplace_back(VD, PrivatePtr);
4453  CallArgs.push_back(PrivatePtr.getPointer());
4454  ParamTypes.push_back(PrivatePtr.getType());
4455  }
4456  for (const Expr *E : Data.FirstprivateVars) {
4457  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4458  Address PrivatePtr =
4459  CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4460  ".firstpriv.ptr.addr");
4461  PrivatePtrs.emplace_back(VD, PrivatePtr);
4462  FirstprivatePtrs.emplace_back(VD, PrivatePtr);
4463  CallArgs.push_back(PrivatePtr.getPointer());
4464  ParamTypes.push_back(PrivatePtr.getType());
4465  }
4466  for (const Expr *E : Data.LastprivateVars) {
4467  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4468  Address PrivatePtr =
4469  CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4470  ".lastpriv.ptr.addr");
4471  PrivatePtrs.emplace_back(VD, PrivatePtr);
4472  CallArgs.push_back(PrivatePtr.getPointer());
4473  ParamTypes.push_back(PrivatePtr.getType());
4474  }
4475  for (const VarDecl *VD : Data.PrivateLocals) {
4476  QualType Ty = VD->getType().getNonReferenceType();
4477  if (VD->getType()->isLValueReferenceType())
4478  Ty = CGF.getContext().getPointerType(Ty);
4479  if (isAllocatableDecl(VD))
4480  Ty = CGF.getContext().getPointerType(Ty);
4481  Address PrivatePtr = CGF.CreateMemTemp(
4482  CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4483  auto Result = UntiedLocalVars.insert(
4484  std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
4485  // If the key already exists, update the entry in place.
4486  if (!Result.second)
4487  *Result.first = std::make_pair(
4488  VD, std::make_pair(PrivatePtr, Address::invalid()));
4489  CallArgs.push_back(PrivatePtr.getPointer());
4490  ParamTypes.push_back(PrivatePtr.getType());
4491  }
4492  auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4493  ParamTypes, /*isVarArg=*/false);
4494  CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4495  CopyFn, CopyFnTy->getPointerTo());
4496  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4497  CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4498  for (const auto &Pair : LastprivateDstsOrigs) {
4499  const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4500  DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4501  /*RefersToEnclosingVariableOrCapture=*/
4502  CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4503  Pair.second->getType(), VK_LValue,
4504  Pair.second->getExprLoc());
4505  Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
4506  return CGF.EmitLValue(&DRE).getAddress(CGF);
4507  });
4508  }
4509  for (const auto &Pair : PrivatePtrs) {
4510  Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4511  CGF.getContext().getDeclAlign(Pair.first));
4512  Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4513  }
4514  // Adjust mapping for internal locals by mapping actual memory instead of
4515  // a pointer to this memory.
4516  for (auto &Pair : UntiedLocalVars) {
4517  if (isAllocatableDecl(Pair.first)) {
4518  llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4519  Address Replacement(Ptr, CGF.getPointerAlign());
4520  Pair.second.first = Replacement;
4521  Ptr = CGF.Builder.CreateLoad(Replacement);
4522  Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4523  Pair.second.second = Replacement;
4524  } else {
4525  llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4526  Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4527  Pair.second.first = Replacement;
4528  }
4529  }
4530  }
4531  if (Data.Reductions) {
4532  OMPPrivateScope FirstprivateScope(CGF);
4533  for (const auto &Pair : FirstprivatePtrs) {
4534  Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4535  CGF.getContext().getDeclAlign(Pair.first));
4536  FirstprivateScope.addPrivate(Pair.first,
4537  [Replacement]() { return Replacement; });
4538  }
4539  (void)FirstprivateScope.Privatize();
4540  OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4541  ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4542  Data.ReductionCopies, Data.ReductionOps);
4543  llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4544  CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4545  for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4546  RedCG.emitSharedOrigLValue(CGF, Cnt);
4547  RedCG.emitAggregateType(CGF, Cnt);
4548  // FIXME: This must be removed once the runtime library is fixed.
4549  // Emit required threadprivate variables for
4550  // initializer/combiner/finalizer.
4551  CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4552  RedCG, Cnt);
4553  Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4554  CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4555  Replacement =
4556  Address(CGF.EmitScalarConversion(
4557  Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4558  CGF.getContext().getPointerType(
4559  Data.ReductionCopies[Cnt]->getType()),
4560  Data.ReductionCopies[Cnt]->getExprLoc()),
4561  Replacement.getAlignment());
4562  Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4563  Scope.addPrivate(RedCG.getBaseDecl(Cnt),
4564  [Replacement]() { return Replacement; });
4565  }
4566  }
4567  // Privatize all private variables except for in_reduction items.
4568  (void)Scope.Privatize();
4569  SmallVector<const Expr *, 4> InRedVars;
4570  SmallVector<const Expr *, 4> InRedPrivs;
4571  SmallVector<const Expr *, 4> InRedOps;
4572  SmallVector<const Expr *, 4> TaskgroupDescriptors;
4573  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4574  auto IPriv = C->privates().begin();
4575  auto IRed = C->reduction_ops().begin();
4576  auto ITD = C->taskgroup_descriptors().begin();
4577  for (const Expr *Ref : C->varlists()) {
4578  InRedVars.emplace_back(Ref);
4579  InRedPrivs.emplace_back(*IPriv);
4580  InRedOps.emplace_back(*IRed);
4581  TaskgroupDescriptors.emplace_back(*ITD);
4582  std::advance(IPriv, 1);
4583  std::advance(IRed, 1);
4584  std::advance(ITD, 1);
4585  }
4586  }
4587  // Privatize in_reduction items here, because taskgroup descriptors must be
4588  // privatized earlier.
4589  OMPPrivateScope InRedScope(CGF);
4590  if (!InRedVars.empty()) {
4591  ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4592  for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4593  RedCG.emitSharedOrigLValue(CGF, Cnt);
4594  RedCG.emitAggregateType(CGF, Cnt);
4595  // The taskgroup descriptor variable is always implicit firstprivate and
4596  // privatized already during processing of the firstprivates.
4597  // FIXME: This must be removed once the runtime library is fixed.
4598  // Emit required threadprivate variables for
4599  // initializer/combiner/finalizer.
4600  CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4601  RedCG, Cnt);
4602  llvm::Value *ReductionsPtr;
4603  if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4604  ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4605  TRExpr->getExprLoc());
4606  } else {
4607  ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4608  }
4609  Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4610  CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4611  Replacement = Address(
4612  CGF.EmitScalarConversion(
4613  Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4614  CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4615  InRedPrivs[Cnt]->getExprLoc()),
4616  Replacement.getAlignment());
4617  Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4618  InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
4619  [Replacement]() { return Replacement; });
4620  }
4621  }
4622  (void)InRedScope.Privatize();
4623 
4624  CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4625  UntiedLocalVars);
4626  Action.Enter(CGF);
4627  BodyGen(CGF);
4628  };
4629  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4630  S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4631  Data.NumberOfParts);
4632  OMPLexicalScope Scope(*this, S, llvm::None,
4633  !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4634  !isOpenMPSimdDirective(S.getDirectiveKind()));
4635  TaskGen(*this, OutlinedFn, Data);
4636 }
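// Illustrative sketch (not part of the original file): the shared task-based
// codegen above serves directives such as
//
//   #pragma omp task firstprivate(x) private(y)
//   y = g(x);
//
// BodyGen emits the task body inside the outlined function, and TaskGen
// emits the runtime call that schedules it.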
4637 
4638 static ImplicitParamDecl *
4639 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4640  QualType Ty, CapturedDecl *CD,
4641  SourceLocation Loc) {
4642  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4643  ImplicitParamDecl::Other);
4644  auto *OrigRef = DeclRefExpr::Create(
4645  C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4646  /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4647  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4648  ImplicitParamDecl::Other);
4649  auto *PrivateRef = DeclRefExpr::Create(
4650  C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4651  /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4652  QualType ElemType = C.getBaseElementType(Ty);
4653  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4654  ImplicitParamDecl::Other);
4655  auto *InitRef = DeclRefExpr::Create(
4656  C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4657  /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4658  PrivateVD->setInitStyle(VarDecl::CInit);
4659  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4660  InitRef, /*BasePath=*/nullptr,
4661  VK_PRValue, FPOptionsOverride()));
4662  Data.FirstprivateVars.emplace_back(OrigRef);
4663  Data.FirstprivateCopies.emplace_back(PrivateRef);
4664  Data.FirstprivateInits.emplace_back(InitRef);
4665  return OrigVD;
4666 }
4667 
4668 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4669  const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4670  OMPTargetDataInfo &InputInfo) {
4671  // Emit outlined function for task construct.
4672  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4673  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4674  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4675  auto I = CS->getCapturedDecl()->param_begin();
4676  auto PartId = std::next(I);
4677  auto TaskT = std::next(I, 4);
4678  OMPTaskDataTy Data;
4679  // The task is not final.
4680  Data.Final.setInt(/*IntVal=*/false);
4681  // Get list of firstprivate variables.
4682  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4683  auto IRef = C->varlist_begin();
4684  auto IElemInitRef = C->inits().begin();
4685  for (auto *IInit : C->private_copies()) {
4686  Data.FirstprivateVars.push_back(*IRef);
4687  Data.FirstprivateCopies.push_back(IInit);
4688  Data.FirstprivateInits.push_back(*IElemInitRef);
4689  ++IRef;
4690  ++IElemInitRef;
4691  }
4692  }
4693  OMPPrivateScope TargetScope(*this);
4694  VarDecl *BPVD = nullptr;
4695  VarDecl *PVD = nullptr;
4696  VarDecl *SVD = nullptr;
4697  VarDecl *MVD = nullptr;
4698  if (InputInfo.NumberOfTargetItems > 0) {
4699  auto *CD = CapturedDecl::Create(
4700  getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
4701  llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
4702  QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
4703  getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
4704  /*IndexTypeQuals=*/0);
4705  BPVD = createImplicitFirstprivateForType(
4706  getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4707  PVD = createImplicitFirstprivateForType(
4708  getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4709  QualType SizesType = getContext().getConstantArrayType(
4710  getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
4711  ArrSize, nullptr, ArrayType::Normal,
4712  /*IndexTypeQuals=*/0);
4713  SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
4714  S.getBeginLoc());
4715  TargetScope.addPrivate(
4716  BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
4717  TargetScope.addPrivate(PVD,
4718  [&InputInfo]() { return InputInfo.PointersArray; });
4719  TargetScope.addPrivate(SVD,
4720  [&InputInfo]() { return InputInfo.SizesArray; });
4721  // If there is no user-defined mapper, the mapper array will be nullptr. In
4722  // this case, we don't need to privatize it.
4723  if (!dyn_cast_or_null<llvm::ConstantPointerNull>(
4724  InputInfo.MappersArray.getPointer())) {
4725  MVD = createImplicitFirstprivateForType(
4726  getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4727  TargetScope.addPrivate(MVD,
4728  [&InputInfo]() { return InputInfo.MappersArray; });
4729  }
4730  }
4731  (void)TargetScope.Privatize();
4732  // Build list of dependences.
4733  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4734  OMPTaskDataTy::DependData &DD =
4735  Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4736  DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4737  }
4738  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
4739  &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
4740  // Set proper addresses for generated private copies.
4741  OMPPrivateScope Scope(CGF);
4742  if (!Data.FirstprivateVars.empty()) {
4743  enum { PrivatesParam = 2, CopyFnParam = 3 };
4744  llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4745  CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4746  llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4747  CS->getCapturedDecl()->getParam(PrivatesParam)));
4748  // Map privates.
4749  llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4750  llvm::SmallVector<llvm::Value *, 16> CallArgs;
4751  llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4752  CallArgs.push_back(PrivatesPtr);
4753  ParamTypes.push_back(PrivatesPtr->getType());
4754  for (const Expr *E : Data.FirstprivateVars) {
4755  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4756  Address PrivatePtr =
4757  CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4758  ".firstpriv.ptr.addr");
4759  PrivatePtrs.emplace_back(VD, PrivatePtr);
4760  CallArgs.push_back(PrivatePtr.getPointer());
4761  ParamTypes.push_back(PrivatePtr.getType());
4762  }
4763  auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4764  ParamTypes, /*isVarArg=*/false);
4765  CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4766  CopyFn, CopyFnTy->getPointerTo());
4767  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4768  CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4769  for (const auto &Pair : PrivatePtrs) {
4770  Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4771  CGF.getContext().getDeclAlign(Pair.first));
4772  Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4773  }
4774  }
4775  // Privatize all private variables except for in_reduction items.
4776  (void)Scope.Privatize();
4777  if (InputInfo.NumberOfTargetItems > 0) {
4778  InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
4779  CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
4780  InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
4781  CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
4782  InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
4783  CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
4784  // If MVD is nullptr, the mapper array is not privatized.
4785  if (MVD)
4786  InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
4787  CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
4788  }
4789 
4790  Action.Enter(CGF);
4791  OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
4792  BodyGen(CGF);
4793  };
4794  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4795  S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
4796  Data.NumberOfParts);
4797  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
4798  IntegerLiteral IfCond(getContext(), TrueOrFalse,
4799  getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
4800  SourceLocation());
4801 
4802  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
4803  SharedsTy, CapturedStruct, &IfCond, Data);
4804 }
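// Illustrative sketch (not part of the original file): this path implements
// target-related directives that are wrapped in a task, e.g.
//
//   #pragma omp target enter data map(to : a) depend(in : a) nowait
//
// The base-pointer/pointer/size (and optional mapper) arrays become implicit
// firstprivates of the generated task, and the presence of 'nowait' selects
// the constant if-condition passed to the task call.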
4805 
4806 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
4807  // Emit outlined function for task construct.
4808  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4809  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4810  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4811  const Expr *IfCond = nullptr;
4812  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4813  if (C->getNameModifier() == OMPD_unknown ||
4814  C->getNameModifier() == OMPD_task) {
4815  IfCond = C->getCondition();
4816  break;
4817  }
4818  }
4819 
4820  OMPTaskDataTy Data;
4821  // Check if we should emit tied or untied task.
4822  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
4823  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
4824  CGF.EmitStmt(CS->getCapturedStmt());
4825  };
4826  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4827  IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
4828  const OMPTaskDataTy &Data) {
4829  CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
4830  SharedsTy, CapturedStruct, IfCond,
4831  Data);
4832  };
4833  auto LPCRegion =
4834  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4835  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
4836 }
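// Illustrative sketch (not part of the original file): for plain task
// directives, e.g.
//
//   #pragma omp task if(cond) untied
//   work();
//
// an 'untied' clause clears Data.Tied, and a task-specific (or unmodified)
// 'if' clause becomes the runtime IfCond.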
4837 
4838 void CodeGenFunction::EmitOMPTaskyieldDirective(
4839  const OMPTaskyieldDirective &S) {
4840  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
4841 }
4842 
4844  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
4845 }
4846 
4847 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
4848  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
4849 }
4850 
4851 void CodeGenFunction::EmitOMPTaskgroupDirective(
4852  const OMPTaskgroupDirective &S) {
4853  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4854  Action.Enter(CGF);
4855  if (const Expr *E = S.getReductionRef()) {
4856  SmallVector<const Expr *, 4> LHSs;
4857  SmallVector<const Expr *, 4> RHSs;
4858  OMPTaskDataTy Data;
4859  for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
4860  Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4861  Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4862  Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4863  Data.ReductionOps.append(C->reduction_ops().begin(),
4864  C->reduction_ops().end());
4865  LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4866  RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4867  }
4868  llvm::Value *ReductionDesc =
4869  CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
4870  LHSs, RHSs, Data);
4871  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4872  CGF.EmitVarDecl(*VD);
4873  CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
4874  /*Volatile=*/false, E->getType());
4875  }
4876  CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4877  };
4878  OMPLexicalScope Scope(*this, S, OMPD_unknown);
4879  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
4880 }
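// Illustrative sketch (not part of the original file): the reduction
// descriptor emitted above supports code such as
//
//   #pragma omp taskgroup task_reduction(+ : sum)
//   {
//   #pragma omp task in_reduction(+ : sum)
//     sum += f();
//   }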
4881 
4882 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
4883  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
4884  ? llvm::AtomicOrdering::NotAtomic
4885  : llvm::AtomicOrdering::AcquireRelease;
4886  CGM.getOpenMPRuntime().emitFlush(
4887  *this,
4888  [&S]() -> ArrayRef<const Expr *> {
4889  if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
4890  return llvm::makeArrayRef(FlushClause->varlist_begin(),
4891  FlushClause->varlist_end());
4892  return llvm::None;
4893  }(),
4894  S.getBeginLoc(), AO);
4895 }
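// Illustrative sketch (not part of the original file): both forms below reach
// this function; only the second carries a variable list, which is why the
// atomic ordering above depends on the presence of an OMPFlushClause:
//
//   #pragma omp flush
//   #pragma omp flush(a, b)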
4896 
4897 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
4898  const auto *DO = S.getSingleClause<OMPDepobjClause>();
4899  LValue DOLVal = EmitLValue(DO->getDepobj());
4900  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
4901  OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
4902  DC->getModifier());
4903  Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
4904  Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
4905  *this, Dependencies, DC->getBeginLoc());
4906  EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
4907  return;
4908  }
4909  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
4910  CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
4911  return;
4912  }
4913  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
4914  CGM.getOpenMPRuntime().emitUpdateClause(
4915  *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
4916  return;
4917  }
4918 }
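// Illustrative sketch (not part of the original file): exactly one of the
// three clauses handled above appears on a depobj directive, e.g.
//
//   omp_depend_t obj;
//   #pragma omp depobj(obj) depend(inout : x)   // initialize
//   #pragma omp depobj(obj) update(in)          // update
//   #pragma omp depobj(obj) destroy             // destroy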
4919 
4920 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4921  if (!OMPParentLoopDirectiveForScan)
4922  return;
4923  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4924  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4925  SmallVector<const Expr *, 4> Shareds;
4926  SmallVector<const Expr *, 4> Privates;
4927  SmallVector<const Expr *, 4> LHSs;
4928  SmallVector<const Expr *, 4> RHSs;
4929  SmallVector<const Expr *, 4> ReductionOps;
4930  SmallVector<const Expr *, 4> CopyOps;
4931  SmallVector<const Expr *, 4> CopyArrayTemps;
4932  SmallVector<const Expr *, 4> CopyArrayElems;
4933  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
4934  if (C->getModifier() != OMPC_REDUCTION_inscan)
4935  continue;
4936  Shareds.append(C->varlist_begin(), C->varlist_end());
4937  Privates.append(C->privates().begin(), C->privates().end());
4938  LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4939  RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4940  ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
4941  CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
4942  CopyArrayTemps.append(C->copy_array_temps().begin(),
4943  C->copy_array_temps().end());
4944  CopyArrayElems.append(C->copy_array_elems().begin(),
4945  C->copy_array_elems().end());
4946  }
4947  if (ParentDir.getDirectiveKind() == OMPD_simd ||
4948  (getLangOpts().OpenMPSimd &&
4949  isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
4950  // For simd directive and simd-based directives in simd only mode, use the
4951  // following codegen:
4952  // int x = 0;
4953  // #pragma omp simd reduction(inscan, +: x)
4954  // for (..) {
4955  // <first part>
4956  // #pragma omp scan inclusive(x)
4957  // <second part>
4958  // }
4959  // is transformed to:
4960  // int x = 0;
4961  // for (..) {
4962  // int x_priv = 0;
4963  // <first part>
4964  // x = x_priv + x;
4965  // x_priv = x;
4966  // <second part>
4967  // }
4968  // and
4969  // int x = 0;
4970  // #pragma omp simd reduction(inscan, +: x)
4971  // for (..) {
4972  // <first part>
4973  // #pragma omp scan exclusive(x)
4974  // <second part>
4975  // }
4976  // to
4977  // int x = 0;
4978  // for (..) {
4979  // int x_priv = 0;
4980  // <second part>
4981  // int temp = x;
4982  // x = x_priv + x;
4983  // x_priv = temp;
4984  // <first part>
4985  // }
4986  llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
4987  EmitBranch(IsInclusive
4988  ? OMPScanReduce
4989  : BreakContinueStack.back().ContinueBlock.getBlock());
4990  EmitBlock(OMPScanDispatch);
4991  {
4992  // New scope for correct construction/destruction of temp variables for
4993  // exclusive scan.
4994  LexicalScope Scope(*this, S.getSourceRange());
4995  EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
4996  EmitBlock(OMPScanReduce);
4997  if (!IsInclusive) {
4998  // Create temp var and copy LHS value to this temp value.
4999  // TMP = LHS;
5000  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5001  const Expr *PrivateExpr = Privates[I];
5002  const Expr *TempExpr = CopyArrayTemps[I];
5003  EmitAutoVarDecl(
5004  *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
5005  LValue DestLVal = EmitLValue(TempExpr);
5006  LValue SrcLVal = EmitLValue(LHSs[I]);
5007  EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5008  SrcLVal.getAddress(*this),
5009  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5010  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5011  CopyOps[I]);
5012  }
5013  }
5014  CGM.getOpenMPRuntime().emitReduction(
5015  *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
5016  {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
5017  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5018  const Expr *PrivateExpr = Privates[I];
5019  LValue DestLVal;
5020  LValue SrcLVal;
5021  if (IsInclusive) {
5022  DestLVal = EmitLValue(RHSs[I]);
5023  SrcLVal = EmitLValue(LHSs[I]);
5024  } else {
5025  const Expr *TempExpr = CopyArrayTemps[I];
5026  DestLVal = EmitLValue(RHSs[I]);
5027  SrcLVal = EmitLValue(TempExpr);
5028  }
5029  EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5030  SrcLVal.getAddress(*this),
5031  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5032  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5033  CopyOps[I]);
5034  }
5035  }
5036  EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
5037  OMPScanExitBlock = IsInclusive
5038  ? BreakContinueStack.back().ContinueBlock.getBlock()
5039  : OMPScanReduce;
5040  EmitBlock(OMPAfterScanBlock);
5041  return;
5042  }
5043  if (!IsInclusive) {
5044  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5045  EmitBlock(OMPScanExitBlock);
5046  }
5047  if (OMPFirstScanLoop) {
5048  // Emit buffer[i] = red; at the end of the input phase.
5049  const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5050  .getIterationVariable()
5051  ->IgnoreParenImpCasts();
5052  LValue IdxLVal = EmitLValue(IVExpr);
5053  llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5054  IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5055  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5056  const Expr *PrivateExpr = Privates[I];
5057  const Expr *OrigExpr = Shareds[I];
5058  const Expr *CopyArrayElem = CopyArrayElems[I];
5059  OpaqueValueMapping IdxMapping(
5060  *this,
5061  cast<OpaqueValueExpr>(
5062  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5063  RValue::get(IdxVal));
5064  LValue DestLVal = EmitLValue(CopyArrayElem);
5065  LValue SrcLVal = EmitLValue(OrigExpr);
5066  EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5067  SrcLVal.getAddress(*this),
5068  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5069  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5070  CopyOps[I]);
5071  }
5072  }
5073  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5074  if (IsInclusive) {
5075  EmitBlock(OMPScanExitBlock);
5076  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5077  }
5078  EmitBlock(OMPScanDispatch);
5079  if (!OMPFirstScanLoop) {
5080  // Emit red = buffer[i]; at the entrance to the scan phase.
5081  const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5082  .getIterationVariable()
5083  ->IgnoreParenImpCasts();
5084  LValue IdxLVal = EmitLValue(IVExpr);
5085  llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5086  IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5087  llvm::BasicBlock *ExclusiveExitBB = nullptr;
5088  if (!IsInclusive) {
5089  llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
5090  ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
5091  llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
5092  Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
5093  EmitBlock(ContBB);
5094  // Use idx - 1 iteration for exclusive scan.
5095  IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
5096  }
5097  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5098  const Expr *PrivateExpr = Privates[I];
5099  const Expr *OrigExpr = Shareds[I];
5100  const Expr *CopyArrayElem = CopyArrayElems[I];
5101  OpaqueValueMapping IdxMapping(
5102  *this,
5103  cast<OpaqueValueExpr>(
5104  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5105  RValue::get(IdxVal));
5106  LValue SrcLVal = EmitLValue(CopyArrayElem);
5107  LValue DestLVal = EmitLValue(OrigExpr);
5108  EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5109  SrcLVal.getAddress(*this),
5110  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5111  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5112  CopyOps[I]);
5113  }
5114  if (!IsInclusive) {
5115  EmitBlock(ExclusiveExitBB);
5116  }
5117  }
5118  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5119  : OMPAfterScanBlock);
5120  EmitBlock(OMPAfterScanBlock);
5121 }
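// Illustrative sketch (not part of the original file): outside the simd-only
// path, the two-loop scheme above implements scans such as
//
//   #pragma omp for reduction(inscan, + : x)
//   for (int i = 0; i < n; ++i) {
//     x += a[i];
//   #pragma omp scan inclusive(x)
//     b[i] = x;
//   }
//
// The first loop stores partial results into the copy buffer at the end of
// the input phase; the second reads the reduced prefix back at the start of
// each iteration of the scan phase.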
5122 
5123 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5124  const CodeGenLoopTy &CodeGenLoop,
5125  Expr *IncExpr) {
5126  // Emit the loop iteration variable.
5127  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
5128  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
5129  EmitVarDecl(*IVDecl);
5130 
5131  // Emit the iteration count variable.
5132  // If it is not a variable, Sema decided to calculate the iteration count on
5133  // each iteration (e.g., it is foldable into a constant).
5134  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
5135  EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
5136  // Emit calculation of the iterations count.
5137  EmitIgnoredExpr(S.getCalcLastIteration());
5138  }
5139 
5140  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5141 
5142  bool HasLastprivateClause = false;
5143  // Check pre-condition.
5144  {
5145  OMPLoopScope PreInitScope(*this, S);
5146  // Skip the entire loop if we don't meet the precondition.
5147  // If the condition constant folds and can be elided, avoid emitting the
5148  // whole loop.
5149  bool CondConstant;
5150  llvm::BasicBlock *ContBlock = nullptr;
5151  if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
5152  if (!CondConstant)
5153  return;
5154  } else {
5155  llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
5156  ContBlock = createBasicBlock("omp.precond.end");
5157  emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
5158  getProfileCount(&S));
5159  EmitBlock(ThenBlock);
5160  incrementProfileCounter(&S);
5161  }
5162 
5163  emitAlignedClause(*this, S);
5164  // Emit 'then' code.
5165  {
5166  // Emit helper vars inits.
5167 
5168  LValue LB = EmitOMPHelperVar(
5169  *this, cast<DeclRefExpr>(
5170  (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5171  ? S.getCombinedLowerBoundVariable()
5172  : S.getLowerBoundVariable())));
5173  LValue UB = EmitOMPHelperVar(
5174  *this, cast<DeclRefExpr>(
5175  (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5176  ? S.getCombinedUpperBoundVariable()
5177  : S.getUpperBoundVariable())));
5178  LValue ST =
5179  EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
5180  LValue IL =
5181  EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
5182 
5183  OMPPrivateScope LoopScope(*this);
5184  if (EmitOMPFirstprivateClause(S, LoopScope)) {
5185  // Emit implicit barrier to synchronize threads and avoid data races
5186  // on initialization of firstprivate variables and post-update of
5187  // lastprivate variables.
5188  CGM.getOpenMPRuntime().emitBarrierCall(
5189  *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
5190  /*ForceSimpleCall=*/true);
5191  }
5192  EmitOMPPrivateClause(S, LoopScope);
5193  if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5194  !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5195  !isOpenMPTeamsDirective(S.getDirectiveKind()))
5196  EmitOMPReductionClauseInit(S, LoopScope);
5197  HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
5198  EmitOMPPrivateLoopCounters(S, LoopScope);
5199  (void)LoopScope.Privatize();
5200  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5201  CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
5202 
5203  // Detect the distribute schedule kind and chunk.
5204  llvm::Value *Chunk = nullptr;
5205  OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5206  if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5207  ScheduleKind = C->getDistScheduleKind();
5208  if (const Expr *Ch = C->getChunkSize()) {
5209  Chunk = EmitScalarExpr(Ch);
5210  Chunk = EmitScalarConversion(Chunk, Ch->getType(),
5211  S.getIterationVariable()->getType(),
5212  S.getBeginLoc());
5213  }
5214  } else {
5215  // Default behaviour for dist_schedule clause.
5216  CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5217  *this, S, ScheduleKind, Chunk);
5218  }
5219  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5220  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5221 
5222  // OpenMP [2.10.8, distribute Construct, Description]
5223  // If dist_schedule is specified, kind must be static. If specified,
5224  // iterations are divided into chunks of size chunk_size, chunks are
5225  // assigned to the teams of the league in a round-robin fashion in the
5226  // order of the team number. When no chunk_size is specified, the
5227  // iteration space is divided into chunks that are approximately equal
5228  // in size, and at most one chunk is distributed to each team of the
5229  // league. The size of the chunks is unspecified in this case.
5230  bool StaticChunked =
5231  RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5232  isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5233  if (RT.isStaticNonchunked(ScheduleKind,
5234  /* Chunked */ Chunk != nullptr) ||
5235  StaticChunked) {
5236  CGOpenMPRuntime::StaticRTInput StaticInit(
5237  IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
5238  LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5239  StaticChunked ? Chunk : nullptr);
5240  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
5241  StaticInit);
5242  JumpDest LoopExit =
5243  getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5244  // UB = min(UB, GlobalUB);
5245  EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5246  ? S.getCombinedEnsureUpperBound()
5247  : S.getEnsureUpperBound());
5248  // IV = LB;
5249  EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5250  ? S.getCombinedInit()
5251  : S.getInit());
5252 
5253  const Expr *Cond =
5254  isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5255  ? S.getCombinedCond()
5256  : S.getCond();
5257 
5258  if (StaticChunked)
5259  Cond = S.getCombinedDistCond();
5260 
5261  // For static unchunked schedules generate:
5262  //
5263  // 1. For distribute alone, codegen
5264  // while (idx <= UB) {
5265  // BODY;
5266  // ++idx;
5267  // }
5268  //
5269  // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
5270  // while (idx <= UB) {
5271  // <CodeGen rest of pragma>(LB, UB);
5272  // idx += ST;
5273  // }
5274  //
5275  // For static chunked schedules generate:
5276  //
5277  // while (IV <= GlobalUB) {
5278  // <CodeGen rest of pragma>(LB, UB);
5279  // LB += ST;
5280  // UB += ST;
5281  // UB = min(UB, GlobalUB);
5282  // IV = LB;
5283  // }
5284  //
5285  emitCommonSimdLoop(
5286  *this, S,
5287  [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5288  if (isOpenMPSimdDirective(S.getDirectiveKind()))
5289  CGF.EmitOMPSimdInit(S);
5290  },
5291  [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5292  StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5293  CGF.EmitOMPInnerLoop(
5294  S, LoopScope.requiresCleanups(), Cond, IncExpr,
5295  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5296  CodeGenLoop(CGF, S, LoopExit);
5297  },
5298  [&S, StaticChunked](CodeGenFunction &CGF) {
5299  if (StaticChunked) {
5300  CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5301  CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5302  CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5303  CGF.EmitIgnoredExpr(S.getCombinedInit());
5304  }
5305  });
5306  });
5307  EmitBlock(LoopExit.getBlock());
5308  // Tell the runtime we are done.
5309  RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
5310  } else {
5311  // Emit the outer loop, which requests its work chunk [LB..UB] from the
5312  // runtime and runs the inner loop to process it.
5313  const OMPLoopArguments LoopArguments = {
5314  LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5315  IL.getAddress(*this), Chunk};
5316  EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
5317  CodeGenLoop);
5318  }
5319  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5320  EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5321  return CGF.Builder.CreateIsNotNull(
5322  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5323  });
5324  }
5325  if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5326  !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5327  !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5328  EmitOMPReductionClauseFinal(S, OMPD_simd);
5329  // Emit post-update of the reduction variables if IsLastIter != 0.
5330  emitPostUpdateForReductionClause(
5331  *this, S, [IL, &S](CodeGenFunction &CGF) {
5332  return CGF.Builder.CreateIsNotNull(
5333  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5334  });
5335  }
5336  // Emit final copy of the lastprivate variables if IsLastIter != 0.
5337  if (HasLastprivateClause) {
5338  EmitOMPLastprivateClauseFinal(
5339  S, /*NoFinals=*/false,
5340  Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
5341  }
5342  }
5343 
5344  // We're now done with the loop, so jump to the continuation block.
5345  if (ContBlock) {
5346  EmitBranch(ContBlock);
5347  EmitBlock(ContBlock, true);
5348  }
5349  }
5350 }
5351 
5352 void CodeGenFunction::EmitOMPDistributeDirective(
5353  const OMPDistributeDirective &S) {
5354  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5355  CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5356  };
5357  OMPLexicalScope Scope(*this, S, OMPD_unknown);
5358  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5359 }
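// Illustrative sketch (not part of the original file): a clause such as
//
//   #pragma omp distribute dist_schedule(static, 128)
//   for (int i = 0; i < n; ++i) ...
//
// makes EmitOMPDistributeLoop emit the chunked schedule via the outer-loop
// path, while an unchunked static schedule divides the iteration space into
// approximately equal chunks, at most one per team.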
5360 
5361 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5362  const CapturedStmt *S,
5363  SourceLocation Loc) {
5364  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5365  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5366  CGF.CapturedStmtInfo = &CapStmtInfo;
5367  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5368  Fn->setDoesNotRecurse();
5369  if (CGM.getCodeGenOpts().OptimizationLevel != 0)
5370  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
5371  return Fn;
5372 }
5373 
5374 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
5375  if (CGM.getLangOpts().OpenMPIRBuilder) {
5376  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5377  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5378 
5379  if (S.hasClausesOfKind<OMPDependClause>()) {
5380  // The ordered directive with depend clause.
5381  assert(!S.hasAssociatedStmt() &&
5382  "No associated statement must be in ordered depend construct.");
5383  InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5384  AllocaInsertPt->getIterator());
5385  for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) {
5386  unsigned NumLoops = DC->getNumLoops();
5387  QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(
5388  /*DestWidth=*/64, /*Signed=*/1);
5389  llvm::SmallVector<llvm::Value *> StoreValues;
5390  for (unsigned I = 0; I < NumLoops; I++) {
5391  const Expr *CounterVal = DC->getLoopData(I);
5392  assert(CounterVal);
5393  llvm::Value *StoreValue = EmitScalarConversion(
5394  EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
5395  CounterVal->getExprLoc());
5396  StoreValues.emplace_back(StoreValue);
5397  }
5398  bool IsDependSource = false;
5399  if (DC->getDependencyKind() == OMPC_DEPEND_source)
5400  IsDependSource = true;
5401  Builder.restoreIP(OMPBuilder.createOrderedDepend(
5402  Builder, AllocaIP, NumLoops, StoreValues, ".cnt.addr",
5403  IsDependSource));
5404  }
5405  } else {
5406  // The ordered directive with a threads or simd clause, or without a clause.
5407  // Without a clause, it behaves as if the threads clause is specified.
5408  const auto *C = S.getSingleClause<OMPSIMDClause>();
5409 
5410  auto FiniCB = [this](InsertPointTy IP) {
5411  OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
5412  };
5413 
5414  auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
5415  InsertPointTy CodeGenIP,
5416  llvm::BasicBlock &FiniBB) {
5417  const CapturedStmt *CS = S.getInnermostCapturedStmt();
5418  if (C) {
5419  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5420  GenerateOpenMPCapturedVars(*CS, CapturedVars);
5421  llvm::Function *OutlinedFn =
5422  emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5423  assert(S.getBeginLoc().isValid() &&
5424  "Outlined function call location must be valid.");
5425  ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
5426  OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, FiniBB,
5427  OutlinedFn, CapturedVars);
5428  } else {
5429  OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
5430  FiniBB);
5431  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CS->getCapturedStmt(),
5432  CodeGenIP, FiniBB);
5433  }
5434  };
5435 
5436  OMPLexicalScope Scope(*this, S, OMPD_unknown);
5437  Builder.restoreIP(
5438  OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
5439  }
5440  return;
5441  }
5442 
5443  if (S.hasClausesOfKind<OMPDependClause>()) {
5444  assert(!S.hasAssociatedStmt() &&
5445  "No associated statement must be in ordered depend construct.");
5446  for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5447  CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5448  return;
5449  }
5450  const auto *C = S.getSingleClause<OMPSIMDClause>();
5451  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
5452  PrePostActionTy &Action) {
5453  const CapturedStmt *CS = S.getInnermostCapturedStmt();
5454  if (C) {
5455  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5456  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5457  llvm::Function *OutlinedFn =
5458  emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5459  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
5460  OutlinedFn, CapturedVars);
5461  } else {
5462  Action.Enter(CGF);
5463  CGF.EmitStmt(CS->getCapturedStmt());
5464  }
5465  };
5466  OMPLexicalScope Scope(*this, S, OMPD_unknown);
5467  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
5468 }
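// Illustrative sketch (not part of the original file): the forms handled
// above include
//
//   #pragma omp ordered                      // threads (the default)
//   #pragma omp ordered simd
//   #pragma omp ordered depend(source)       // doacross, no associated stmt
//   #pragma omp ordered depend(sink : i - 1) // requires 'ordered(1)' on loop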
5469 
5470 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
5471  QualType SrcType, QualType DestType,
5472  SourceLocation Loc) {
5473  assert(CGF.hasScalarEvaluationKind(DestType) &&
5474  "DestType must have scalar evaluation kind.");
5475  assert(!Val.isAggregate() && "Must be a scalar or complex.");
5476  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
5477  DestType, Loc)
5478  : CGF.EmitComplexToScalarConversion(
5479  Val.getComplexVal(), SrcType, DestType, Loc);
5480 }
5481 
5482 static CodeGenFunction::ComplexPairTy
5483 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
5484  QualType DestType, SourceLocation Loc) {
5485  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
5486  "DestType must have complex evaluation kind.");
5487  CodeGenFunction::ComplexPairTy ComplexVal;
5488  if (Val.isScalar()) {
5489  // Convert the input element to the element type of the complex.
5490  QualType DestElementType =
5491  DestType->castAs<ComplexType>()->getElementType();
5492  llvm::Value *ScalarVal = CGF.EmitScalarConversion(
5493  Val.getScalarVal(), SrcType, DestElementType, Loc);
5494  ComplexVal = CodeGenFunction::ComplexPairTy(
5495  ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
5496  } else {
5497  assert(Val.isComplex() && "Must be a scalar or complex.");