//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for OpenMP parallel construct that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(
                                OrigVD->getType().getNonReferenceType()))),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress(CGF);
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

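/// Emit an lvalue for \p E, rewriting a DeclRefExpr to a variable so that any
/// capture by an enclosing lambda, captured statement, or block is honored.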
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

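/// Compute the size of \p Ty in bytes as an llvm::Value, multiplying out the
/// runtime dimensions of a variable-length array when the static size is zero.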
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

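/// Collect the values to pass to the outlined function for each capture in
/// \p S: VLA size values, 'this', by-copy captures (loaded, and squeezed
/// through a uintptr temporary when the field is not a pointer), and raw
/// addresses for by-reference captures.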
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}

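/// Reinterpret an address that holds a uintptr-encoded value as an address of
/// the destination type \p DstType, producing a naturally aligned address.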
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress(CGF);
  return TmpAddr;
}

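/// Compute a canonical parameter type for an outlined function, recursively
/// decaying variably modified reference, pointer, and VLA types down to
/// canonical element types.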
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

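/// Build the signature and prologue of the outlined function for the captured
/// statement described by \p FO, mapping each captured field either to a local
/// address (recorded in \p LocalAddrs) or to its VLA size expression and value
/// (recorded in \p VLASizes), and loading 'this' into \p CXXThisValue.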
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. The VLA type sizes are passed to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

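/// Generate the outlined function for the captured statement \p S. When debug
/// info with non-reduced variable types is requested, a "_debug__" variant
/// with the original parameter types is emitted first, and the returned
/// wrapper with uintptr-cast parameters forwards its arguments to it.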
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace())));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
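/// Emit an element-by-element copy between two arrays of type
/// \p OriginalType by looping over the elements and invoking \p CopyGen on
/// each pair of destination and source element addresses.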
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

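/// Emit initialized private copies for the variables in the 'firstprivate'
/// clauses of \p D and register them in \p PrivateScope. Returns true if at
/// least one emitted firstprivate variable is also lastprivate, so the caller
/// must handle the final copy-back.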
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function (e.g. omp for, omp simd, omp distribute).
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables in target
      // regions that are captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

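/// Emit private copies for the variables in the 'private' clauses of \p D and
/// register them in \p PrivateScope, skipping duplicates across clauses.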
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

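/// Emit the 'copyin' clauses of \p D: in every non-master thread, copy the
/// master thread's threadprivate values into the thread's own copies. Returns
/// true if any copy code was emitted.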
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First check if the current thread is the master thread. If it is,
          // there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

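/// Emit private copies for the variables in the 'lastprivate' clauses of
/// \p D and remember the addresses of the original variables for the final
/// copy-back. Returns true if at least one lastprivate variable was found.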
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
                                                               OrigVD]() {
            if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
              Address VDAddr =
                  CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
                                                                        OrigVD);
              setAddrOfLocalVar(VD, VDAddr);
              return VDAddr;
            }
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

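/// Copy the values of lastprivate variables back to the original variables,
/// guarded by \p IsLastIterCond when it is given; loop counters are first
/// updated with their final expressions unless \p NoFinals is set.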
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

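/// Emit private copies with reduction initializers for the variables in the
/// 'reduction' clauses of \p D (restricted to inscan or non-inscan reductions
/// depending on \p ForInscan) and set up the task reduction descriptor for
/// 'task'-modified reductions.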
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}

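/// Emit the final reduction of the privatized reduction variables of \p D
/// into the original variables, skipping inscan reductions. The nowait
/// variant is used for simd reductions, parallel directives (which have an
/// implicit barrier), or when a 'nowait' clause is present.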
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

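/// Emit the post-update expressions of the 'reduction' clauses of \p D, if
/// any, optionally wrapped in a conditional block guarded by the condition
/// produced by \p CondGen.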
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
1476 
1477 static void
1479  const OMPExecutableDirective &S) {
1480  if (CGF.getLangOpts().OpenMP < 50)
1481  return;
1483  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1484  for (const Expr *Ref : C->varlists()) {
1485  if (!Ref->getType()->isScalarType())
1486  continue;
1487  const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1488  if (!DRE)
1489  continue;
1490  PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1491  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1492  }
1493  }
1494  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1495  for (const Expr *Ref : C->varlists()) {
1496  if (!Ref->getType()->isScalarType())
1497  continue;
1498  const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1499  if (!DRE)
1500  continue;
1501  PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1502  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1503  }
1504  }
1505  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1506  for (const Expr *Ref : C->varlists()) {
1507  if (!Ref->getType()->isScalarType())
1508  continue;
1509  const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1510  if (!DRE)
1511  continue;
1512  PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1513  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1514  }
1515  }
1516  // Privates need not be analyzed since they are not captured at all.
1517  // Task reductions may be skipped - tasks are ignored.
1518  // Firstprivates do not return a value but may be passed by reference - no
1519  // need to check for updated lastprivate conditional.
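// E.g. (hypothetical user code) a scalar tracked by this analysis:
//   #pragma omp parallel for lastprivate(conditional : x)
//   for (int i = 0; i < n; ++i)
//     if (a[i] > 0) x = a[i]; // last iteration to satisfy the condition wins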
1520  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1521  for (const Expr *Ref : C->varlists()) {
1522  if (!Ref->getType()->isScalarType())
1523  continue;
1524  const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1525  if (!DRE)
1526  continue;
1527  PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1528  }
1529  }
1530  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1531  CGF, S, PrivateDecls);
1532 }
1533 
1534 static void emitCommonOMPParallelDirective(
1535  CodeGenFunction &CGF, const OMPExecutableDirective &S,
1536  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1537  const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1538  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1539  llvm::Function *OutlinedFn =
1540  CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1541  S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1542  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1543  CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1544  llvm::Value *NumThreads =
1545  CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1546  /*IgnoreResultAssign=*/true);
1547  CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1548  CGF, NumThreads, NumThreadsClause->getBeginLoc());
1549  }
1550  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1551  CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1552  CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1553  CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1554  }
1555  const Expr *IfCond = nullptr;
1556  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1557  if (C->getNameModifier() == OMPD_unknown ||
1558  C->getNameModifier() == OMPD_parallel) {
1559  IfCond = C->getCondition();
1560  break;
1561  }
1562  }
1563 
1564  OMPParallelScope Scope(CGF, S);
1565  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1566  // Combining 'distribute' with 'for' requires sharing each 'distribute'
1567  // chunk's lower and upper bounds with the 'for' chunking mechanism.
1568  // The following lambda takes care of appending the lower and upper bound
1569  // parameters when necessary.
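// E.g. (hypothetical user code) a combined construct taking this path:
//   #pragma omp distribute parallel for
//   for (int i = 0; i < n; ++i) body(i);
// Each 'distribute' chunk's LB/UB are appended as extra arguments to the
// outlined 'parallel' function by CodeGenBoundParameters below.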
1570  CodeGenBoundParameters(CGF, S, CapturedVars);
1571  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1572  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1573  CapturedVars, IfCond);
1574 }
1575 
1576 static bool isAllocatableDecl(const VarDecl *VD) {
1577  const VarDecl *CVD = VD->getCanonicalDecl();
1578  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1579  return false;
1580  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1581  // Use the default allocation.
1582  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1583  AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1584  !AA->getAllocator());
1585 }
1586 
1587 static void emitEmptyBoundParameters(CodeGenFunction &,
1588  const OMPExecutableDirective &,
1589  llvm::SmallVectorImpl<llvm::Value *> &) {}
1590 
1591 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1592  CodeGenFunction &CGF, const VarDecl *VD) {
1593  CodeGenModule &CGM = CGF.CGM;
1594  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1595 
1596  if (!VD)
1597  return Address::invalid();
1598  const VarDecl *CVD = VD->getCanonicalDecl();
1599  if (!isAllocatableDecl(CVD))
1600  return Address::invalid();
1601  llvm::Value *Size;
1602  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1603  if (CVD->getType()->isVariablyModifiedType()) {
1604  Size = CGF.getTypeSize(CVD->getType());
1605  // Align the size: ((size + align - 1) / align) * align
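// A minimal worked example: size = 10, align = 8 gives (10 + 7) / 8 * 8 = 16.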
1606  Size = CGF.Builder.CreateNUWAdd(
1607  Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1608  Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1609  Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1610  } else {
1611  CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1612  Size = CGM.getSize(Sz.alignTo(Align));
1613  }
1614 
1615  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1616  assert(AA->getAllocator() &&
1617  "Expected allocator expression for non-default allocator.");
1618  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1619  // According to the standard, the original allocator type is an enum (integer).
1620  // Convert to pointer type, if required.
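// E.g. (hypothetical user code) a declaration that reaches this path:
//   int x;
//   #pragma omp allocate(x) allocator(omp_large_cap_mem_alloc)
// Predefined allocators are integer enumerators and need the int-to-pointer
// cast; an allocator created by omp_init_allocator() is already a pointer.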
1621  if (Allocator->getType()->isIntegerTy())
1622  Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1623  else if (Allocator->getType()->isPointerTy())
1624  Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1625  CGM.VoidPtrTy);
1626 
1627  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1628  CGF.Builder, Size, Allocator,
1629  getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1630  llvm::CallInst *FreeCI =
1631  OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1632 
1633  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1634  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1635  Addr,
1636  CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1637  getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1638  return Address(Addr, Align);
1639 }
1640 
1641 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1642  CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1643  SourceLocation Loc) {
1644  CodeGenModule &CGM = CGF.CGM;
1645  if (CGM.getLangOpts().OpenMPUseTLS &&
1646  CGM.getContext().getTargetInfo().isTLSSupported())
1647  return VDAddr;
1648 
1649  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1650 
1651  llvm::Type *VarTy = VDAddr.getElementType();
1652  llvm::Value *Data =
1653  CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1654  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1655  std::string Suffix = getNameWithSeparators({"cache", ""});
1656  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1657 
1658  llvm::CallInst *ThreadPrivateCacheCall =
1659  OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1660 
1661  return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1662 }
1663 
1664 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1665  ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1666  SmallString<128> Buffer;
1667  llvm::raw_svector_ostream OS(Buffer);
1668  StringRef Sep = FirstSeparator;
1669  for (StringRef Part : Parts) {
1670  OS << Sep << Part;
1671  Sep = Separator;
1672  }
1673  return OS.str().str();
1674 }
1675 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1676  if (CGM.getLangOpts().OpenMPIRBuilder) {
1677  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1678  // Check if we have any if clause associated with the directive.
1679  llvm::Value *IfCond = nullptr;
1680  if (const auto *C = S.getSingleClause<OMPIfClause>())
1681  IfCond = EmitScalarExpr(C->getCondition(),
1682  /*IgnoreResultAssign=*/true);
1683 
1684  llvm::Value *NumThreads = nullptr;
1685  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1686  NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1687  /*IgnoreResultAssign=*/true);
1688 
1689  ProcBindKind ProcBind = OMP_PROC_BIND_default;
1690  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1691  ProcBind = ProcBindClause->getProcBindKind();
1692 
1693  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1694 
1695  // The cleanup callback that finalizes all variables at the given location,
1696  // thus calls destructors etc.
1697  auto FiniCB = [this](InsertPointTy IP) {
1698  OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1699  };
1700 
1701  // Privatization callback that performs appropriate action for
1702  // shared/private/firstprivate/lastprivate/copyin/... variables.
1703  //
1704  // TODO: This defaults to shared right now.
1705  auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1706  llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1707  // The next line is appropriate only for variables (Val) with the
1708  // data-sharing attribute "shared".
1709  ReplVal = &Val;
1710 
1711  return CodeGenIP;
1712  };
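// E.g. with the default 'shared' handling above, in (hypothetical):
//   int x = 0;
//   #pragma omp parallel
//   { x += 1; }
// the region keeps using the original storage of 'x'; a private(x) clause
// would require PrivCB to allocate a fresh copy and return it via ReplVal.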
1713 
1714  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1715  const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1716 
1717  auto BodyGenCB = [ParallelRegionBodyStmt,
1718  this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1719  llvm::BasicBlock &ContinuationBB) {
1720  OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1721  ContinuationBB);
1722  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1723  CodeGenIP, ContinuationBB);
1724  };
1725 
1726  CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1727  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1728  llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1729  AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1730  Builder.restoreIP(
1731  OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1732  IfCond, NumThreads, ProcBind, S.hasCancel()));
1733  return;
1734  }
1735 
1736  // Emit parallel region as a standalone region.
1737  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1738  Action.Enter(CGF);
1739  OMPPrivateScope PrivateScope(CGF);
1740  bool Copyins = CGF.EmitOMPCopyinClause(S);
1741  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1742  if (Copyins) {
1743  // Emit an implicit barrier to synchronize threads and avoid data races when
1744  // propagating the master thread's values of threadprivate variables to the
1745  // local instances in all other implicit threads.
1746  CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1747  CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1748  /*ForceSimpleCall=*/true);
1749  }
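// E.g. (hypothetical user code) broadcasting a threadprivate value:
//   static int tp;
//   #pragma omp threadprivate(tp)
//   tp = 42;
//   #pragma omp parallel copyin(tp) // every thread starts with tp == 42
//   { use(tp); }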
1750  CGF.EmitOMPPrivateClause(S, PrivateScope);
1751  CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1752  (void)PrivateScope.Privatize();
1753  CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1754  CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1755  };
1756  {
1757  auto LPCRegion =
1758  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1759  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1760  emitEmptyBoundParameters);
1761  emitPostUpdateForReductionClause(*this, S,
1762  [](CodeGenFunction &) { return nullptr; });
1763  }
1764  // Check for outer lastprivate conditional update.
1765  checkForLastprivateConditionalUpdate(*this, S);
1766 }
1767 
1768 namespace {
1769 /// RAII to handle scopes for loop transformation directives.
1770 class OMPTransformDirectiveScopeRAII {
1771  OMPLoopScope *Scope = nullptr;
1772  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1773  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1774 
1775 public:
1776  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1777  if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1778  Scope = new OMPLoopScope(CGF, *Dir);
1779  CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1780  CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1781  }
1782  }
1783  ~OMPTransformDirectiveScopeRAII() {
1784  if (!Scope)
1785  return;
1786  delete CapInfoRAII;
1787  delete CGSI;
1788  delete Scope;
1789  }
1790 };
1791 } // namespace
1792 
1793 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1794  int MaxLevel, int Level = 0) {
1795  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1796  const Stmt *SimplifiedS = S->IgnoreContainers();
1797  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1798  PrettyStackTraceLoc CrashInfo(
1799  CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1800  "LLVM IR generation of compound statement ('{}')");
1801 
1802  // Keep track of the current cleanup stack depth, including debug scopes.
1803  CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1804  for (const Stmt *CurStmt : CS->body())
1805  emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1806  return;
1807  }
1808  if (SimplifiedS == NextLoop) {
1809  OMPTransformDirectiveScopeRAII PossiblyTransformDirectiveScope(CGF,
1810  SimplifiedS);
1811  if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
1812  SimplifiedS = Dir->getTransformedStmt();
1813  if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1814  SimplifiedS = CanonLoop->getLoopStmt();
1815  if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1816  S = For->getBody();
1817  } else {
1818  assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1819  "Expected canonical for loop or range-based for loop.");
1820  const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1821  CGF.EmitStmt(CXXFor->getLoopVarStmt());
1822  S = CXXFor->getBody();
1823  }
1824  if (Level + 1 < MaxLevel) {
1825  NextLoop = OMPLoopBasedDirective::tryToFindNextInnerLoop(
1826  S, /*TryImperfectlyNestedLoops=*/true);
1827  emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1828  return;
1829  }
1830  }
1831  CGF.EmitStmt(S);
1832 }
1833 
1834 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1835  JumpDest LoopExit) {
1836  RunCleanupsScope BodyScope(*this);
1837  // Update counters values on current iteration.
1838  for (const Expr *UE : D.updates())
1839  EmitIgnoredExpr(UE);
1840  // Update the linear variables.
1841  // In distribute directives only loop counters may be marked as linear, no
1842  // need to generate the code for them.
1843  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1844  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1845  for (const Expr *UE : C->updates())
1846  EmitIgnoredExpr(UE);
1847  }
1848  }
1849 
1850  // On a continue in the body, jump to the end.
1851  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1852  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1853  for (const Expr *E : D.finals_conditions()) {
1854  if (!E)
1855  continue;
1856  // Check that loop counter in non-rectangular nest fits into the iteration
1857  // space.
1858  llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1859  EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1860  getProfileCount(D.getBody()));
1861  EmitBlock(NextBB);
1862  }
1863 
1864  OMPPrivateScope InscanScope(*this);
1865  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1866  bool IsInscanRegion = InscanScope.Privatize();
1867  if (IsInscanRegion) {
1868  // Need to remember the blocks before and after the scan directive so that
1869  // they can be dispatched correctly depending on the clause used in this
1870  // directive, inclusive or exclusive. For an inclusive scan the natural
1871  // order of the blocks is used; for the exclusive clause the blocks must be
1872  // executed in reverse order.
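// E.g. (hypothetical user code) an inclusive scan:
//   #pragma omp simd reduction(inscan, + : sum)
//   for (int i = 0; i < n; ++i) {
//     sum += a[i];                    // before-scan block
//     #pragma omp scan inclusive(sum)
//     b[i] = sum;                     // after-scan block
//   }
// With exclusive(sum) the same two blocks are emitted in reverse order.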
1873  OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1874  OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1875  // No need to allocate the inscan exit block; in simd mode it is selected
1876  // in the codegen for the scan directive.
1877  if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1878  OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1879  OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1880  EmitBranch(OMPScanDispatch);
1881  EmitBlock(OMPBeforeScanBlock);
1882  }
1883 
1884  // Emit loop variables for C++ range loops.
1885  const Stmt *Body =
1886  D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1887  // Emit loop body.
1888  emitBody(*this, Body,
1889  OMPLoopBasedDirective::tryToFindNextInnerLoop(
1890  Body, /*TryImperfectlyNestedLoops=*/true),
1891  D.getLoopsNumber());
1892 
1893  // Jump to the dispatcher at the end of the loop body.
1894  if (IsInscanRegion)
1895  EmitBranch(OMPScanExitBlock);
1896 
1897  // The end (updates/cleanups).
1898  EmitBlock(Continue.getBlock());
1899  BreakContinueStack.pop_back();
1900 }
1901 
1902 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1903 
1904 /// Emit a captured statement and return the function as well as its captured
1905 /// closure context.
1906 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1907  const CapturedStmt *S) {
1908  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1909  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1910  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1911  std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1912  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1913  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1914 
1915  return {F, CapStruct.getPointer(ParentCGF)};
1916 }
1917 
1918 /// Emit a call to a previously captured closure.
1919 static llvm::CallInst *
1920 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1921  llvm::ArrayRef<llvm::Value *> Args) {
1922  // Append the closure context to the argument list.
1923  SmallVector<llvm::Value *> EffectiveArgs;
1924  EffectiveArgs.reserve(Args.size() + 1);
1925  llvm::append_range(EffectiveArgs, Args);
1926  EffectiveArgs.push_back(Cap.second);
1927 
1928  return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1929 }
1930 
1931 llvm::CanonicalLoopInfo *
1932 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1933  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1934 
1935  EmitStmt(S);
1936  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1937 
1938  // The last added loop is the outermost one.
1939  return OMPLoopNestStack.back();
1940 }
1941 
1942 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
1943  const Stmt *SyntacticalLoop = S->getLoopStmt();
1944  if (!getLangOpts().OpenMPIRBuilder) {
1945  // Ignore if OpenMPIRBuilder is not enabled.
1946  EmitStmt(SyntacticalLoop);
1947  return;
1948  }
1949 
1950  LexicalScope ForScope(*this, S->getSourceRange());
1951 
1952  // Emit init statements. The Distance/LoopVar funcs may reference variable
1953  // declarations they contain.
1954  const Stmt *BodyStmt;
1955  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
1956  if (const Stmt *InitStmt = For->getInit())
1957  EmitStmt(InitStmt);
1958  BodyStmt = For->getBody();
1959  } else if (const auto *RangeFor =
1960  dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
1961  if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
1962  EmitStmt(RangeStmt);
1963  if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
1964  EmitStmt(BeginStmt);
1965  if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
1966  EmitStmt(EndStmt);
1967  if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
1968  EmitStmt(LoopVarStmt);
1969  BodyStmt = RangeFor->getBody();
1970  } else
1971  llvm_unreachable("Expected for-stmt or range-based for-stmt");
1972 
1973  // Emit closure for later use. By-value captures will be captured here.
1974  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
1975  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
1976  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
1977  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
1978 
1979  // Call the distance function to get the number of iterations of the loop to
1980  // come.
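// E.g. for (hypothetical) 'for (int i = 0; i < n; i += 2)' the distance
// closure stores ceil((n - 0) / 2) into CountAddr; the exact expression is
// synthesized by Sema, so this is only an illustration.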
1981  QualType LogicalTy = DistanceFunc->getCapturedDecl()
1982  ->getParam(0)
1983  ->getType()
1984  .getNonReferenceType();
1985  Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
1986  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
1987  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
1988 
1989  // Emit the loop structure.
1990  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1991  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
1992  llvm::Value *IndVar) {
1993  Builder.restoreIP(CodeGenIP);
1994 
1995  // Emit the loop body: Convert the logical iteration number to the loop
1996  // variable and emit the body.
1997  const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
1998  LValue LCVal = EmitLValue(LoopVarRef);
1999  Address LoopVarAddress = LCVal.getAddress(*this);
2000  emitCapturedStmtCall(*this, LoopVarClosure,
2001  {LoopVarAddress.getPointer(), IndVar});
2002 
2003  RunCleanupsScope BodyScope(*this);
2004  EmitStmt(BodyStmt);
2005  };
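// E.g. for (hypothetical) 'for (int i = 7; i < n; i += 3)' the loop-variable
// closure materializes i = 7 + 3 * IndVar from the logical iteration number
// IndVar (again, the actual mapping expression comes from Sema).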
2006  llvm::CanonicalLoopInfo *CL =
2007  OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
2008 
2009  // Finish up the loop.
2010  Builder.restoreIP(CL->getAfterIP());
2011  ForScope.ForceCleanup();
2012 
2013  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2014  OMPLoopNestStack.push_back(CL);
2015 }
2016 
2017 void CodeGenFunction::EmitOMPInnerLoop(
2018  const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2019  const Expr *IncExpr,
2020  const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2021  const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2022  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2023 
2024  // Start the loop with a block that tests the condition.
2025  auto CondBlock = createBasicBlock("omp.inner.for.cond");
2026  EmitBlock(CondBlock);
2027  const SourceRange R = S.getSourceRange();
2028 
2029  // If attributes are attached, push to the basic block with them.
2030  const auto &OMPED = cast<OMPExecutableDirective>(S);
2031  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2032  const Stmt *SS = ICS->getCapturedStmt();
2033  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2034  OMPLoopNestStack.clear();
2035  if (AS)
2036  LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2037  AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2038  SourceLocToDebugLoc(R.getEnd()));
2039  else
2040  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2041  SourceLocToDebugLoc(R.getEnd()));
2042 
2043  // If there are any cleanups between here and the loop-exit scope,
2044  // create a block to stage a loop exit along.
2045  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2046  if (RequiresCleanup)
2047  ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2048 
2049  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2050 
2051  // Emit condition.
2052  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
2053  if (ExitBlock != LoopExit.getBlock()) {
2054  EmitBlock(ExitBlock);
2055  EmitBranchThroughCleanup(LoopExit);
2056  }
2057 
2058  EmitBlock(LoopBody);
2059  incrementProfileCounter(&S);
2060 
2061  // Create a block for the increment.
2062  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2063  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2064 
2065  BodyGen(*this);
2066 
2067  // Emit "IV = IV + 1" and a back-edge to the condition block.
2068  EmitBlock(Continue.getBlock());
2069  EmitIgnoredExpr(IncExpr);
2070  PostIncGen(*this);
2071  BreakContinueStack.pop_back();
2072  EmitBranch(CondBlock);
2073  LoopStack.pop();
2074  // Emit the fall-through block.
2075  EmitBlock(LoopExit.getBlock());
2076 }
2077 
2078 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2079  if (!HaveInsertPoint())
2080  return false;
2081  // Emit inits for the linear variables.
2082  bool HasLinears = false;
2083  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2084  for (const Expr *Init : C->inits()) {
2085  HasLinears = true;
2086  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
2087  if (const auto *Ref =
2088  dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2089  AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2090  const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2091  DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2092  CapturedStmtInfo->lookup(OrigVD) != nullptr,
2093  VD->getInit()->getType(), VK_LValue,
2094  VD->getInit()->getExprLoc());
2095  EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
2096  VD->getType()),
2097  /*capturedByInit=*/false);
2098  EmitAutoVarCleanups(Emission);
2099  } else {
2100  EmitVarDecl(*VD);
2101  }
2102  }
2103  // Emit the linear steps for the linear clauses.
2104  // If a step is not constant, it is pre-calculated before the loop.
2105  if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2106  if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2107  EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2108  // Emit calculation of the linear step.
2109  EmitIgnoredExpr(CS);
2110  }
2111  }
2112  return HasLinears;
2113 }
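// E.g. (hypothetical user code) a linear clause with a non-constant step:
//   #pragma omp simd linear(p : step)
//   for (int i = 0; i < n; ++i) { use(*p); p += step; }
// 'step' is evaluated once into a helper variable before the loop above.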
2114 
2115 void CodeGenFunction::EmitOMPLinearClauseFinal(
2116  const OMPLoopDirective &D,
2117  const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2118  if (!HaveInsertPoint())
2119  return;
2120  llvm::BasicBlock *DoneBB = nullptr;
2121  // Emit the final values of the linear variables.
2122  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2123  auto IC = C->varlist_begin();
2124  for (const Expr *F : C->finals()) {
2125  if (!DoneBB) {
2126  if (llvm::Value *Cond = CondGen(*this)) {
2127  // If the first post-update expression is found, emit conditional
2128  // block if it was requested.
2129  llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2130  DoneBB = createBasicBlock(".omp.linear.pu.done");
2131  Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2132  EmitBlock(ThenBB);
2133  }
2134  }
2135  const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2136  DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2137  CapturedStmtInfo->lookup(OrigVD) != nullptr,
2138  (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2139  Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
2140  CodeGenFunction::OMPPrivateScope VarScope(*this);
2141  VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2142  (void)VarScope.Privatize();
2143  EmitIgnoredExpr(F);
2144  ++IC;
2145  }
2146  if (const Expr *PostUpdate = C->getPostUpdateExpr())
2147  EmitIgnoredExpr(PostUpdate);
2148  }
2149  if (DoneBB)
2150  EmitBlock(DoneBB, /*IsFinished=*/true);
2151 }
2152 
2153 static void emitAlignedClause(CodeGenFunction &CGF,
2154  const OMPExecutableDirective &D) {
2155  if (!CGF.HaveInsertPoint())
2156  return;
2157  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2158  llvm::APInt ClauseAlignment(64, 0);
2159  if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2160  auto *AlignmentCI =
2161  cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2162  ClauseAlignment = AlignmentCI->getValue();
2163  }
2164  for (const Expr *E : Clause->varlists()) {
2165  llvm::APInt Alignment(ClauseAlignment);
2166  if (Alignment == 0) {
2167  // OpenMP [2.8.1, Description]
2168  // If no optional parameter is specified, implementation-defined default
2169  // alignments for SIMD instructions on the target platforms are assumed.
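// E.g. (hypothetical) '#pragma omp simd aligned(p : 32)' emits an alignment
// assumption of 32 bytes on 'p'; a bare 'aligned(p)' falls back to the
// target's default SIMD alignment computed below.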
2170  Alignment =
2171  CGF.getContext()
2172  .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2173  E->getType()->getPointeeType()))
2174  .getQuantity();
2175  }
2176  assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2177  "alignment is not power of 2");
2178  if (Alignment != 0) {
2179  llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2180  CGF.emitAlignmentAssumption(
2181  PtrValue, E, /*No second loc needed*/ SourceLocation(),
2182  llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2183  }
2184  }
2185  }
2186 }
2187 
2188 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2189  const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2190  if (!HaveInsertPoint())
2191  return;
2192  auto I = S.private_counters().begin();
2193  for (const Expr *E : S.counters()) {
2194  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2195  const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2196  // Emit var without initialization.
2197  AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2198  EmitAutoVarCleanups(VarEmission);
2199  LocalDeclMap.erase(PrivateVD);
2200  (void)LoopScope.addPrivate(VD, [&VarEmission]() {
2201  return VarEmission.getAllocatedAddress();
2202  });
2203  if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2204  VD->hasGlobalStorage()) {
2205  (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
2206  DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2207  LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2208  E->getType(), VK_LValue, E->getExprLoc());
2209  return EmitLValue(&DRE).getAddress(*this);
2210  });
2211  } else {
2212  (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
2213  return VarEmission.getAllocatedAddress();
2214  });
2215  }
2216  ++I;
2217  }
2218  // Privatize extra loop counters used in loops for ordered(n) clauses.
2219  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2220  if (!C->getNumForLoops())
2221  continue;
2222  for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2223  I < E; ++I) {
2224  const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2225  const auto *VD = cast<VarDecl>(DRE->getDecl());
2226  // Override only those variables that can be captured to avoid re-emission
2227  // of the variables declared within the loops.
2228  if (DRE->refersToEnclosingVariableOrCapture()) {
2229  (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
2230  return CreateMemTemp(DRE->getType(), VD->getName());
2231  });
2232  }
2233  }
2234  }
2235 }
2236 
2237 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2238  const Expr *Cond, llvm::BasicBlock *TrueBlock,
2239  llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2240  if (!CGF.HaveInsertPoint())
2241  return;
2242  {
2243  CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2244  CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2245  (void)PreCondScope.Privatize();
2246  // Get initial values of real counters.
2247  for (const Expr *I : S.inits()) {
2248  CGF.EmitIgnoredExpr(I);
2249  }
2250  }
2251  // Create temp loop control variables with their init values to support
2252  // non-rectangular loops.
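// E.g. (hypothetical user code) a non-rectangular collapsed nest:
//   #pragma omp for collapse(2)
//   for (int i = 0; i < n; ++i)
//     for (int j = i; j < n; ++j) // inner bound depends on outer counter
//       body(i, j);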
2253  CodeGenFunction::OMPMapVars PreCondVars;
2254  for (const Expr * E: S.dependent_counters()) {
2255  if (!E)
2256  continue;
2257  assert(!E->getType().getNonReferenceType()->isRecordType() &&
2258  "dependent counter must not be an iterator.");
2259  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2260  Address CounterAddr =
2261  CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2262  (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2263  }
2264  (void)PreCondVars.apply(CGF);
2265  for (const Expr *E : S.dependent_inits()) {
2266  if (!E)
2267  continue;
2268  CGF.EmitIgnoredExpr(E);
2269  }
2270  // Check that loop is executed at least one time.
2271  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2272  PreCondVars.restore(CGF);
2273 }
2274 
2275 void CodeGenFunction::EmitOMPLinearClause(
2276  const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2277  if (!HaveInsertPoint())
2278  return;
2279  llvm::DenseSet<const VarDecl *> SIMDLCVs;
2280  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2281  const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2282  for (const Expr *C : LoopDirective->counters()) {
2283  SIMDLCVs.insert(
2284  cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2285  }
2286  }
2287  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2288  auto CurPrivate = C->privates().begin();
2289  for (const Expr *E : C->varlists()) {
2290  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2291  const auto *PrivateVD =
2292  cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2293  if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2294  bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
2295  // Emit private VarDecl with copy init.
2296  EmitVarDecl(*PrivateVD);
2297  return GetAddrOfLocalVar(PrivateVD);
2298  });
2299  assert(IsRegistered && "linear var already registered as private");
2300  // Silence the warning about unused variable.
2301  (void)IsRegistered;
2302  } else {
2303  EmitVarDecl(*PrivateVD);
2304  }
2305  ++CurPrivate;
2306  }
2307  }
2308 }
2309 
2310 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2311  const OMPExecutableDirective &D,
2312  bool IsMonotonic) {
2313  if (!CGF.HaveInsertPoint())
2314  return;
2315  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2316  RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2317  /*ignoreResult=*/true);
2318  auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2319  CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2320  // In presence of finite 'safelen', it may be unsafe to mark all
2321  // the memory instructions parallel, because loop-carried
2322  // dependences of 'safelen' iterations are possible.
2323  if (!IsMonotonic)
2324  CGF.LoopStack.setParallel(/*Enable=*/true);
2325  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2326  RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2327  /*ignoreResult=*/true);
2328  auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2329  CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2330  // In presence of finite 'safelen', it may be unsafe to mark all
2331  // the memory instructions parallel, because loop-carried
2332  // dependences of 'safelen' iterations are possible.
2333  CGF.LoopStack.setParallel(/*Enable=*/false);
2334  }
2335 }
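// E.g. '#pragma omp simd simdlen(4)' only hints a vector width of 4 and
// keeps parallel access enabled, while '#pragma omp simd safelen(4)' caps
// the width at 4 and disables the parallel-access metadata, since
// loop-carried dependences at a distance of safelen or more may still exist.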
2336 
2337 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
2338  bool IsMonotonic) {
2339  // Walk clauses and process safelen/lastprivate.
2340  LoopStack.setParallel(!IsMonotonic);
2341  LoopStack.setVectorizeEnable();
2342  emitSimdlenSafelenClause(*this, D, IsMonotonic);
2343  if (const auto *C = D.getSingleClause<OMPOrderClause>())
2344  if (C->getKind() == OMPC_ORDER_concurrent)
2345  LoopStack.setParallel(/*Enable=*/true);
2346  if ((D.getDirectiveKind() == OMPD_simd ||
2347  (getLangOpts().OpenMPSimd &&
2348  isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2349  llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2350  [](const OMPReductionClause *C) {
2351  return C->getModifier() == OMPC_REDUCTION_inscan;
2352  }))
2353  // Disable parallel access in case of prefix sum.
2354  LoopStack.setParallel(/*Enable=*/false);
2355 }
2356 
2357 void CodeGenFunction::EmitOMPSimdFinal(
2358  const OMPLoopDirective &D,
2359  const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2360  if (!HaveInsertPoint())
2361  return;
2362  llvm::BasicBlock *DoneBB = nullptr;
2363  auto IC = D.counters().begin();
2364  auto IPC = D.private_counters().begin();
2365  for (const Expr *F : D.finals()) {
2366  const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2367  const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2368  const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2369  if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2370  OrigVD->hasGlobalStorage() || CED) {
2371  if (!DoneBB) {
2372  if (llvm::Value *Cond = CondGen(*this)) {
2373  // If the first post-update expression is found, emit conditional
2374  // block if it was requested.
2375  llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2376  DoneBB = createBasicBlock(".omp.final.done");
2377  Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2378  EmitBlock(ThenBB);
2379  }
2380  }
2381  Address OrigAddr = Address::invalid();
2382  if (CED) {
2383  OrigAddr =
2384  EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2385  } else {
2386  DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2387  /*RefersToEnclosingVariableOrCapture=*/false,
2388  (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2389  OrigAddr = EmitLValue(&DRE).getAddress(*this);
2390  }
2391  OMPPrivateScope VarScope(*this);
2392  VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2393  (void)VarScope.Privatize();
2394  EmitIgnoredExpr(F);
2395  }
2396  ++IC;
2397  ++IPC;
2398  }
2399  if (DoneBB)
2400  EmitBlock(DoneBB, /*IsFinished=*/true);
2401 }
2402 
2403 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2404  const OMPLoopDirective &S,
2405  CodeGenFunction::JumpDest LoopExit) {
2406  CGF.EmitOMPLoopBody(S, LoopExit);
2407  CGF.EmitStopPoint(&S);
2408 }
2409 
2410 /// Emit a helper variable and return the corresponding lvalue.
2411 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2412  const DeclRefExpr *Helper) {
2413  auto VDecl = cast<VarDecl>(Helper->getDecl());
2414  CGF.EmitVarDecl(*VDecl);
2415  return CGF.EmitLValue(Helper);
2416 }
2417 
2418 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2419  const RegionCodeGenTy &SimdInitGen,
2420  const RegionCodeGenTy &BodyCodeGen) {
2421  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2422  PrePostActionTy &) {
2423  CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2424  CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2425  SimdInitGen(CGF);
2426 
2427  BodyCodeGen(CGF);
2428  };
2429  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2430  CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2431  CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2432 
2433  BodyCodeGen(CGF);
2434  };
2435  const Expr *IfCond = nullptr;
2436  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2437  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2438  if (CGF.getLangOpts().OpenMP >= 50 &&
2439  (C->getNameModifier() == OMPD_unknown ||
2440  C->getNameModifier() == OMPD_simd)) {
2441  IfCond = C->getCondition();
2442  break;
2443  }
2444  }
2445  }
2446  if (IfCond) {
2447  CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2448  } else {
2449  RegionCodeGenTy ThenRCG(ThenGen);
2450  ThenRCG(CGF);
2451  }
2452 }
2453 
2454 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPSimdDirective &S,
2455  PrePostActionTy &Action) {
2456  Action.Enter(CGF);
2457  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2458  "Expected simd directive");
2459  OMPLoopScope PreInitScope(CGF, S);
2460  // if (PreCond) {
2461  // for (IV in 0..LastIteration) BODY;
2462  // <Final counter/linear vars updates>;
2463  // }
2464  //
2465  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2466  isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2467  isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2468  (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2469  (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2470  }
2471 
2472  // Emit: if (PreCond) - begin.
2473  // If the condition constant folds and can be elided, avoid emitting the
2474  // whole loop.
2475  bool CondConstant;
2476  llvm::BasicBlock *ContBlock = nullptr;
2477  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2478  if (!CondConstant)
2479  return;
2480  } else {
2481  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2482  ContBlock = CGF.createBasicBlock("simd.if.end");
2483  emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2484  CGF.getProfileCount(&S));
2485  CGF.EmitBlock(ThenBlock);
2486  CGF.incrementProfileCounter(&S);
2487  }
2488 
2489  // Emit the loop iteration variable.
2490  const Expr *IVExpr = S.getIterationVariable();
2491  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2492  CGF.EmitVarDecl(*IVDecl);
2493  CGF.EmitIgnoredExpr(S.getInit());
2494 
2495  // Emit the iterations count variable.
2496  // If it is not a variable, Sema decided to calculate iterations count on
2497  // each iteration (e.g., it is foldable into a constant).
2498  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2499  CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2500  // Emit calculation of the iterations count.
2501  CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2502  }
2503 
2504  emitAlignedClause(CGF, S);
2505  (void)CGF.EmitOMPLinearClauseInit(S);
2506  {
2507  CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2508  CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2509  CGF.EmitOMPLinearClause(S, LoopScope);
2510  CGF.EmitOMPPrivateClause(S, LoopScope);
2511  CGF.EmitOMPReductionClauseInit(S, LoopScope);
2512  CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2513  CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2514  bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2515  (void)LoopScope.Privatize();
2516  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2517  CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2518 
2519  emitCommonSimdLoop(
2520  CGF, S,
2521  [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2522  CGF.EmitOMPSimdInit(S);
2523  },
2524  [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2525  CGF.EmitOMPInnerLoop(
2526  S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2527  [&S](CodeGenFunction &CGF) {
2528  emitOMPLoopBodyWithStopPoint(CGF, S,
2529  CodeGenFunction::JumpDest());
2530  },
2531  [](CodeGenFunction &) {});
2532  });
2533  CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2534  // Emit final copy of the lastprivate variables at the end of loops.
2535  if (HasLastprivateClause)
2536  CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2537  CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2538  emitPostUpdateForReductionClause(CGF, S,
2539  [](CodeGenFunction &) { return nullptr; });
2540  }
2541  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2542  // Emit: if (PreCond) - end.
2543  if (ContBlock) {
2544  CGF.EmitBranch(ContBlock);
2545  CGF.EmitBlock(ContBlock, true);
2546  }
2547 }
2548 
2549 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2550  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2551  OMPFirstScanLoop = true;
2552  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2553  emitOMPSimdRegion(CGF, S, Action);
2554  };
2555  {
2556  auto LPCRegion =
2557  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2558  OMPLexicalScope Scope(*this, S, OMPD_unknown);
2559  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2560  }
2561  // Check for outer lastprivate conditional update.
2562  checkForLastprivateConditionalUpdate(*this, S);
2563 }
2564 
2565 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2566  // Emit the de-sugared statement.
2567  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2568  EmitStmt(S.getTransformedStmt());
2569 }
2570 
2571 void CodeGenFunction::EmitOMPOuterLoop(
2572  bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2573  CodeGenFunction::OMPPrivateScope &LoopScope,
2574  const CodeGenFunction::OMPLoopArguments &LoopArgs,
2575  const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2576  const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2577  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2578 
2579  const Expr *IVExpr = S.getIterationVariable();
2580  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2581  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2582 
2583  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2584 
2585  // Start the loop with a block that tests the condition.
2586  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2587  EmitBlock(CondBlock);
2588  const SourceRange R = S.getSourceRange();
2589  OMPLoopNestStack.clear();
2590  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2591  SourceLocToDebugLoc(R.getEnd()));
2592 
2593  llvm::Value *BoolCondVal = nullptr;
2594  if (!DynamicOrOrdered) {
2595  // UB = min(UB, GlobalUB) or
2596  // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2597  // 'distribute parallel for')
2598  EmitIgnoredExpr(LoopArgs.EUB);
2599  // IV = LB
2600  EmitIgnoredExpr(LoopArgs.Init);
2601  // IV < UB
2602  BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2603  } else {
2604  BoolCondVal =
2605  RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2606  LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2607  }
2608 
2609  // If there are any cleanups between here and the loop-exit scope,
2610  // create a block to stage a loop exit along.
2611  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2612  if (LoopScope.requiresCleanups())
2613  ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2614 
2615  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2616  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2617  if (ExitBlock != LoopExit.getBlock()) {
2618  EmitBlock(ExitBlock);
2619  EmitBranchThroughCleanup(LoopExit);
2620  }
2621  EmitBlock(LoopBody);
2622 
2623  // Emit "IV = LB" (in case of static schedule, we have already calculated new
2624  // LB for loop condition and emitted it above).
2625  if (DynamicOrOrdered)
2626  EmitIgnoredExpr(LoopArgs.Init);
2627 
2628  // Create a block for the increment.
2629  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2630  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2631 
2632  emitCommonSimdLoop(
2633  *this, S,
2634  [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2635  // Generate !llvm.loop.parallel metadata for loads and stores for loops
2636  // with dynamic/guided scheduling and without ordered clause.
2637  if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2638  CGF.LoopStack.setParallel(!IsMonotonic);
2639  if (const auto *C = S.getSingleClause<OMPOrderClause>())
2640  if (C->getKind() == OMPC_ORDER_concurrent)
2641  CGF.LoopStack.setParallel(/*Enable=*/true);
2642  } else {
2643  CGF.EmitOMPSimdInit(S, IsMonotonic);
2644  }
2645  },
2646  [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2647  &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2648  SourceLocation Loc = S.getBeginLoc();
2649  // when 'distribute' is not combined with a 'for':
2650  // while (idx <= UB) { BODY; ++idx; }
2651  // when 'distribute' is combined with a 'for'
2652  // (e.g. 'distribute parallel for')
2653  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2654  CGF.EmitOMPInnerLoop(
2655  S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2656  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2657  CodeGenLoop(CGF, S, LoopExit);
2658  },
2659  [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2660  CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2661  });
2662  });
2663 
2664  EmitBlock(Continue.getBlock());
2665  BreakContinueStack.pop_back();
2666  if (!DynamicOrOrdered) {
2667  // Emit "LB = LB + Stride", "UB = UB + Stride".
2668  EmitIgnoredExpr(LoopArgs.NextLB);
2669  EmitIgnoredExpr(LoopArgs.NextUB);
2670  }
2671 
2672  EmitBranch(CondBlock);
2673  OMPLoopNestStack.clear();
2674  LoopStack.pop();
2675  // Emit the fall-through block.
2676  EmitBlock(LoopExit.getBlock());
2677 
2678  // Tell the runtime we are done.
2679  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2680  if (!DynamicOrOrdered)
2681  CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2682  S.getDirectiveKind());
2683  };
2684  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2685 }
2686 
2687 void CodeGenFunction::EmitOMPForOuterLoop(
2688  const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2689  const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2690  const OMPLoopArguments &LoopArgs,
2691  const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2692  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2693 
2694  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2695  const bool DynamicOrOrdered =
2696  Ordered || RT.isDynamic(ScheduleKind.Schedule);
2697 
2698  assert((Ordered ||
2699  !RT.isStaticNonchunked(ScheduleKind.Schedule,
2700  LoopArgs.Chunk != nullptr)) &&
2701  "static non-chunked schedule does not need outer loop");
2702 
2703  // Emit outer loop.
2704  //
2705  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2706  // When schedule(dynamic,chunk_size) is specified, the iterations are
2707  // distributed to threads in the team in chunks as the threads request them.
2708  // Each thread executes a chunk of iterations, then requests another chunk,
2709  // until no chunks remain to be distributed. Each chunk contains chunk_size
2710  // iterations, except for the last chunk to be distributed, which may have
2711  // fewer iterations. When no chunk_size is specified, it defaults to 1.
2712  //
2713  // When schedule(guided,chunk_size) is specified, the iterations are assigned
2714  // to threads in the team in chunks as the executing threads request them.
2715  // Each thread executes a chunk of iterations, then requests another chunk,
2716  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2717  // each chunk is proportional to the number of unassigned iterations divided
2718  // by the number of threads in the team, decreasing to 1. For a chunk_size
2719  // with value k (greater than 1), the size of each chunk is determined in the
2720  // same way, with the restriction that the chunks do not contain fewer than k
2721  // iterations (except for the last chunk to be assigned, which may have fewer
2722  // than k iterations).
2723  //
2724  // When schedule(auto) is specified, the decision regarding scheduling is
2725  // delegated to the compiler and/or runtime system. The programmer gives the
2726  // implementation the freedom to choose any possible mapping of iterations to
2727  // threads in the team.
2728  //
2729  // When schedule(runtime) is specified, the decision regarding scheduling is
2730  // deferred until run time, and the schedule and chunk size are taken from the
2731  // run-sched-var ICV. If the ICV is set to auto, the schedule is
2732  // implementation defined
2733  //
2734  // while(__kmpc_dispatch_next(&LB, &UB)) {
2735  // idx = LB;
2736  // while (idx <= UB) { BODY; ++idx;
2737  // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2738  // } // inner loop
2739  // }
2740  //
2741  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2742  // When schedule(static, chunk_size) is specified, iterations are divided into
2743  // chunks of size chunk_size, and the chunks are assigned to the threads in
2744  // the team in a round-robin fashion in the order of the thread number.
2745  //
2746  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2747  // while (idx <= UB) { BODY; ++idx; } // inner loop
2748  // LB = LB + ST;
2749  // UB = UB + ST;
2750  // }
2751  //
2752 
2753  const Expr *IVExpr = S.getIterationVariable();
2754  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2755  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2756 
2757  if (DynamicOrOrdered) {
2758  const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2759  CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2760  llvm::Value *LBVal = DispatchBounds.first;
2761  llvm::Value *UBVal = DispatchBounds.second;
2762  CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
2763  LoopArgs.Chunk};
2764  RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2765  IVSigned, Ordered, DispatchRTInputValues);
2766  } else {
2767  CGOpenMPRuntime::StaticRTInput StaticInit(
2768  IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2769  LoopArgs.ST, LoopArgs.Chunk);
2770  RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2771  ScheduleKind, StaticInit);
2772  }
2773 
2774  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2775  const unsigned IVSize,
2776  const bool IVSigned) {
2777  if (Ordered) {
2778  CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2779  IVSigned);
2780  }
2781  };
2782 
2783  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2784  LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2785  OuterLoopArgs.IncExpr = S.getInc();
2786  OuterLoopArgs.Init = S.getInit();
2787  OuterLoopArgs.Cond = S.getCond();
2788  OuterLoopArgs.NextLB = S.getNextLowerBound();
2789  OuterLoopArgs.NextUB = S.getNextUpperBound();
2790  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2791  emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2792 }
2793 
2794 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2795  const unsigned IVSize, const bool IVSigned) {}
2796 
2797 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2798  OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2799  OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2800  const CodeGenLoopTy &CodeGenLoopContent) {
2801 
2802  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2803 
2804  // Emit outer loop.
2805  // Same behavior as OMPForOuterLoop, except that the schedule cannot be
2806  // dynamic.
2807  //
2808 
2809  const Expr *IVExpr = S.getIterationVariable();
2810  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2811  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2812 
2813  CGOpenMPRuntime::StaticRTInput StaticInit(
2814  IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2815  LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2816  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2817 
2818  // for combined 'distribute' and 'for' the increment expression of distribute
2819  // is stored in DistInc. For 'distribute' alone, it is in Inc.
2820  Expr *IncExpr;
2821  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2822  IncExpr = S.getDistInc();
2823  else
2824  IncExpr = S.getInc();
2825 
2826  // this routine is shared by 'omp distribute parallel for' and
2827  // 'omp distribute': select the right EUB expression depending on the
2828  // directive
2829  OMPLoopArguments OuterLoopArgs;
2830  OuterLoopArgs.LB = LoopArgs.LB;
2831  OuterLoopArgs.UB = LoopArgs.UB;
2832  OuterLoopArgs.ST = LoopArgs.ST;
2833  OuterLoopArgs.IL = LoopArgs.IL;
2834  OuterLoopArgs.Chunk = LoopArgs.Chunk;
2835  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2836  ? S.getCombinedEnsureUpperBound()
2837  : S.getEnsureUpperBound();
2838  OuterLoopArgs.IncExpr = IncExpr;
2839  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2840  ? S.getCombinedInit()
2841  : S.getInit();
2842  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2843  ? S.getCombinedCond()
2844  : S.getCond();
2845  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2846  ? S.getCombinedNextLowerBound()
2847  : S.getNextLowerBound();
2848  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2849  ? S.getCombinedNextUpperBound()
2850  : S.getNextUpperBound();
2851 
2852  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2853  LoopScope, OuterLoopArgs, CodeGenLoopContent,
2854  emitEmptyOrdered);
2855 }
2856 
2857 static std::pair<LValue, LValue>
2858 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2859  const OMPExecutableDirective &S) {
2860  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2861  LValue LB =
2862  EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2863  LValue UB =
2864  EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2865 
2866  // When composing 'distribute' with 'for' (e.g. as in 'distribute
2867  // parallel for') we need to use the 'distribute'
2868  // chunk lower and upper bounds rather than the whole loop iteration
2869  // space. These are parameters to the outlined function for 'parallel'
2870  // and we copy the bounds of the previous schedule into
2871  // the current ones.
2872  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2873  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2874  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2875  PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2876  PrevLBVal = CGF.EmitScalarConversion(
2877  PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2878  LS.getIterationVariable()->getType(),
2879  LS.getPrevLowerBoundVariable()->getExprLoc());
2880  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2881  PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2882  PrevUBVal = CGF.EmitScalarConversion(
2883  PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2884  LS.getIterationVariable()->getType(),
2885  LS.getPrevUpperBoundVariable()->getExprLoc());
2886 
2887  CGF.EmitStoreOfScalar(PrevLBVal, LB);
2888  CGF.EmitStoreOfScalar(PrevUBVal, UB);
2889 
2890  return {LB, UB};
2891 }
2892 
2893 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2894 /// we need to use the LB and UB expressions generated by the worksharing
2895 /// code generation support, whereas in non-combined situations we would
2896 /// just emit 0 and the LastIteration expression.
2897 /// This function is necessary due to the difference between the LB and UB
2898 /// types for the RT emission routines 'for_static_init' and
2899 /// 'for_dispatch_init'.
2900 static std::pair<llvm::Value *, llvm::Value *>
2901 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2902  const OMPExecutableDirective &S,
2903  Address LB, Address UB) {
2904  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2905  const Expr *IVExpr = LS.getIterationVariable();
2906  // when implementing a dynamic schedule for a 'for' combined with a
2907  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2908  // is not normalized as each team only executes its own assigned
2909  // distribute chunk
2910  QualType IteratorTy = IVExpr->getType();
2911  llvm::Value *LBVal =
2912  CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2913  llvm::Value *UBVal =
2914  CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2915  return {LBVal, UBVal};
2916 }
2917 
2918 static void emitDistributeParallelForDistributeInnerBoundParams(
2919  CodeGenFunction &CGF, const OMPExecutableDirective &S,
2920  llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2921  const auto &Dir = cast<OMPLoopDirective>(S);
2922  LValue LB =
2923  CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2924  llvm::Value *LBCast =
2925  CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
2926  CGF.SizeTy, /*isSigned=*/false);
2927  CapturedVars.push_back(LBCast);
2928  LValue UB =
2929  CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2930 
2931  llvm::Value *UBCast =
2932  CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
2933  CGF.SizeTy, /*isSigned=*/false);
2934  CapturedVars.push_back(UBCast);
2935 }
2936 
2937 static void
2938 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2939  const OMPLoopDirective &S,
2940  CodeGenFunction::JumpDest LoopExit) {
2941  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2942  PrePostActionTy &Action) {
2943  Action.Enter(CGF);
2944  bool HasCancel = false;
2945  if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2946  if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2947  HasCancel = D->hasCancel();
2948  else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2949  HasCancel = D->hasCancel();
2950  else if (const auto *D =
2951  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2952  HasCancel = D->hasCancel();
2953  }
2954  CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2955  HasCancel);
2956  CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2957  emitDistributeParallelForInnerBounds,
2958  emitDistributeParallelForDispatchBounds);
2959  };
2960 
2961  emitCommonOMPParallelDirective(
2962  CGF, S,
2963  isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2964  CGInlinedWorksharingLoop,
2965  emitDistributeParallelForDistributeInnerBoundParams);
2966 }
2967 
2968 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2969  const OMPDistributeParallelForDirective &S) {
2970  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2971  CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2972  S.getDistInc());
2973  };
2974  OMPLexicalScope Scope(*this, S, OMPD_parallel);
2975  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2976 }
2977 
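// Informal summary: the combined directive above is lowered as an inlined
// 'distribute' region whose loop body launches the 'parallel for' part
// through emitInnerParallelForWhenCombined.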
2978 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2979  const OMPDistributeParallelForSimdDirective &S) {
2980  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2981  CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2982  S.getDistInc());
2983  };
2984  OMPLexicalScope Scope(*this, S, OMPD_parallel);
2985  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2986 }
2987 
2988 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2989  const OMPDistributeSimdDirective &S) {
2990  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2991  CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2992  };
2993  OMPLexicalScope Scope(*this, S, OMPD_unknown);
2994  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2995 }
2996 
2997 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2998  CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2999  // Emit SPMD target simd region as a standalone region.
3000  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3001  emitOMPSimdRegion(CGF, S, Action);
3002  };
3003  llvm::Function *Fn;
3004  llvm::Constant *Addr;
3005  // Emit target region as a standalone region.
3006  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3007  S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3008  assert(Fn && Addr && "Target device function emission failed.");
3009 }
3010 
3011 void CodeGenFunction::EmitOMPTargetSimdDirective(
3012  const OMPTargetSimdDirective &S) {
3013  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3014  emitOMPSimdRegion(CGF, S, Action);
3015  };
3016  emitCommonOMPTargetDirective(*this, S, CodeGen);
3017 }
3018 
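// Informal note: the DeviceFunction variant above is used when compiling the
// offload device image to pre-emit the outlined target region, while
// EmitOMPTargetSimdDirective is the host-side path through
// emitCommonOMPTargetDirective.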
3019 namespace {
3020  struct ScheduleKindModifiersTy {
3021  OpenMPScheduleClauseKind Kind;
3022  OpenMPScheduleClauseModifier M1;
3023  OpenMPScheduleClauseModifier M2;
3024  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3025  OpenMPScheduleClauseModifier M1,
3026  OpenMPScheduleClauseModifier M2)
3027  : Kind(Kind), M1(M1), M2(M2) {}
3028  };
3029 } // namespace
3030 
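// For illustration (hypothetical clause, not from this file): a directive
// such as
//
//   #pragma omp for schedule(monotonic: dynamic, 4)
//
// reaches EmitOMPWorksharingLoop below with ScheduleKind.Schedule ==
// OMPC_SCHEDULE_dynamic, ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic,
// and a chunk expression of 4, via the OMPScheduleClause accessors.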
3031 bool CodeGenFunction::EmitOMPWorksharingLoop(
3032  const OMPLoopDirective &S, Expr *EUB,
3033  const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3034  const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3035  // Emit the loop iteration variable.
3036  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3037  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3038  EmitVarDecl(*IVDecl);
3039 
3040  // Emit the iterations count variable.
3041  // If it is not a variable, Sema decided to calculate iterations count on each
3042  // iteration (e.g., it is foldable into a constant).
3043  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3044  EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3045  // Emit calculation of the iterations count.
3046  EmitIgnoredExpr(S.getCalcLastIteration());
3047  }
3048 
3049  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3050 
3051  bool HasLastprivateClause;
3052  // Check pre-condition.
3053  {
3054  OMPLoopScope PreInitScope(*this, S);
3055  // Skip the entire loop if we don't meet the precondition.
3056  // If the condition constant folds and can be elided, avoid emitting the
3057  // whole loop.
3058  bool CondConstant;
3059  llvm::BasicBlock *ContBlock = nullptr;
3060  if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3061  if (!CondConstant)
3062  return false;
3063  } else {
3064  llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3065  ContBlock = createBasicBlock("omp.precond.end");
3066  emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3067  getProfileCount(&S));
3068  EmitBlock(ThenBlock);
3069  incrementProfileCounter(&S);
3070  }
3071 
3072  RunCleanupsScope DoacrossCleanupScope(*this);
3073  bool Ordered = false;
3074  if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3075  if (OrderedClause->getNumForLoops())
3076  RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3077  else
3078  Ordered = true;
3079  }
3080 
3081  llvm::DenseSet<const Expr *> EmittedFinals;
3082  emitAlignedClause(*this, S);
3083  bool HasLinears = EmitOMPLinearClauseInit(S);
3084  // Emit helper vars inits.
3085 
3086  std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3087  LValue LB = Bounds.first;
3088  LValue UB = Bounds.second;
3089  LValue ST =
3090  EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3091  LValue IL =
3092  EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3093 
3094  // Emit 'then' code.
3095  {
3096  OMPPrivateScope LoopScope(*this);
3097  if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
3098  // Emit implicit barrier to synchronize threads and avoid data races on
3099  // initialization of firstprivate variables and post-update of
3100  // lastprivate variables.
3101  CGM.getOpenMPRuntime().emitBarrierCall(
3102  *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3103  /*ForceSimpleCall=*/true);
3104  }
3105  EmitOMPPrivateClause(S, LoopScope);
3106  CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3107  *this, S, EmitLValue(S.getIterationVariable()));
3108  HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3109  EmitOMPReductionClauseInit(S, LoopScope);
3110  EmitOMPPrivateLoopCounters(S, LoopScope);
3111  EmitOMPLinearClause(S, LoopScope);
3112  (void)LoopScope.Privatize();
3113  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3114  CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3115 
3116  // Detect the loop schedule kind and chunk.
3117  const Expr *ChunkExpr = nullptr;
3118  OpenMPScheduleTy ScheduleKind;
3119  if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3120  ScheduleKind.Schedule = C->getScheduleKind();
3121  ScheduleKind.M1 = C->getFirstScheduleModifier();
3122  ScheduleKind.M2 = C->getSecondScheduleModifier();
3123  ChunkExpr = C->getChunkSize();
3124  } else {
3125  // Default behaviour for schedule clause.
3126  CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3127  *this, S, ScheduleKind.Schedule, ChunkExpr);
3128  }
3129  bool HasChunkSizeOne = false;
3130  llvm::Value *Chunk = nullptr;
3131  if (ChunkExpr) {
3132  Chunk = EmitScalarExpr(ChunkExpr);
3133  Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3134  S.getIterationVariable()->getType(),
3135  S.getBeginLoc());
3136  Expr::EvalResult Result;
3137  if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3138  llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3139  HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3140  }
3141  }
3142  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3143  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3144  // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3145  // If the static schedule kind is specified or if the ordered clause is
3146  // specified, and if no monotonic modifier is specified, the effect will
3147  // be as if the monotonic modifier was specified.
3148  bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
3149  /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
3150  isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3151  bool IsMonotonic =
3152  Ordered ||
3153  ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
3154  ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
3155  !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3156  ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3157  ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3158  ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
3159  if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3160  /* Chunked */ Chunk != nullptr) ||
3161  StaticChunkedOne) &&
3162  !Ordered) {
3163  JumpDest LoopExit =
3164  getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3165  emitCommonSimdLoop(
3166  *this, S,
3167  [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
3168  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3169  CGF.EmitOMPSimdInit(S, IsMonotonic);
3170  } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3171  if (C->getKind() == OMPC_ORDER_concurrent)
3172  CGF.LoopStack.setParallel(/*Enable=*/true);
3173  }
3174  },
3175  [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3176  &S, ScheduleKind, LoopExit,
3177  &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3178  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3179  // When no chunk_size is specified, the iteration space is divided
3180  // into chunks that are approximately equal in size, and at most
3181  // one chunk is distributed to each thread. Note that the size of
3182  // the chunks is unspecified in this case.
3183  CGOpenMPRuntime::StaticRTInput StaticInit(
3184  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3185  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3186  StaticChunkedOne ? Chunk : nullptr);
3187  CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3188  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3189  StaticInit);
3190  // UB = min(UB, GlobalUB);
3191  if (!StaticChunkedOne)
3192  CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3193  // IV = LB;
3194  CGF.EmitIgnoredExpr(S.getInit());
3195  // For unchunked static schedule generate:
3196  //
3197  // while (idx <= UB) {
3198  // BODY;
3199  // ++idx;
3200  // }
3201  //
3202  // For static schedule with chunk one:
3203  //
3204  // while (IV <= PrevUB) {
3205  // BODY;
3206  // IV += ST;
3207  // }
3208  CGF.EmitOMPInnerLoop(
3209  S, LoopScope.requiresCleanups(),
3210  StaticChunkedOne ? S.getCombinedParForInDistCond()
3211  : S.getCond(),
3212  StaticChunkedOne ? S.getDistInc() : S.getInc(),
3213  [&S, LoopExit](CodeGenFunction &CGF) {
3214  emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3215  },
3216  [](CodeGenFunction &) {});
3217  });
3218  EmitBlock(LoopExit.getBlock());
3219  // Tell the runtime we are done.
3220  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3221  CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3222  S.getDirectiveKind());
3223  };
3224  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3225  } else {
3226  // Emit the outer loop, which requests its work chunk [LB..UB] from
3227  // runtime and runs the inner loop to process it.
3228  const OMPLoopArguments LoopArguments(
3229  LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3230  IL.getAddress(*this), Chunk, EUB);
3231  EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3232  LoopArguments, CGDispatchBounds);
3233  }
3234  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3235  EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3236  return CGF.Builder.CreateIsNotNull(
3237  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3238  });
3239  }
3240  EmitOMPReductionClauseFinal(
3241  S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3242  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3243  : /*Parallel only*/ OMPD_parallel);
3244  // Emit post-update of the reduction variables if IsLastIter != 0.
3245  emitPostUpdateForReductionClause(
3246  *this, S, [IL, &S](CodeGenFunction &CGF) {
3247  return CGF.Builder.CreateIsNotNull(
3248  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3249  });
3250  // Emit final copy of the lastprivate variables if IsLastIter != 0.
3251  if (HasLastprivateClause)
3252  EmitOMPLastprivateClauseFinal(
3253  S, isOpenMPSimdDirective(S.getDirectiveKind()),
3254  Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3255  }
3256  EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3257  return CGF.Builder.CreateIsNotNull(
3258  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3259  });
3260  DoacrossCleanupScope.ForceCleanup();
3261  // We're now done with the loop, so jump to the continuation block.
3262  if (ContBlock) {
3263  EmitBranch(ContBlock);
3264  EmitBlock(ContBlock, /*IsFinished=*/true);
3265  }
3266  }
3267  return HasLastprivateClause;
3268 }
3269 
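// Informal dispatch summary for EmitOMPWorksharingLoop above:
//   - static, unchunked: for_static_init plus the inner
//     "while (IV <= UB)" loop emitted inline;
//   - schedule(static, 1) inside a combined 'distribute' loop: the same
//     static path with the chunk-one condition and stride increment;
//   - dynamic/guided or 'ordered': EmitOMPForOuterLoop, which requests
//     [LB, UB] chunks from the runtime and runs the inner loop per chunk.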
3270 /// The following two functions generate expressions for the loop lower
3271 /// and upper bounds in case of static and dynamic (dispatch) schedules
3272 /// of the associated 'for' or 'distribute' loop.
3273 static std::pair<LValue, LValue>
3274 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3275  const auto &LS = cast<OMPLoopDirective>(S);
3276  LValue LB =
3277  EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3278  LValue UB =
3279  EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3280  return {LB, UB};
3281 }
3282 
3283 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3284 /// consider the lower and upper bound expressions generated by the
3285 /// worksharing loop support, but we use 0 and the iteration space size as
3286 /// constants.
3287 static std::pair<llvm::Value *, llvm::Value *>
3288 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3289  Address LB, Address UB) {
3290  const auto &LS = cast<OMPLoopDirective>(S);
3291  const Expr *IVExpr = LS.getIterationVariable();
3292  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3293  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3294  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3295  return {LBVal, UBVal};
3296 }
3297 
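// Contrast (informal): emitForLoopBounds feeds 'for_static_init' through the
// LB/UB helper variables, while emitDispatchForLoopBounds feeds
// 'for_dispatch_init' with the constant 0 and the precomputed iteration
// count, matching the type difference noted in the comments above.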
3298 /// Emits the code for the directive with inscan reductions.
3299 /// The code is the following:
3300 /// \code
3301 /// size num_iters = <num_iters>;
3302 /// <type> buffer[num_iters];
3303 /// #pragma omp ...
3304 /// for (i: 0..<num_iters>) {
3305 /// <input phase>;
3306 /// buffer[i] = red;
3307 /// }
3308 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3309 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3310 /// buffer[cnt] op= buffer[cnt-pow(2,k)];
3311 /// #pragma omp ...
3312 /// for (0..<num_iters>) {
3313 /// red = InclusiveScan ? buffer[i] : buffer[i-1];
3314 /// <scan phase>;
3315 /// }
3316 /// \endcode
3317 static void emitScanBasedDirective(
3318  CodeGenFunction &CGF, const OMPLoopDirective &S,
3319  llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3320  llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3321  llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3322  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3323  NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3324  SmallVector<const Expr *, 4> Shareds;
3325  SmallVector<const Expr *, 4> Privates;
3326  SmallVector<const Expr *, 4> ReductionOps;
3327  SmallVector<const Expr *, 4> LHSs;
3328  SmallVector<const Expr *, 4> RHSs;
3329  SmallVector<const Expr *, 4> CopyOps;
3330  SmallVector<const Expr *, 4> CopyArrayTemps;
3331  SmallVector<const Expr *, 4> CopyArrayElems;
3332  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3333  assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3334  "Only inscan reductions are expected.");
3335  Shareds.append(C->varlist_begin(), C->varlist_end());
3336  Privates.append(C->privates().begin(), C->privates().end());
3337  ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3338  LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3339  RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3340  CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3341  CopyArrayTemps.append(C->copy_array_temps().begin(),
3342  C->copy_array_temps().end());
3343  CopyArrayElems.append(C->copy_array_elems().begin(),
3344  C->copy_array_elems().end());
3345  }
3346  {
3347  // Emit buffers for each reduction variable.
3348  // ReductionCodeGen is required to emit the code for array reductions
3349  // correctly.
3350  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3351  unsigned Count = 0;
3352  auto *ITA = CopyArrayTemps.begin();
3353  for (const Expr *IRef : Privates) {
3354  const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3355  // Emit variably modified arrays, used for arrays/array sections
3356  // reductions.
3357  if (PrivateVD->getType()->isVariablyModifiedType()) {
3358  RedCG.emitSharedOrigLValue(CGF, Count);
3359  RedCG.emitAggregateType(CGF, Count);
3360  }
3361  CodeGenFunction::OpaqueValueMapping DimMapping(
3362  CGF,
3363  cast<OpaqueValueExpr>(
3364  cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3365  ->getSizeExpr()),
3366  RValue::get(OMPScanNumIterations));
3367  // Emit temp buffer.
3368  CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3369  ++ITA;
3370  ++Count;
3371  }
3372  }
3373  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3374  {
3375  // Emit loop with input phase:
3376  // #pragma omp ...
3377  // for (i: 0..<num_iters>) {
3378  // <input phase>;
3379  // buffer[i] = red;
3380  // }
3381  CGF.OMPFirstScanLoop = true;
3382  CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3383  FirstGen(CGF);
3384  }
3385  // Emit prefix reduction:
3386  // for (int k = 0; k != ceil(log2(n)); ++k)
3387  llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3388  llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3389  llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3390  llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3391  llvm::Value *Arg =
3392  CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3393  llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3394  F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3395  LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3396  LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3397  llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3398  OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3399  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3400  CGF.EmitBlock(LoopBB);
3401  auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3402  // size pow2k = 1;
3403  auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3404  Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3405  Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3406  // for (size i = n - 1; i >= 2 ^ k; --i)
3407  // tmp[i] op= tmp[i-pow2k];
3408  llvm::BasicBlock *InnerLoopBB =
3409  CGF.createBasicBlock("omp.inner.log.scan.body");
3410  llvm::BasicBlock *InnerExitBB =
3411  CGF.createBasicBlock("omp.inner.log.scan.exit");
3412  llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3413  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3414  CGF.EmitBlock(InnerLoopBB);
3415  auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3416  IVal->addIncoming(NMin1, LoopBB);
3417  {
3418  CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3419  auto *ILHS = LHSs.begin();
3420  auto *IRHS = RHSs.begin();
3421  for (const Expr *CopyArrayElem : CopyArrayElems) {
3422  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3423  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3424  Address LHSAddr = Address::invalid();
3425  {
3426  CodeGenFunction::OpaqueValueMapping IdxMapping(
3427  CGF,
3428  cast<OpaqueValueExpr>(
3429  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3430  RValue::get(IVal));
3431  LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3432  }
3433  PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
3434  Address RHSAddr = Address::invalid();
3435  {
3436  llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3437  CodeGenFunction::OpaqueValueMapping IdxMapping(
3438  CGF,
3439  cast<OpaqueValueExpr>(
3440  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3441  RValue::get(OffsetIVal));
3442  RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3443  }
3444  PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
3445  ++ILHS;
3446  ++IRHS;
3447  }
3448  PrivScope.Privatize();
3449  CGF.CGM.getOpenMPRuntime().emitReduction(
3450  CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3451  {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3452  }
3453  llvm::Value *NextIVal =
3454  CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3455  IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3456  CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3457  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3458  CGF.EmitBlock(InnerExitBB);
3459  llvm::Value *Next =
3460  CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3461  Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3462  // pow2k <<= 1;
3463  llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3464  Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3465  llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3466  CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3467  auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3468  CGF.EmitBlock(ExitBB);
3469 
3470  CGF.OMPFirstScanLoop = false;
3471  SecondGen(CGF);
3472 }
3473 
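// A minimal user-level sketch that reaches emitScanBasedDirective
// (hypothetical example, assuming arrays a and b of length n):
//
//   int red = 0;
//   #pragma omp for reduction(inscan, +: red)
//   for (int i = 0; i < n; ++i) {
//     red += a[i];                      // <input phase>
//     #pragma omp scan inclusive(red)
//     b[i] = red;                       // <scan phase>
//   }
//
// FirstGen emits the loop that stores partial values into the temporary
// buffer, the ceil(log2(num_iters)) passes above combine the buffer in
// place, and SecondGen re-emits the loop reading the prefix results back.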
3474 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3475  const OMPLoopDirective &S,
3476  bool HasCancel) {
3477  bool HasLastprivates;
3478  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3479  [](const OMPReductionClause *C) {
3480  return C->getModifier() == OMPC_REDUCTION_inscan;
3481  })) {
3482  const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3483  CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3484  OMPLoopScope LoopScope(CGF, S);
3485  return CGF.EmitScalarExpr(S.getNumIterations());
3486  };
3487  const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3488  CodeGenFunction::OMPCancelStackRAII CancelRegion(
3489  CGF, S.getDirectiveKind(), HasCancel);
3490  (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3491  emitForLoopBounds,
3492  emitDispatchForLoopBounds);
3493  // Emit an implicit barrier at the end.
3494  CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3495  OMPD_for);
3496  };
3497  const auto &&SecondGen = [&S, HasCancel,
3498  &HasLastprivates](CodeGenFunction &CGF) {
3499  CodeGenFunction::OMPCancelStackRAII CancelRegion(
3500  CGF, S.getDirectiveKind(), HasCancel);
3501  HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3502  emitForLoopBounds,
3503  emitDispatchForLoopBounds);
3504  };
3505  emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3506  } else {
3507  CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3508  HasCancel);
3509  HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3510  emitForLoopBounds,
3511  emitDispatchForLoopBounds);
3512  }
3513  return HasLastprivates;
3514 }
3515 
3516 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3517  if (S.hasCancel())
3518  return false;
3519  for (OMPClause *C : S.clauses())
3520  if (!isa<OMPNowaitClause>(C))
3521  return false;
3522 
3523  return true;
3524 }
3525 
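// In other words (informal): only a cancel-free 'for' whose clause list is
// empty or consists solely of 'nowait' takes the OpenMPIRBuilder path in
// EmitOMPForDirective below; everything else uses the classic codegen.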
3526 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3527  bool HasLastprivates = false;
3528  bool UseOMPIRBuilder =
3529  CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
3530  auto &&CodeGen = [this, &S, &HasLastprivates,
3531  UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3532  // Use the OpenMPIRBuilder if enabled.
3533  if (UseOMPIRBuilder) {
3534  // Emit the associated statement and get its loop representation.
3535  const Stmt *Inner = S.getRawStmt();
3536  llvm::CanonicalLoopInfo *CLI =
3537  EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3538 
3539  bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3540  llvm::OpenMPIRBuilder &OMPBuilder =
3541  CGM.getOpenMPRuntime().getOMPBuilder();
3542  llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3543  AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3544  OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier);
3545  return;
3546  }
3547 
3548  HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3549  };
3550  {
3551  auto LPCRegion =
3552  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3553  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3554  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3555  S.hasCancel());
3556  }
3557 
3558  if (!UseOMPIRBuilder) {
3559  // Emit an implicit barrier at the end.
3560  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3561  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3562  }
3563  // Check for outer lastprivate conditional update.
3564  checkForLastprivateConditionalUpdate(*this, S);
3565 }
3566 
3567 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3568  bool HasLastprivates = false;
3569  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3570  PrePostActionTy &) {
3571  HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3572  };
3573  {
3574  auto LPCRegion =
3575  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3576  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3577  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3578  }
3579 
3580  // Emit an implicit barrier at the end.
3581  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3582  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3583  // Check for outer lastprivate conditional update.
3584  checkForLastprivateConditionalUpdate(*this, S);
3585 }
3586 
3587 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3588  const Twine &Name,
3589  llvm::Value *Init = nullptr) {
3590  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3591  if (Init)
3592  CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3593  return LVal;
3594 }
3595 
3596 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3597  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3598  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3599  bool HasLastprivates = false;
3600  auto &&CodeGen = [&S, CapturedStmt, CS,
3601  &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3602  const ASTContext &C = CGF.getContext();
3603  QualType KmpInt32Ty =
3604  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3605  // Emit helper vars inits.
3606  LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3607  CGF.Builder.getInt32(0));
3608  llvm::ConstantInt *GlobalUBVal = CS != nullptr
3609  ? CGF.Builder.getInt32(CS->size() - 1)
3610  : CGF.Builder.getInt32(0);
3611  LValue UB =
3612  createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3613  LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3614  CGF.Builder.getInt32(1));
3615  LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
3616  CGF.Builder.getInt32(0));
3617  // Loop counter.
3618  LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
3619  OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3620  CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
3621  OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3622  CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3623  // Generate condition for loop.
3624  BinaryOperator *Cond = BinaryOperator::Create(
3625  C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
3626  S.getBeginLoc(), FPOptionsOverride());
3627  // Increment for loop counter.
3628  UnaryOperator *Inc = UnaryOperator::Create(
3629  C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
3630  S.getBeginLoc(), true, FPOptionsOverride());
3631  auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3632  // Iterate through all sections and emit a switch construct:
3633  // switch (IV) {
3634  // case 0:
3635  // <SectionStmt[0]>;
3636  // break;
3637  // ...
3638  // case <NumSection> - 1:
3639  // <SectionStmt[<NumSection> - 1]>;
3640  // break;
3641  // }
3642  // .omp.sections.exit:
3643  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
3644  llvm::SwitchInst *SwitchStmt =
3645  CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
3646  ExitBB, CS == nullptr ? 1 : CS->size());
3647  if (CS) {
3648  unsigned CaseNumber = 0;
3649  for (const Stmt *SubStmt : CS->children()) {
3650  auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
3651  CGF.EmitBlock(CaseBB);
3652  SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
3653  CGF.EmitStmt(SubStmt);
3654  CGF.EmitBranch(ExitBB);
3655  ++CaseNumber;
3656  }
3657  } else {
3658  llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
3659  CGF.EmitBlock(CaseBB);
3660  SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
3661  CGF.EmitStmt(CapturedStmt);
3662  CGF.EmitBranch(ExitBB);
3663  }
3664  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
3665  };
3666 
3667  CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3668  if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
3669  // Emit implicit barrier to synchronize threads and avoid data races on
3670  // initialization of firstprivate variables and post-update of lastprivate
3671  // variables.
3672  CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3673  CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3674  /*ForceSimpleCall=*/true);
3675  }
3676  CGF.EmitOMPPrivateClause(S, LoopScope);
3677  CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
3678  HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3679  CGF.EmitOMPReductionClauseInit(S, LoopScope);
3680  (void)LoopScope.Privatize();
3681  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3682  CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
3683 
3684  // Emit static non-chunked loop.
3685  OpenMPScheduleTy ScheduleKind;
3686  ScheduleKind.Schedule = OMPC_SCHEDULE_static;
3687  CGOpenMPRuntime::StaticRTInput StaticInit(
3688  /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
3689  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
3690  CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3691  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
3692  // UB = min(UB, GlobalUB);
3693  llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
3694  llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
3695  CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
3696  CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
3697  // IV = LB;
3698  CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
3699  // while (idx <= UB) { BODY; ++idx; }
3700  CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
3701  [](CodeGenFunction &) {});
3702  // Tell the runtime we are done.
3703  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3704  CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3705  S.getDirectiveKind());
3706  };
3707  CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
3708  CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3709  // Emit post-update of the reduction variables if IsLastIter != 0.
3710  emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
3711  return CGF.Builder.CreateIsNotNull(
3712  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3713  });
3714 
3715  // Emit final copy of the lastprivate variables if IsLastIter != 0.
3716  if (HasLastprivates)
3717  CGF.EmitOMPLastprivateClauseFinal(
3718  S, /*NoFinals=*/false,
3719  CGF.Builder.CreateIsNotNull(
3720  CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
3721  };
3722 
3723  bool HasCancel = false;
3724  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
3725  HasCancel = OSD->hasCancel();
3726  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
3727  HasCancel = OPSD->hasCancel();
3728  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
3729  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
3730  HasCancel);
3731  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
3732  // clause. Otherwise the barrier will be generated by the codegen for the
3733  // directive.
3734  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
3735  // Emit implicit barrier to synchronize threads and avoid data races on
3736  // the final copy-back of the lastprivate variables.
3737  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3738  OMPD_unknown);
3739  }
3740 }
3741 
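// For reference, a sketch of the input EmitSections lowers (hypothetical):
//
//   #pragma omp sections
//   {
//     #pragma omp section
//     A();
//     #pragma omp section
//     B();
//   }
//
// becomes a statically scheduled loop over IV in [0, 1] whose body switches
// on IV to run A() or B(), exactly as built by BodyGen above.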
3742 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3743  {
3744  auto LPCRegion =
3745  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3746  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3747  EmitSections(S);
3748  }
3749  // Emit an implicit barrier at the end.
3750  if (!S.getSingleClause<OMPNowaitClause>()) {
3751  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3752  OMPD_sections);
3753  }
3754  // Check for outer lastprivate conditional update.
3755  checkForLastprivateConditionalUpdate(*this, S);
3756 }
3757 
3758 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3759  LexicalScope Scope(*this, S.getSourceRange());
3760  EmitStopPoint(&S);
3761  EmitStmt(S.getAssociatedStmt());
3762 }
3763 
3764 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
3765  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
3766  llvm::SmallVector<const Expr *, 8> DestExprs;
3767  llvm::SmallVector<const Expr *, 8> SrcExprs;
3768  llvm::SmallVector<const Expr *, 8> AssignmentOps;
3769  // Check if there are any 'copyprivate' clauses associated with this
3770  // 'single' construct.
3771  // Build a list of copyprivate variables along with helper expressions
3772  // (<source>, <destination>, <destination>=<source> expressions)
3773  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
3774  CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
3775  DestExprs.append(C->destination_exprs().begin(),
3776  C->destination_exprs().end());
3777  SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
3778  AssignmentOps.append(C->assignment_ops().begin(),
3779  C->assignment_ops().end());
3780  }
3781  // Emit code for 'single' region along with 'copyprivate' clauses
3782  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3783  Action.Enter(CGF);
3784  OMPPrivateScope SingleScope(CGF);
3785  (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
3786  CGF.EmitOMPPrivateClause(S, SingleScope);
3787  (void)SingleScope.Privatize();
3788  CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3789  };
3790  {
3791  auto LPCRegion =
3792  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3793  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3794  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
3795  CopyprivateVars, DestExprs,
3796  SrcExprs, AssignmentOps);
3797  }
3798  // Emit an implicit barrier at the end (to avoid data race on firstprivate
3799  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
3800  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
3801  CGM.getOpenMPRuntime().emitBarrierCall(
3802  *this, S.getBeginLoc(),
3803  S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
3804  }
3805  // Check for outer lastprivate conditional update.
3806  checkForLastprivateConditionalUpdate(*this, S);
3807 }
3808 
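// Copyprivate sketch (hypothetical): given
//
//   #pragma omp single copyprivate(x)
//   x = compute();
//
// the <destination> = <source> helper expressions collected above let the
// runtime broadcast the executing thread's x to every other thread's x.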
3809 static void emitMaster(CodeGenFunction &CGF, const OMPMasterDirective &S) {
3810  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3811  Action.Enter(CGF);
3812  CGF.EmitStmt(S.getRawStmt());
3813  };
3814  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3815 }
3816 
3817 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
3818  if (CGM.getLangOpts().OpenMPIRBuilder) {
3819  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3820  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3821 
3822  const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
3823 
3824  auto FiniCB = [this](InsertPointTy IP) {
3825  OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3826  };
3827 
3828  auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
3829  InsertPointTy CodeGenIP,
3830  llvm::BasicBlock &FiniBB) {
3831  OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3832  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
3833  CodeGenIP, FiniBB);
3834  };
3835 
3836  LexicalScope Scope(*this, S.getSourceRange());
3837  EmitStopPoint(&S);
3838  Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
3839 
3840  return;
3841  }
3842  LexicalScope Scope(*this, S.getSourceRange());
3843  EmitStopPoint(&S);
3844  emitMaster(*this, S);
3845 }
3846 
3847 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
3848  if (CGM.getLangOpts().OpenMPIRBuilder) {
3849  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3850  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3851 
3852  const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
3853  const Expr *Hint = nullptr;
3854  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3855  Hint = HintClause->getHint();
3856 
3857  // TODO: This is slightly different from what's currently being done in
3858  // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
3859  // about typing is final.
3860  llvm::Value *HintInst = nullptr;
3861  if (Hint)
3862  HintInst =
3863  Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
3864 
3865  auto FiniCB = [this](InsertPointTy IP) {
3866  OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3867  };
3868 
3869  auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
3870  InsertPointTy CodeGenIP,
3871  llvm::BasicBlock &FiniBB) {
3872  OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3873  OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
3874  CodeGenIP, FiniBB);
3875  };
3876 
3877  LexicalScope Scope(*this, S.getSourceRange());
3878  EmitStopPoint(&S);
3879  Builder.restoreIP(OMPBuilder.createCritical(
3880  Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
3881  HintInst));
3882 
3883  return;
3884  }
3885 
3886  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3887  Action.Enter(CGF);
3888  CGF.EmitStmt(S.getAssociatedStmt());
3889  };
3890  const Expr *Hint = nullptr;
3891  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3892  Hint = HintClause->getHint();
3893  LexicalScope Scope(*this, S.getSourceRange());
3894  EmitStopPoint(&S);
3895  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
3896  S.getDirectiveName().getAsString(),
3897  CodeGen, S.getBeginLoc(), Hint);
3898 }
3899 
3900 void CodeGenFunction::EmitOMPParallelForDirective(
3901  const OMPParallelForDirective &S) {
3902  // Emit directive as a combined directive that consists of two implicit
3903  // directives: 'parallel' with 'for' directive.
3904  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3905  Action.Enter(CGF);
3906  (void)emitWorksharingDirective(CGF, S, S.hasCancel());
3907  };
3908  {
3909  auto LPCRegion =
3910  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3911  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
3912  emitEmptyBoundParameters);
3913  }
3914  // Check for outer lastprivate conditional update.
3915  checkForLastprivateConditionalUpdate(*this, S);
3916 }
3917 
3918 void CodeGenFunction::EmitOMPParallelForSimdDirective(
3919  const OMPParallelForSimdDirective &S) {
3920  // Emit directive as a combined directive that consists of two implicit
3921  // directives: 'parallel' with 'for' directive.
3922  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3923  Action.Enter(CGF);
3924  (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3925  };
3926  {
3927  auto LPCRegion =
3928  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3929  emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
3930  emitEmptyBoundParameters);
3931  }
3932  // Check for outer lastprivate conditional update.
3933  checkForLastprivateConditionalUpdate(*this, S);
3934 }
3935 
3936 void CodeGenFunction::EmitOMPParallelMasterDirective(
3937  const OMPParallelMasterDirective &S) {
3938  // Emit directive as a combined directive that consists of two implicit
3939  // directives: 'parallel' with 'master' directive.
3940  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3941  Action.Enter(CGF);
3942  OMPPrivateScope PrivateScope(CGF);
3943  bool Copyins = CGF.EmitOMPCopyinClause(S);
3944  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3945  if (Copyins) {
3946  // Emit implicit barrier to synchronize threads and avoid data races on
3947  // propagation of the master thread's values of threadprivate variables
3948  // to the local instances of those variables in all other implicit threads.
3949  CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3950  CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3951  /*ForceSimpleCall=*/true);
3952  }
3953  CGF.EmitOMPPrivateClause(S, PrivateScope);
3954  CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3955  (void)PrivateScope.Privatize();
3956  emitMaster(CGF, S);
3957  CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3958  };
3959  {
3960  auto LPCRegion =
3961  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3962  emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
3963  emitEmptyBoundParameters);
3964  emitPostUpdateForReductionClause(*this, S,
3965  [](CodeGenFunction &) { return nullptr; });
3966  }
3967  // Check for outer lastprivate conditional update.
3968  checkForLastprivateConditionalUpdate(*this, S);
3969 }
3970 
3971 void CodeGenFunction::EmitOMPParallelSectionsDirective(
3972  const OMPParallelSectionsDirective &S) {
3973  // Emit directive as a combined directive that consists of two implicit
3974  // directives: 'parallel' with 'sections' directive.
3975  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3976  Action.Enter(CGF);
3977  CGF.EmitSections(S);
3978  };
3979  {
3980  auto LPCRegion =
3981  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3982  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
3983  emitEmptyBoundParameters);
3984  }
3985  // Check for outer lastprivate conditional update.
3986  checkForLastprivateConditionalUpdate(*this, S);
3987 }
3988 
3989 namespace {
3990 /// Get the list of variables declared in the context of the untied tasks.
3991 class CheckVarsEscapingUntiedTaskDeclContext final
3992  : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
3993  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
3994 
3995 public:
3996  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
3997  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
3998  void VisitDeclStmt(const DeclStmt *S) {
3999  if (!S)
4000  return;
4001  // Need to privatize only local vars; static locals can be processed as is.
4002  for (const Decl *D : S->decls()) {
4003  if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4004  if (VD->hasLocalStorage())
4005  PrivateDecls.push_back(VD);
4006  }
4007  }
4008  void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; }
4009  void VisitCapturedStmt(const CapturedStmt *) { return; }
4010  void VisitLambdaExpr(const LambdaExpr *) { return; }
4011  void VisitBlockExpr(const BlockExpr *) { return; }
4012  void VisitStmt(const Stmt *S) {
4013  if (!S)
4014  return;
4015  for (const Stmt *Child : S->children())
4016  if (Child)
4017  Visit(Child);
4018  }
4019 
4020  /// Returns the list of privatized local variables.
4021  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4022 };
4023 } // anonymous namespace
4024 
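// Untied-task sketch (hypothetical): in
//
//   #pragma omp task untied
//   { int tmp = g(); use(tmp); }
//
// 'tmp' has local storage, so the visitor above records it and
// EmitOMPTaskBasedDirective gives it task-private storage that survives
// task-switching points; a static local would be left as is.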
4025 void CodeGenFunction::EmitOMPTaskBasedDirective(
4026  const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4027  const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4028  OMPTaskDataTy &Data) {
4029  // Emit outlined function for task construct.
4030  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4031  auto I = CS->getCapturedDecl()->param_begin();
4032  auto PartId = std::next(I);
4033  auto TaskT = std::next(I, 4);
4034  // Check if the task is final.
4035  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4036  // If the condition constant folds and can be elided, try to avoid emitting
4037  // the condition and the dead arm of the if/else.
4038  const Expr *Cond = Clause->getCondition();
4039  bool CondConstant;
4040  if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4041  Data.Final.setInt(CondConstant);
4042  else
4043  Data.Final.setPointer(EvaluateExprAsBool(Cond));
4044  } else {
4045  // By default the task is not final.
4046  Data.Final.setInt(/*IntVal=*/false);
4047  }
4048  // Check if the task has 'priority' clause.
4049  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4050  const Expr *Prio = Clause->getPriority();
4051  Data.Priority.setInt(/*IntVal=*/true);
4052  Data.Priority.setPointer(EmitScalarConversion(
4053  EmitScalarExpr(Prio), Prio->getType(),
4054  getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4055  Prio->getExprLoc()));
4056  }
4057  // The first function argument for tasks is a thread id, the second one is a
4058  // part id (0 for tied tasks, >=0 for untied tasks).
4059  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4060  // Get list of private variables.
4061  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4062  auto IRef = C->varlist_begin();
4063  for (const Expr *IInit : C->private_copies()) {
4064  const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4065  if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4066  Data.PrivateVars.push_back(*IRef);
4067  Data.PrivateCopies.push_back(IInit);
4068  }
4069  ++IRef;
4070  }
4071  }
4072  EmittedAsPrivate.clear();
4073  // Get list of firstprivate variables.
4074  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4075  auto IRef = C->varlist_begin();
4076  auto IElemInitRef = C->inits().begin();
4077  for (const Expr *IInit : C->private_copies()) {
4078  const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4079  if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4080  Data.FirstprivateVars.push_back(*IRef);
4081  Data.FirstprivateCopies.push_back(IInit);
4082  Data.FirstprivateInits.push_back(*IElemInitRef);
4083  }
4084  ++IRef;
4085  ++IElemInitRef;
4086  }
4087  }
4088  // Get list of lastprivate variables (for taskloops).
4089  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4090  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4091  auto IRef = C->varlist_begin();
4092  auto ID = C->destination_exprs().begin();
4093  for (const Expr *IInit : C->private_copies()) {
4094  const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4095  if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4096  Data.LastprivateVars.push_back(*IRef);
4097  Data.LastprivateCopies.push_back(IInit);
4098  }
4099  LastprivateDstsOrigs.insert(
4100  {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
4101  cast<DeclRefExpr>(*IRef)});
4102  ++IRef;
4103  ++ID;
4104  }
4105  }
4106  SmallVector<const Expr *, 4> LHSs;
4107  SmallVector<const Expr *, 4> RHSs;
4108  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4109  Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4110  Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4111  Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4112  Data.ReductionOps.append(C->reduction_ops().begin(),
4113  C->reduction_ops().end());
4114  LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4115  RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4116  }
4117  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4118  *this, S.getBeginLoc(), LHSs, RHSs, Data);
4119  // Build list of dependences.
4120  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4121  OMPTaskDataTy::DependData &DD =
4122  Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4123  DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4124  }
4125  // Get list of local vars for untied tasks.
4126  if (!Data.Tied) {
4127  CheckVarsEscapingUntiedTaskDeclContext Checker;
4128  Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4129  Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4130  Checker.getPrivateDecls().end());
4131  }
4132  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4133  CapturedRegion](CodeGenFunction &CGF,
4134  PrePostActionTy &Action) {
4135  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, std::pair<Address, Address>>
4136  UntiedLocalVars;
4137  // Set proper addresses for generated private copies.
4138  OMPPrivateScope Scope(CGF);
4139  llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
4140  if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
4141  !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
4142  enum { PrivatesParam = 2, CopyFnParam = 3 };
4143  llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4144  CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4145  llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4146  CS->getCapturedDecl()->getParam(PrivatesParam)));
4147  // Map privates.
4148  llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4149  llvm::SmallVector<llvm::Value *, 16> CallArgs;
4150  llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4151  CallArgs.push_back(PrivatesPtr);
4152  ParamTypes.push_back(PrivatesPtr->getType());
4153  for (const Expr *E : Data.PrivateVars) {
4154  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4155  Address PrivatePtr = CGF.CreateMemTemp(
4156  CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
4157  PrivatePtrs.emplace_back(VD, PrivatePtr);
4158  CallArgs.push_back(PrivatePtr.getPointer());
4159  ParamTypes.push_back(PrivatePtr.getType());
4160  }
4161  for (const Expr *E : Data.FirstprivateVars) {
4162  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4163  Address PrivatePtr =
4164  CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4165  ".firstpriv.ptr.addr");
4166  PrivatePtrs.emplace_back(VD, PrivatePtr);
4167  FirstprivatePtrs.emplace_back(VD, PrivatePtr);
4168  CallArgs.push_back(PrivatePtr.getPointer());
4169  ParamTypes.push_back(PrivatePtr.getType());
4170  }
4171  for (const Expr *E : Data.LastprivateVars) {
4172  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4173  Address PrivatePtr =
4174  CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4175  ".lastpriv.ptr.addr");
4176  PrivatePtrs.emplace_back(VD, PrivatePtr);
4177  CallArgs.push_back(PrivatePtr.getPointer());
4178  ParamTypes.push_back(PrivatePtr.getType());
4179  }
4180  for (const VarDecl *VD : Data.PrivateLocals) {
4181  QualType Ty = VD->getType().getNonReferenceType();
4182  if (VD->getType()->isLValueReferenceType())
4183  Ty = CGF.getContext().getPointerType(Ty);
4184  if (isAllocatableDecl(VD))
4185  Ty = CGF.getContext().getPointerType(Ty);
4186  Address PrivatePtr = CGF.CreateMemTemp(
4187  CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4188  UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid());
4189  CallArgs.push_back(PrivatePtr.getPointer());
4190  ParamTypes.push_back(PrivatePtr.getType());
4191  }
4192  auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4193  ParamTypes, /*isVarArg=*/false);
4194  CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4195  CopyFn, CopyFnTy->getPointerTo());
4196  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4197  CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4198  for (const auto &Pair : LastprivateDstsOrigs) {
4199  const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4200  DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4201  /*RefersToEnclosingVariableOrCapture=*/
4202  CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4203  Pair.second->getType(), VK_LValue,
4204  Pair.second->getExprLoc());
4205  Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
4206  return CGF.EmitLValue(&DRE).getAddress(CGF);
4207  });
4208  }
4209  for (const auto &Pair : PrivatePtrs) {
4210  Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4211  CGF.getContext().getDeclAlign(Pair.first));
4212  Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4213  }
4214  // Adjust mapping for internal locals by mapping actual memory instead of
4215  // a pointer to this memory.
4216  for (auto &Pair : UntiedLocalVars) {
4217  if (isAllocatableDecl(Pair.first)) {
4218  llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4219  Address Replacement(Ptr, CGF.getPointerAlign());
4220  Pair.getSecond().first = Replacement;
4221  Ptr = CGF.Builder.CreateLoad(Replacement);
4222  Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4223  Pair.getSecond().second = Replacement;
4224  } else {
4225  llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4226  Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4227  Pair.getSecond().first = Replacement;
4228  }
4229  }
4230  }
4231  if (Data.Reductions) {
4232  OMPPrivateScope FirstprivateScope(CGF);
4233  for (const auto &Pair : FirstprivatePtrs) {
4234  Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4235  CGF.getContext().getDeclAlign(Pair.first));
4236  FirstprivateScope.addPrivate(Pair.first,
4237  [Replacement]() { return Replacement; });
4238  }
4239  (void)FirstprivateScope.Privatize();
4240  OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4241  ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4242  Data.ReductionCopies, Data.ReductionOps);
4243  llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4244  CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4245  for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4246  RedCG.emitSharedOrigLValue(CGF, Cnt);
4247  RedCG.emitAggregateType(CGF, Cnt);
4248  // FIXME: This must be removed once the runtime library is fixed.
4249  // Emit required threadprivate variables for
4250  // initializer/combiner/finalizer.
4251  CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4252  RedCG, Cnt);
4253  Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4254  CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4255  Replacement =
4256  Address(CGF.EmitScalarConversion(
4257  Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4258  CGF.getContext().getPointerType(
4259  Data.ReductionCopies[Cnt]->getType()),
4260  Data.ReductionCopies[Cnt]->getExprLoc()),
4261  Replacement.getAlignment());
4262  Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4263  Scope.addPrivate(RedCG.getBaseDecl(Cnt),
4264  [Replacement]() { return Replacement; });
4265  }
4266  }
4267  // Privatize all private variables except for in_reduction items.
4268  (void)Scope.Privatize();
4269  SmallVector<const Expr *, 4> InRedVars;
4270  SmallVector<const Expr *, 4> InRedPrivs;
4271  SmallVector<const Expr *, 4> InRedOps;
4272  SmallVector<const Expr *, 4> TaskgroupDescriptors;
4273  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4274  auto IPriv = C->privates().begin();
4275  auto IRed = C->reduction_ops().begin();
4276  auto ITD = C->taskgroup_descriptors().begin();
4277  for (const Expr *Ref : C->varlists()) {
4278  InRedVars.emplace_back(Ref);
4279  InRedPrivs.emplace_back(*IPriv);
4280  InRedOps.emplace_back(*IRed);
4281  TaskgroupDescriptors.emplace_back(*ITD);
4282  std::advance(IPriv, 1);
4283  std::advance(IRed, 1);
4284  std::advance(ITD, 1);
4285  }
4286  }
4287  // Privatize in_reduction items here, because taskgroup descriptors must be
4288  // privatized earlier.
4289  OMPPrivateScope InRedScope(CGF);
4290  if (!InRedVars.empty()) {
4291  ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4292  for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4293  RedCG.emitSharedOrigLValue(CGF, Cnt);
4294  RedCG.emitAggregateType(CGF, Cnt);
4295  // The taskgroup descriptor variable is always implicit firstprivate and
4296  // privatized already during processing of the firstprivates.
4297  // FIXME: This must be removed once the runtime library is fixed.
4298  // Emit required threadprivate variables for
4299  // initializer/combiner/finalizer.
4300  CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4301  RedCG, Cnt);
4302  llvm::Value *ReductionsPtr;
4303  if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4304  ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4305  TRExpr->getExprLoc());
4306  } else {
4307  ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4308  }
4309  Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4310  CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4311  Replacement = Address(
4312  CGF.EmitScalarConversion(
4313  Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4314  CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4315  InRedPrivs[Cnt]->getExprLoc()),
4316  Replacement.getAlignment());
4317  Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4318  InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
4319  [Replacement]() { return Replacement; });
4320  }
4321  }
4322  (void)InRedScope.Privatize();
4323 
4324  CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4325  UntiedLocalVars);
4326  Action.Enter(CGF);
4327  BodyGen(CGF);
4328  };
4329  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4330  S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4331  Data.NumberOfParts);
4332  OMPLexicalScope Scope(*this, S, llvm::None,
4333  !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4334  !isOpenMPSimdDirective(S.getDirectiveKind()));
4335  TaskGen(*this, OutlinedFn, Data);
4336 }
4337 
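// Outlined-task parameter indices as used above (informal, not a full
// contract): param 2 is the privates block, param 3 the privates copy
// function, param 4 the kmp_task_t, and param 9 the reductions pointer.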
4338 static ImplicitParamDecl *
4339 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4340  QualType Ty, CapturedDecl *CD,
4341  SourceLocation Loc) {
4342  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4343  ImplicitParamDecl::Other);
4344  auto *OrigRef = DeclRefExpr::Create(
4345  C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4346  /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4347  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4348  ImplicitParamDecl::Other);
4349  auto *PrivateRef = DeclRefExpr::Create(
4350  C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4351  /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4352  QualType ElemType = C.getBaseElementType(Ty);
4353  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4354  ImplicitParamDecl::Other);
4355  auto *InitRef = DeclRefExpr::Create(
4356  C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4357  /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4358  PrivateVD->setInitStyle(VarDecl::CInit);
4359  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4360  InitRef, /*BasePath=*/nullptr,
4361  VK_RValue, FPOptionsOverride()));
4362  Data.FirstprivateVars.emplace_back(OrigRef);
4363  Data.FirstprivateCopies.emplace_back(PrivateRef);
4364  Data.FirstprivateInits.emplace_back(InitRef);
4365  return OrigVD;
4366 }
4367 
4368 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4369  const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4370  OMPTargetDataInfo &InputInfo) {
4371  // Emit outlined function for task construct.
4372  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4373  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4374  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4375  auto I = CS->getCapturedDecl()->param_begin();
4376  auto PartId = std::next(I);
4377  auto TaskT = std::next(I, 4);
4378  OMPTaskDataTy Data;
4379  // The task is not final.
4380  Data.Final.setInt(/*IntVal=*/false);
4381  // Get list of firstprivate variables.
4382  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4383  auto IRef = C->varlist_begin();
4384  auto IElemInitRef = C->inits().begin();
4385  for (auto *IInit : C->private_copies()) {
4386  Data.FirstprivateVars.push_back(*IRef);
4387  Data.FirstprivateCopies.push_back(IInit);
4388  Data.FirstprivateInits.push_back(*IElemInitRef);
4389  ++IRef;
4390  ++IElemInitRef;
4391  }
4392  }
4393  OMPPrivateScope TargetScope(*this);
4394  VarDecl *BPVD = nullptr;
4395  VarDecl *PVD = nullptr;
4396  VarDecl *SVD = nullptr;
4397  VarDecl *MVD = nullptr;
4398  if (InputInfo.NumberOfTargetItems > 0) {
4399  auto *CD = CapturedDecl::Create(
4400  getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
4401  llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
4402  QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
4403  getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
4404  /*IndexTypeQuals=*/0);
4405  BPVD = createImplicitFirstprivateForType(
4406  getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4407  PVD = createImplicitFirstprivateForType(
4408  getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4409  QualType SizesType = getContext().getConstantArrayType(
4410  getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
4411  ArrSize, nullptr, ArrayType::Normal,
4412  /*IndexTypeQuals=*/0);
4413  SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
4414  S.getBeginLoc());
4415  TargetScope.addPrivate(
4416  BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
4417  TargetScope.addPrivate(PVD,
4418  [&InputInfo]() { return InputInfo.PointersArray; });
4419  TargetScope.addPrivate(SVD,
4420  [&InputInfo]() { return InputInfo.SizesArray; });
4421  // If there is no user-defined mapper, the mapper array will be nullptr. In
4422  // this case, we don't need to privatize it.
4423  if (!dyn_cast_or_null<llvm::ConstantPointerNull>(
4424  InputInfo.MappersArray.getPointer())) {
4425  MVD = createImplicitFirstprivateForType(
4426  getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4427  TargetScope.addPrivate(MVD,
4428  [&InputInfo]() { return InputInfo.MappersArray; });
4429  }
4430  }
4431  (void)TargetScope.Privatize();
4432  // Build list of dependences.
4433  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4434  OMPTaskDataTy::DependData &DD =
4435  Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4436  DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4437  }
4438  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
4439  &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
4440  // Set proper addresses for generated private copies.
4441  OMPPrivateScope Scope(CGF);
4442  if (!Data.FirstprivateVars.empty()) {
4443  enum { PrivatesParam = 2, CopyFnParam = 3 };
4444  llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4445  CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4446  llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4447  CS->getCapturedDecl()->getParam(PrivatesParam)));
4448  // Map privates.
4449  llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4450  llvm::SmallVector<llvm::Value *, 16> CallArgs;
4451  llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4452  CallArgs.push_back(PrivatesPtr);
4453  ParamTypes.push_back(PrivatesPtr->getType());
4454  for (const Expr *E : Data.FirstprivateVars) {
4455  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4456  Address PrivatePtr =
4457  CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4458  ".firstpriv.ptr.addr");
4459  PrivatePtrs.emplace_back(VD, PrivatePtr);
4460  CallArgs.push_back(PrivatePtr.getPointer());
4461  ParamTypes.push_back(PrivatePtr.getType());
4462  }
4463  auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4464  ParamTypes, /*isVarArg=*/false);
4465  CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4466  CopyFn, CopyFnTy->getPointerTo());
4467  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4468  CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4469  for (const auto &Pair : PrivatePtrs) {
4470  Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4471  CGF.getContext().getDeclAlign(Pair.first));
4472  Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4473  }
4474  }
4475  // Privatize all private variables except for in_reduction items.
4476  (void)Scope.Privatize();
4477  if (InputInfo.NumberOfTargetItems > 0) {
4478  InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
4479  CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
4480  InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
4481  CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
4482  InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
4483  CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
4484  // If MVD is nullptr, the mapper array is not privatized.
4485  if (MVD)
4486  InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
4487  CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
4488  }
4489 
4490  Action.Enter(CGF);
4491  OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
4492  BodyGen(CGF);
4493  };
4494  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4495  S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
4496  Data.NumberOfParts);
4497  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
4498  IntegerLiteral IfCond(getContext(), TrueOrFalse,
4499  getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
4500  SourceLocation());
4501 
4502  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
4503  SharedsTy, CapturedStruct, &IfCond, Data);
4504 }
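// For example (a sketch of the effect, not literal output): for
//   #pragma omp target enter data map(to: a) nowait
// the data-movement code produced by BodyGen is wrapped in an implicit task,
// the base-pointer/pointer/size (and optional mapper) arrays become implicit
// firstprivates of that task, and the IntegerLiteral 'IfCond' built above
// encodes the presence of 'nowait' (1 -> deferred task, 0 -> undeferred).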
4505 
4506 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
4507  // Emit outlined function for task construct.
4508  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4509  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4510  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4511  const Expr *IfCond = nullptr;
4512  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4513  if (C->getNameModifier() == OMPD_unknown ||
4514  C->getNameModifier() == OMPD_task) {
4515  IfCond = C->getCondition();
4516  break;
4517  }
4518  }
4519 
4520  OMPTaskDataTy Data;
4521  // Check if we should emit tied or untied task.
4522  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
4523  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
4524  CGF.EmitStmt(CS->getCapturedStmt());
4525  };
4526  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4527  IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
4528  const OMPTaskDataTy &Data) {
4529  CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
4530  SharedsTy, CapturedStruct, IfCond,
4531  Data);
4532  };
4533  auto LPCRegion =
4534  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4535  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
4536 }
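// Rough shape of the lowering (a sketch; the exact runtime calls are chosen by
// CGOpenMPRuntime::emitTaskCall, and 'x', 'cond', and 'use' are hypothetical
// user names): for
//   #pragma omp task firstprivate(x) if(cond)
//   { use(x); }
// an outlined task entry is created and dispatched via
// __kmpc_omp_task_alloc/__kmpc_omp_task when 'cond' is true, or via the
// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0 pair when it is
// false.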
4537 
4539  const OMPTaskyieldDirective &S) {
4540  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
4541 }
4542 
4543 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
4544  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
4545 }
4546 
4547 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
4548  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
4549 }
4550 
4551 void CodeGenFunction::EmitOMPTaskgroupDirective(
4552  const OMPTaskgroupDirective &S) {
4553  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4554  Action.Enter(CGF);
4555  if (const Expr *E = S.getReductionRef()) {
4556  SmallVector<const Expr *, 4> LHSs;
4557  SmallVector<const Expr *, 4> RHSs;
4558  OMPTaskDataTy Data;
4559  for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
4560  Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4561  Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4562  Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4563  Data.ReductionOps.append(C->reduction_ops().begin(),
4564  C->reduction_ops().end());
4565  LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4566  RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4567  }
4568  llvm::Value *ReductionDesc =
4569  CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
4570  LHSs, RHSs, Data);
4571  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4572  CGF.EmitVarDecl(*VD);
4573  CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
4574  /*Volatile=*/false, E->getType());
4575  }
4576  CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4577  };
4578  OMPLexicalScope Scope(*this, S, OMPD_unknown);
4579  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
4580 }
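// Source pattern handled above (a sketch with hypothetical names):
//   #pragma omp taskgroup task_reduction(+: sum)
//   { /* child tasks with in_reduction(+: sum) */ }
// The descriptor produced by emitTaskReductionInit is stored into the
// reduction-ref variable so that nested in_reduction items can locate their
// per-task copies through the runtime (e.g.
// __kmpc_task_reduction_get_th_data).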
4581 
4582 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
4583  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
4584  ? llvm::AtomicOrdering::NotAtomic
4585  : llvm::AtomicOrdering::AcquireRelease;
4586  CGM.getOpenMPRuntime().emitFlush(
4587  *this,
4588  [&S]() -> ArrayRef<const Expr *> {
4589  if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
4590  return llvm::makeArrayRef(FlushClause->varlist_begin(),
4591  FlushClause->varlist_end());
4592  return llvm::None;
4593  }(),
4594  S.getBeginLoc(), AO);
4595 }
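// For reference: a bare '#pragma omp flush' is given acquire-release ordering,
// while 'flush(a, b)' with a variable list is marked NotAtomic; both forms are
// forwarded to CGOpenMPRuntime::emitFlush, which typically lowers to a
// __kmpc_flush runtime call.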
4596 
4597 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
4598  const auto *DO = S.getSingleClause<OMPDepobjClause>();
4599  LValue DOLVal = EmitLValue(DO->getDepobj());
4600  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
4601  OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
4602  DC->getModifier());
4603  Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
4604  Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
4605  *this, Dependencies, DC->getBeginLoc());
4606  EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
4607  return;
4608  }
4609  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
4610  CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
4611  return;
4612  }
4613  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
4614  CGM.getOpenMPRuntime().emitUpdateClause(
4615  *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
4616  return;
4617  }
4618 }
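// The three forms dispatched above, sketched on hypothetical user code:
//   omp_depend_t d;
//   #pragma omp depobj(d) depend(inout: x)  // emitDepobjDependClause + store
//   #pragma omp depobj(d) destroy           // emitDestroyClause
//   #pragma omp depobj(d) update(in)        // emitUpdateClause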
4619 
4620 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4621  if (!OMPParentLoopDirectiveForScan)
4622  return;
4623  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4624  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4625  SmallVector<const Expr *, 4> Shareds;
4626  SmallVector<const Expr *, 4> Privates;
4627  SmallVector<const Expr *, 4> LHSs;
4628  SmallVector<const Expr *, 4> RHSs;
4629  SmallVector<const Expr *, 4> ReductionOps;
4630  SmallVector<const Expr *, 4> CopyOps;
4631  SmallVector<const Expr *, 4> CopyArrayTemps;
4632  SmallVector<const Expr *, 4> CopyArrayElems;
4633  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
4634  if (C->getModifier() != OMPC_REDUCTION_inscan)
4635  continue;
4636  Shareds.append(C->varlist_begin(), C->varlist_end());
4637  Privates.append(C->privates().begin(), C->privates().end());
4638  LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4639  RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4640  ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
4641  CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
4642  CopyArrayTemps.append(C->copy_array_temps().begin(),
4643  C->copy_array_temps().end());
4644  CopyArrayElems.append(C->copy_array_elems().begin(),
4645  C->copy_array_elems().end());
4646  }
4647  if (ParentDir.getDirectiveKind() == OMPD_simd ||
4648  (getLangOpts().OpenMPSimd &&
4649  isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
4650  // For the simd directive and simd-based directives in simd-only mode, use
4651  // the following codegen:
4651  // following codegen:
4652  // int x = 0;
4653  // #pragma omp simd reduction(inscan, +: x)
4654  // for (..) {
4655  // <first part>
4656  // #pragma omp scan inclusive(x)
4657  // <second part>
4658  // }
4659  // is transformed to:
4660  // int x = 0;
4661  // for (..) {
4662  // int x_priv = 0;
4663  // <first part>
4664  // x = x_priv + x;
4665  // x_priv = x;
4666  // <second part>
4667  // }
4668  // and
4669  // int x = 0;
4670  // #pragma omp simd reduction(inscan, +: x)
4671  // for (..) {
4672  // <first part>
4673  // #pragma omp scan exclusive(x)
4674  // <second part>
4675  // }
4676  // to
4677  // int x = 0;
4678  // for (..) {
4679  // int x_priv = 0;
4680  // <second part>
4681  // int temp = x;
4682  // x = x_priv + x;
4683  // x_priv = temp;
4684  // <first part>
4685  // }
4686  llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
4687  EmitBranch(IsInclusive
4688  ? OMPScanReduce
4689  : BreakContinueStack.back().ContinueBlock.getBlock());
4690  EmitBlock(OMPScanDispatch);
4691  {
4692  // New scope for correct construction/destruction of temp variables for
4693  // exclusive scan.
4694  LexicalScope Scope(*this, S.getSourceRange());
4695  EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
4696  EmitBlock(OMPScanReduce);
4697  if (!IsInclusive) {
4698  // Create temp var and copy LHS value to this temp value.
4699  // TMP = LHS;
4700  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4701  const Expr *PrivateExpr = Privates[I];
4702  const Expr *TempExpr = CopyArrayTemps[I];
4703  EmitAutoVarDecl(
4704  *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
4705  LValue DestLVal = EmitLValue(TempExpr);
4706  LValue SrcLVal = EmitLValue(LHSs[I]);
4707  EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4708  SrcLVal.getAddress(*this),
4709  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4710  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4711  CopyOps[I]);
4712  }
4713  }
4714  CGM.getOpenMPRuntime().emitReduction(
4715  *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
4716  {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
4717  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4718  const Expr *PrivateExpr = Privates[I];
4719  LValue DestLVal;
4720  LValue SrcLVal;
4721  if (IsInclusive) {
4722  DestLVal = EmitLValue(RHSs[I]);
4723  SrcLVal = EmitLValue(LHSs[I]);
4724  } else {
4725  const Expr *TempExpr = CopyArrayTemps[I];
4726  DestLVal = EmitLValue(RHSs[I]);
4727  SrcLVal = EmitLValue(TempExpr);
4728  }
4729  EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4730  SrcLVal.getAddress(*this),
4731  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4732  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4733  CopyOps[I]);
4734  }
4735  }
4736  EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
4737  OMPScanExitBlock = IsInclusive
4738  ? BreakContinueStack.back().ContinueBlock.getBlock()
4739  : OMPScanReduce;
4740  EmitBlock(OMPAfterScanBlock);
4741  return;
4742  }
4743  if (!IsInclusive) {
4744  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4745  EmitBlock(OMPScanExitBlock);
4746  }
4747  if (OMPFirstScanLoop) {
4748  // Emit buffer[i] = red; at the end of the input phase.
4749  const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4750  .getIterationVariable()
4751  ->IgnoreParenImpCasts();
4752  LValue IdxLVal = EmitLValue(IVExpr);
4753  llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4754  IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4755  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4756  const Expr *PrivateExpr = Privates[I];
4757  const Expr *OrigExpr = Shareds[I];
4758  const Expr *CopyArrayElem = CopyArrayElems[I];
4759  OpaqueValueMapping IdxMapping(
4760  *this,
4761  cast<OpaqueValueExpr>(
4762  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4763  RValue::get(IdxVal));
4764  LValue DestLVal = EmitLValue(CopyArrayElem);
4765  LValue SrcLVal = EmitLValue(OrigExpr);
4766  EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4767  SrcLVal.getAddress(*this),
4768  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4769  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4770  CopyOps[I]);
4771  }
4772  }
4773  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4774  if (IsInclusive) {
4775  EmitBlock(OMPScanExitBlock);
4776  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4777  }
4778  EmitBlock(OMPScanDispatch);
4779  if (!OMPFirstScanLoop) {
4780  // Emit red = buffer[i]; at the entrance to the scan phase.
4781  const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4782  .getIterationVariable()
4783  ->IgnoreParenImpCasts();
4784  LValue IdxLVal = EmitLValue(IVExpr);
4785  llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4786  IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4787  llvm::BasicBlock *ExclusiveExitBB = nullptr;
4788  if (!IsInclusive) {
4789  llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
4790  ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
4791  llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
4792  Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
4793  EmitBlock(ContBB);
4794  // Use idx - 1 iteration for exclusive scan.
4795  IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
4796  }
4797  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4798  const Expr *PrivateExpr = Privates[I];
4799  const Expr *OrigExpr = Shareds[I];
4800  const Expr *CopyArrayElem = CopyArrayElems[I];
4801  OpaqueValueMapping IdxMapping(
4802  *this,
4803  cast<OpaqueValueExpr>(
4804  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4805  RValue::get(IdxVal));
4806  LValue SrcLVal = EmitLValue(CopyArrayElem);
4807  LValue DestLVal = EmitLValue(OrigExpr);
4808  EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4809  SrcLVal.getAddress(*this),
4810  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4811  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4812  CopyOps[I]);
4813  }
4814  if (!IsInclusive) {
4815  EmitBlock(ExclusiveExitBB);
4816  }
4817  }
4818  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
4819  : OMPAfterScanBlock);
4820  EmitBlock(OMPAfterScanBlock);
4821 }
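// Summary of the general (non-simd) path above: each inscan reduction item
// gets a copy buffer indexed by the loop IV. The first loop emits
// 'buffer[i] = red;' at the end of its input phase; after the buffer has been
// combined, the second loop emits 'red = buffer[i];' at the start of its scan
// phase, reading 'buffer[i-1]' instead (and skipping iteration 0) for
// 'exclusive'.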
4822 
4824  const CodeGenLoopTy &CodeGenLoop,
4825  Expr *IncExpr) {
4826  // Emit the loop iteration variable.
4827  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
4828  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
4829  EmitVarDecl(*IVDecl);
4830 
4831  // Emit the iteration count variable.
4832  // If it is not a variable, Sema decided to calculate the iteration count on
4833  // each iteration (e.g., it is foldable into a constant).
4834  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4835  EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4836  // Emit the calculation of the iteration count.
4837  EmitIgnoredExpr(S.getCalcLastIteration());
4838  }
4839 
4840  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
4841 
4842  bool HasLastprivateClause = false;
4843  // Check pre-condition.
4844  {
4845  OMPLoopScope PreInitScope(*this, S);
4846  // Skip the entire loop if we don't meet the precondition.
4847  // If the condition constant folds and can be elided, avoid emitting the
4848  // whole loop.
4849  bool CondConstant;
4850  llvm::BasicBlock *ContBlock = nullptr;
4851  if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4852  if (!CondConstant)
4853  return;
4854  } else {
4855  llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
4856  ContBlock = createBasicBlock("omp.precond.end");
4857  emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
4858  getProfileCount(&S));
4859  EmitBlock(ThenBlock);
4860  incrementProfileCounter(&S);
4861  }
4862 
4863  emitAlignedClause(*this, S);
4864  // Emit 'then' code.
4865  {
4866  // Emit helper vars inits.
4867 
4868  LValue LB = EmitOMPHelperVar(
4869  *this, cast<DeclRefExpr>(
4870  (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4871  ? S.getCombinedLowerBoundVariable()
4872  : S.getLowerBoundVariable())));
4873  LValue UB = EmitOMPHelperVar(
4874  *this, cast<DeclRefExpr>(
4875  (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4876  ? S.getCombinedUpperBoundVariable()
4877  : S.getUpperBoundVariable())));
4878  LValue ST =
4879  EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
4880  LValue IL =
4881  EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
4882 
4883  OMPPrivateScope LoopScope(*this);
4884  if (EmitOMPFirstprivateClause(S, LoopScope)) {
4885  // Emit implicit barrier to synchronize threads and avoid data races
4886  // on initialization of firstprivate variables and post-update of
4887  // lastprivate variables.
4888  CGM.getOpenMPRuntime().emitBarrierCall(
4889  *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4890  /*ForceSimpleCall=*/true);
4891  }
4892  EmitOMPPrivateClause(S, LoopScope);
4893  if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
4894  !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4895  !isOpenMPTeamsDirective(S.getDirectiveKind()))
4896  EmitOMPReductionClauseInit(S, LoopScope);
4897  HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
4898  EmitOMPPrivateLoopCounters(S, LoopScope);
4899  (void)LoopScope.Privatize();
4900  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4901  CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
4902 
4903  // Detect the distribute schedule kind and chunk.
4904  llvm::Value *Chunk = nullptr;
4906  if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
4907  ScheduleKind = C->getDistScheduleKind();
4908  if (const Expr *Ch = C->getChunkSize()) {
4909  Chunk = EmitScalarExpr(Ch);
4910  Chunk = EmitScalarConversion(Chunk, Ch->getType(),
4911  S.getIterationVariable()->getType(),
4912  S.getBeginLoc());
4913  }
4914  } else {
4915  // Default behaviour for dist_schedule clause.
4916  CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
4917  *this, S, ScheduleKind, Chunk);
4918  }
4919  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
4920  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
4921 
4922  // OpenMP [2.10.8, distribute Construct, Description]
4923  // If dist_schedule is specified, kind must be static. If specified,
4924  // iterations are divided into chunks of size chunk_size, chunks are
4925  // assigned to the teams of the league in a round-robin fashion in the
4926  // order of the team number. When no chunk_size is specified, the
4927  // iteration space is divided into chunks that are approximately equal
4928  // in size, and at most one chunk is distributed to each team of the
4929  // league. The size of the chunks is unspecified in this case.
4930  bool StaticChunked = RT.isStaticChunked(
4931  ScheduleKind, /* Chunked */ Chunk != nullptr) &&
4932  isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
4933  if (RT.isStaticNonchunked(ScheduleKind,
4934  /* Chunked */ Chunk != nullptr) ||
4935  StaticChunked) {
4936  CGOpenMPRuntime::StaticRTInput StaticInit(
4937  IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
4938  LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
4939  StaticChunked ? Chunk : nullptr);
4940  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
4941  StaticInit);
4942  JumpDest LoopExit =
4943  getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
4944  // UB = min(UB, GlobalUB);
4945  EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4946  ? S.getCombinedEnsureUpperBound()
4947  : S.getEnsureUpperBound());
4948  // IV = LB;
4949  EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4950  ? S.getCombinedInit()
4951  : S.getInit());
4952 
4953  const Expr *Cond =
4954  isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
4955  ? S.getCombinedCond()
4956  : S.getCond();
4957 
4958  if (StaticChunked)
4959  Cond = S.getCombinedDistCond();
4960 
4961  // For static unchunked schedules, generate:
4962  //
4963  // 1. For distribute alone, codegen
4964  // while (idx <= UB) {
4965  // BODY;
4966  // ++idx;
4967  // }
4968  //
4969  // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
4970  // while (idx <= UB) {
4971  // <CodeGen rest of pragma>(LB, UB);
4972  // idx += ST;
4973  // }
4974  //
4975  // For a static chunk-one schedule, generate:
4976  //
4977  // while (IV <= GlobalUB) {
4978  // <CodeGen rest of pragma>(LB, UB);
4979  // LB += ST;
4980  // UB += ST;
4981  // UB = min(UB, GlobalUB);
4982  // IV = LB;
4983  // }
4984  //
4985  emitCommonSimdLoop(
4986  *this, S,
4987  [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4988  if (isOpenMPSimdDirective(S.getDirectiveKind()))
4989  CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
4990  },
4991  [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
4992  StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
4993  CGF.EmitOMPInnerLoop(
4994  S, LoopScope.requiresCleanups(), Cond, IncExpr,
4995  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
4996  CodeGenLoop(CGF, S, LoopExit);
4997  },
4998  [&S, StaticChunked](CodeGenFunction &CGF) {
4999  if (StaticChunked) {
5000  CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5001  CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5002  CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5003  CGF.EmitIgnoredExpr(S.getCombinedInit());
5004  }
5005  });
5006  });
5007  EmitBlock(LoopExit.getBlock());
5008  // Tell the runtime we are done.
5009  RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
5010  } else {
5011  // Emit the outer loop, which requests its work chunk [LB..UB] from the
5012  // runtime and runs the inner loop to process it.
5013  const OMPLoopArguments LoopArguments = {
5014  LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5015  IL.getAddress(*this), Chunk};
5016  EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
5017  CodeGenLoop);
5018  }
5019  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5020  EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5021  return CGF.Builder.CreateIsNotNull(
5022  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5023  });
5024  }
5025  if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5026  !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5027  !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5028  EmitOMPReductionClauseFinal(S, OMPD_simd);
5029  // Emit post-update of the reduction variables if IsLastIter != 0.
5030  emitPostUpdateForReductionClause(
5031  *this, S, [IL, &S](CodeGenFunction &CGF) {
5032  return CGF.Builder.CreateIsNotNull(
5033  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5034  });
5035  }
5036  // Emit final copy of the lastprivate variables if IsLastIter != 0.
5037  if (HasLastprivateClause) {
5038  EmitOMPLastprivateClauseFinal(
5039  S, /*NoFinals=*/false,
5040  Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
5041  }
5042  }
5043 
5044  // We're now done with the loop, so jump to the continuation block.
5045  if (ContBlock) {
5046  EmitBranch(ContBlock);
5047  EmitBlock(ContBlock, true);
5048  }
5049  }
5050 }
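// A sketch of the static non-chunked path above for a 32-bit signed IV; the
// runtime entry points shown are the ones selected by emitDistributeStaticInit
// and emitForStaticFinish:
//   __kmpc_distribute_static_init_4(loc, tid, kind, &IL, &LB, &UB, &ST, 1, 1);
//   UB = min(UB, GlobalUB);
//   IV = LB;
//   while (IV <= UB) { <loop body>; ++IV; }
//   __kmpc_for_static_fini(loc, tid);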
5051 
5052 void CodeGenFunction::EmitOMPDistributeDirective(
5053  const OMPDistributeDirective &S) {
5054  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5055  CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5056  };
5057  OMPLexicalScope Scope(*this, S, OMPD_unknown);
5058  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5059 }
5060 
5061 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5062  const CapturedStmt *S,
5063  SourceLocation Loc) {
5064  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5065  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5066  CGF.CapturedStmtInfo = &CapStmtInfo;
5067  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5068  Fn->setDoesNotRecurse();
5069  return Fn;
5070 }
5071 
5072 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
5073  if (S.hasClausesOfKind<OMPDependClause>()) {
5074  assert(!S.hasAssociatedStmt() &&
5075  "An 'ordered' construct with 'depend' clauses must not have an "
5076  "associated statement.");
5076  for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5077  CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5078  return;
5079  }
5080  const auto *C = S.getSingleClause<OMPSIMDClause>();
5081  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
5082  PrePostActionTy &Action) {
5083  const CapturedStmt *CS = S.getInnermostCapturedStmt();
5084  if (C) {
5085  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5086  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5087  llvm::Function *OutlinedFn =
5088  emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5089  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
5090  OutlinedFn, CapturedVars);
5091  } else {
5092  Action.Enter(CGF);
5093  CGF.EmitStmt(CS->getCapturedStmt());
5094  }
5095  };
5096  OMPLexicalScope Scope(*this, S, OMPD_unknown);
5097  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
5098 }
5099 
5100 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
5101  QualType SrcType, QualType DestType,
5102  SourceLocation Loc) {
5103  assert(CGF.hasScalarEvaluationKind(DestType) &&
5104  "DestType must have scalar evaluation kind.");
5105  assert(!Val.isAggregate() && "Must be a scalar or complex.");
5106  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
5107  DestType, Loc)
5108  : CGF.EmitComplexToScalarConversion(
5109  Val.getComplexVal(), SrcType, DestType, Loc);
5110 }
5111 
5112 static CodeGenFunction::ComplexPairTy
5113 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
5114  QualType DestType, SourceLocation Loc) {
5115  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
5116  "DestType must have complex evaluation kind.");
5117  CodeGenFunction::ComplexPairTy ComplexVal;
5118  if (Val.isScalar()) {
5119  // Convert the input element to the element type of the complex.
5120  QualType DestElementType =
5121  DestType->castAs<ComplexType>()->getElementType();
5122  llvm::Value *ScalarVal = CGF.EmitScalarConversion(
5123  Val.getScalarVal(), SrcType, DestElementType, Loc);
5124  ComplexVal = CodeGenFunction::ComplexPairTy(
5125  ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
5126  } else {
5127  assert(Val.isComplex() && "Must be a scalar or complex.");
5128  QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
5129  QualType DestElementType =
5130  DestType->castAs<ComplexType>()->getElementType();
5131  ComplexVal.first = CGF.EmitScalarConversion(
5132  Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
5133  ComplexVal.second = CGF.EmitScalarConversion(
5134  Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
5135  }
5136  return ComplexVal;
5137 }
5138 
5139 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
5140  LValue LVal, RValue RVal) {
5141  if (LVal.isGlobalReg())
5142  CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
5143  else
5144  CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
5145 }
5146 
5147 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
5148  llvm::AtomicOrdering AO, LValue LVal,
5149  SourceLocation Loc) {
5150  if (LVal.isGlobalReg())
5151  return CGF.EmitLoadOfLValue(LVal, Loc);
5152  return CGF.EmitAtomicLoad(
5153  LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
5154  LVal.isVolatile());
5155 }
5156 
5157 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
5158  QualType RValTy, SourceLocation Loc) {
5159  switch (getEvaluationKind(LVal.getType())) {
5160  case TEK_Scalar:
5161  EmitStoreThroughLValue(RValue::get(convertToScalarValue(
5162  *this, RVal, RValTy, LVal.getType(), Loc)),
5163  LVal);
5164  break;
5165  case TEK_Complex:
5166  EmitStoreOfComplex(
5167  convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
5168  /*isInit=*/false);
5169  break;
5170  case TEK_Aggregate:
5171  llvm_unreachable("Must be a scalar or complex.");
5172  }
5173 }
5174 
5175 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
5176  const Expr *X, const Expr *V,
5177  SourceLocation Loc) {
5178  // v = x;
5179  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
5180  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
5181  LValue XLValue = CGF.EmitLValue(X);
5182  LValue VLValue = CGF.EmitLValue(V);
5183  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
5184  // OpenMP, 2.17.7, atomic Construct
5185  // If the read or capture clause is specified and the acquire, acq_rel, or
5186  // seq_cst clause is specified then the strong flush on exit from the atomic
5187  // operation is also an acquire flush.
5188  switch (AO) {
5189  case llvm::AtomicOrdering::Acquire:
5190  case llvm::AtomicOrdering::AcquireRelease:
5191  case llvm::AtomicOrdering::SequentiallyConsistent:
5192  CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5193  llvm::AtomicOrdering::Acquire);
5194  break;
5195  case llvm::AtomicOrdering::Monotonic:
5196  case llvm::AtomicOrdering::Release:
5197  break;
5198  case llvm::AtomicOrdering::NotAtomic:
5199  case llvm::AtomicOrdering::Unordered:
5200  llvm_unreachable("Unexpected ordering.");
5201  }
5202  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
5204 }
5205 
5206 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
5207  llvm::AtomicOrdering AO, const Expr *X,
5208  const Expr *E, SourceLocation Loc) {
5209  // x = expr;
5210  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
5211  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
5212  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5213  // OpenMP, 2.17.7, atomic Construct
5214  // If the write, update, or capture clause is specified and the release,
5215  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5216  // the atomic operation is also a release flush.
5217  switch (AO) {
5218  case llvm::AtomicOrdering::Release:
5219  case llvm::AtomicOrdering::AcquireRelease:
5220  case llvm::AtomicOrdering::SequentiallyConsistent:
5221  CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5222  llvm::AtomicOrdering::Release);
5223  break;
5224  case llvm::AtomicOrdering::Acquire:
5225  case llvm::AtomicOrdering::Monotonic:
5226  break;
5227  case llvm::AtomicOrdering::NotAtomic:
5228  case llvm::AtomicOrdering::Unordered:
5229  llvm_unreachable("Unexpected ordering.");
5230  }
5231 }
5232 
5233 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
5234  RValue Update,
5235  BinaryOperatorKind BO,
5236  llvm::AtomicOrdering AO,
5237  bool IsXLHSInRHSPart) {
5238  ASTContext &Context = CGF.getContext();
5239  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
5240  // for the 'x' expression is simple, and atomic operations are supported for
5241  // the given type on the target platform.
5242  if (BO == BO_Comma || !Update.isScalar() ||
5243  !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
5244  (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
5245  (Update.getScalarVal()->getType() !=
5246  X.getAddress(CGF).getElementType())) ||
5247  !X.getAddress(CGF).getElementType()->isIntegerTy() ||
5248  !Context.getTargetInfo().hasBuiltinAtomic(
5249  Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
5250  return std::make_pair(false, RValue::get(nullptr));
5251 
5252  llvm::AtomicRMWInst::BinOp RMWOp;
5253  switch (BO) {
5254  case BO_Add:
5255  RMWOp = llvm::AtomicRMWInst::Add;
5256  break;
5257  case BO_Sub:
5258  if (!IsXLHSInRHSPart)
5259  return std::make_pair(false, RValue::get(nullptr));
5260  RMWOp = llvm::AtomicRMWInst::Sub;
5261  break;
5262  case BO_And:
5263  RMWOp = llvm::AtomicRMWInst::And;
5264  break;
5265  case BO_Or:
5266  RMWOp = llvm::AtomicRMWInst::Or;
5267  break;
5268  case BO_Xor:
5269  RMWOp = llvm::AtomicRMWInst::Xor;
5270  break;
5271  case BO_LT:
5272  RMWOp = X.getType()->hasSignedIntegerRepresentation()
5273  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
5274  : llvm::AtomicRMWInst::Max)
5275  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
5276  : llvm::AtomicRMWInst::UMax);
5277  break;
5278  case BO_GT:
5279  RMWOp = X.getType()->hasSignedIntegerRepresentation()
5280  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
5281  : llvm::AtomicRMWInst::Min)
5282  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
5283  : llvm::AtomicRMWInst::UMin);
5284  break;
5285  case BO_Assign:
5286  RMWOp = llvm::AtomicRMWInst::Xchg;
5287  break;
5288  case BO_Mul:
5289  case BO_Div:
5290  case BO_Rem:
5291  case BO_Shl:
5292  case BO_Shr:
5293  case BO_LAnd:
5294  case BO_LOr:
5295  return std::make_pair(false, RValue::get(nullptr));
5296  case BO_PtrMemD:
5297  case BO_PtrMemI:
5298  case BO_LE:
5299  case BO_GE:
5300  case BO_EQ:
5301  case BO_NE:
5302  case BO_Cmp:
5303  case BO_AddAssign:
5304  case BO_SubAssign:
5305  case BO_AndAssign:
5306  case BO_OrAssign:
5307  case BO_XorAssign:
5308  case BO_MulAssign:
5309  case BO_DivAssign:
5310  case BO_RemAssign:
5311  case BO_ShlAssign:
5312  case BO_ShrAssign:
5313  case BO_Comma:
5314  llvm_unreachable("Unsupported atomic update operation");
5315  }
5316  llvm::Value *UpdateVal = Update.getScalarVal();
5317  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
5318  UpdateVal = CGF.Builder.CreateIntCast(
5319  IC, X.getAddress(CGF).getElementType(),
5320  X.getType()->hasSignedIntegerRepresentation());
5321  }
5322  llvm::Value *Res =
5323  CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
5324  return std::make_pair(true, RValue::get(Res));
5325 }
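// Example (sketch, hypothetical 'x' and 'n'): with integer 'x', an update like
//   #pragma omp atomic update
//   x += n;
// passes the checks above and is emitted as a single 'atomicrmw add', while
// 'x = n - x' (BO_Sub with 'x' on the right-hand side of a non-commutative
// operator, i.e. !IsXLHSInRHSPart) returns {false, nullptr} here and is
// handled by the compare-and-swap fallback in EmitOMPAtomicSimpleUpdateExpr.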
5326 
5327 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
5328  LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
5329  llvm::AtomicOrdering AO, SourceLocation Loc,
5330  const llvm::function_ref<RValue(RValue)> CommonGen) {
5331  // Update expressions are allowed to have the following forms:
5332  // x binop= expr; -> xrval binop expr;
5333  // x++, ++x -> xrval + 1;
5334  // x--, --x -> xrval - 1;
5335  // x = x binop expr; -> xrval binop expr;
5336  // x = expr Op x; -> expr binop xrval;
5337  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
5338  if (!Res.first) {
5339  if (X.isGlobalReg()) {
5340  // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
5341  // 'xrval'.
5342  EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
5343  } else {
5344  // Perform compare-and-swap procedure.
5345  EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
5346  }
5347  }
5348  return Res;
5349 }
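// Conceptually, the EmitAtomicUpdate fallback used above expands to a
// compare-and-swap loop of the following shape (a sketch, not literal IR):
//   old = atomic load(x, AO);
//   do {
//     desired = CommonGen(old); // 'xrval binop expr' or 'expr binop xrval'
//   } while (!cmpxchg(x, old, desired));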
5350 
5351 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
5352  llvm::AtomicOrdering AO, const Expr *X,
5353  const Expr *E, const Expr *UE,
5354  bool IsXLHSInRHSPart, SourceLocation Loc) {
5355  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
5356  "Update expr in 'atomic update' must be a binary operator.");
5357  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
5358  // Update expressions are allowed to have the following forms:
5359  // x binop= expr; -> xrval binop expr;
5360  // x++, ++x -> xrval + 1;
5361  // x--, --x -> xrval - 1;
5362  // x = x binop expr; -> xrval binop expr;
5363  // x = expr Op x; -> expr binop xrval;
5364  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
5365  LValue XLValue = CGF.EmitLValue(X);
5366  RValue ExprRValue = CGF.EmitAnyExpr(E);
5367  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
5368  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
5369  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
5370  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
5371  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
5372  CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
5373  CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
5374  return CGF.EmitAnyExpr(UE);
5375  };
5376  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
5377  XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
5378  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
5379  // OpenMP, 2.17.7, atomic Construct
5380  // If the write, update, or capture clause is specified and the release,
5381  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
5382  // the atomic operation is also a release flush.
5383  switch (AO) {
5384  case llvm::AtomicOrdering::Release:
5385  case llvm::AtomicOrdering::AcquireRelease:
5386  case llvm::AtomicOrdering::SequentiallyConsistent:
5387  CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
5388  llvm::AtomicOrdering::Release);
5389  break;
5390  case llvm::AtomicOrdering::Acquire:
5391  case llvm::AtomicOrdering::Monotonic:
5392  break;
5393  case llvm::AtomicOrdering::NotAtomic:
5394  case llvm::AtomicOrdering::Unordered:
5395  llvm_unreachable("Unexpected ordering.");
5396  }
5397 }
5398 
5399 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
5400  QualType SourceType, QualType ResType,
5401  SourceLocation Loc) {
5402  switch (CGF.getEvaluationKind(ResType)) {
5403  case TEK_Scalar:
5404  return RValue::get(
5405  convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
5406  case TEK_Complex: {
5407  auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
5408  return RValue::getComplex(Res.first, Res.second);
5409  }
5410  case TEK_Aggregate:
5411  break;
5412  }
5413  llvm_unreachable("Must be a scalar or complex.");
5414 }
5415 
5416 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
5417  llvm::AtomicOrdering AO,
5418  bool IsPostfixUpdate, const Expr *V,
5419  const Expr *X, const Expr *E,
5420  const Expr *UE, bool IsXLHSInRHSPart,
5421