clang 23.0.0git
CGStmtOpenMP.cpp
Go to the documentation of this file.
1//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit OpenMP nodes as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGCleanup.h"
14#include "CGDebugInfo.h"
15#include "CGOpenMPRuntime.h"
16#include "CodeGenFunction.h"
17#include "CodeGenModule.h"
18#include "CodeGenPGO.h"
19#include "TargetInfo.h"
21#include "clang/AST/Attr.h"
24#include "clang/AST/Stmt.h"
31#include "llvm/ADT/SmallSet.h"
32#include "llvm/BinaryFormat/Dwarf.h"
33#include "llvm/Frontend/OpenMP/OMPConstants.h"
34#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugInfoMetadata.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicInst.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Debug.h"
42#include <optional>
43using namespace clang;
44using namespace CodeGen;
45using namespace llvm::omp;
46
47#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
48
49static const VarDecl *getBaseDecl(const Expr *Ref);
52
53/// Whether a combined `distribute parallel for` may use the fused
54/// distr_static_chunk + static_chunkone schedule (enum 93): one
55/// for_static_init, no surrounding distribute_static_init.
57 const OMPLoopDirective &S,
58 OpenMPDirectiveKind DKind) {
59 // Reduction-only for now. Non-reduction cases might follow in the future, but
60 // need more analysis for maximum profit.
61 return CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU() &&
63 S.hasClausesOfKind<OMPReductionClause>() &&
64 !S.getSingleClause<OMPDistScheduleClause>() &&
65 !S.getSingleClause<OMPScheduleClause>() &&
66 !S.getSingleClause<OMPOrderedClause>();
67}
68
69namespace {
70/// Lexical scope for OpenMP executable constructs, that handles correct codegen
71/// for captured expressions.
72class OMPLexicalScope : public CodeGenFunction::LexicalScope {
73 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
74 for (const auto *C : S.clauses()) {
75 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
76 if (const auto *PreInit =
77 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
78 for (const auto *I : PreInit->decls()) {
79 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
81 } else {
82 CodeGenFunction::AutoVarEmission Emission =
84 CGF.EmitAutoVarCleanups(Emission);
85 }
86 }
87 }
88 }
89 }
90 }
91 CodeGenFunction::OMPPrivateScope InlinedShareds;
92
93 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
94 return CGF.LambdaCaptureFields.lookup(VD) ||
95 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
96 (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
97 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
98 }
99
100public:
101 OMPLexicalScope(
102 CodeGenFunction &CGF, const OMPExecutableDirective &S,
103 const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
104 const bool EmitPreInitStmt = true)
105 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
106 InlinedShareds(CGF) {
107 if (EmitPreInitStmt)
108 emitPreInitStmt(CGF, S);
109 if (!CapturedRegion)
110 return;
111 assert(S.hasAssociatedStmt() &&
112 "Expected associated statement for inlined directive.");
113 const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
114 for (const auto &C : CS->captures()) {
115 if (C.capturesVariable() || C.capturesVariableByCopy()) {
116 auto *VD = C.getCapturedVar();
117 assert(VD == VD->getCanonicalDecl() &&
118 "Canonical decl must be captured.");
119 DeclRefExpr DRE(
120 CGF.getContext(), const_cast<VarDecl *>(VD),
121 isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
122 InlinedShareds.isGlobalVarCaptured(VD)),
123 VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
124 InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
125 }
126 }
127 (void)InlinedShareds.Privatize();
128 }
129};
130
131/// Lexical scope for OpenMP parallel construct, that handles correct codegen
132/// for captured expressions.
133class OMPParallelScope final : public OMPLexicalScope {
134 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
136 return !(isOpenMPTargetExecutionDirective(EKind) ||
139 }
140
141public:
142 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
143 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
144 EmitPreInitStmt(S)) {}
145};
146
147/// Lexical scope for OpenMP teams construct, that handles correct codegen
148/// for captured expressions.
149class OMPTeamsScope final : public OMPLexicalScope {
150 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
152 return !isOpenMPTargetExecutionDirective(EKind) &&
154 }
155
156public:
157 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
158 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
159 EmitPreInitStmt(S)) {}
160};
161
162/// Private scope for OpenMP loop-based directives, that supports capturing
163/// of used expression from loop statement.
164class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
165 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
166 const Stmt *PreInits;
167 CodeGenFunction::OMPMapVars PreCondVars;
168 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
169 // Emit init, __range, __begin and __end variables for C++ range loops.
170 (void)OMPLoopBasedDirective::doForAllLoops(
171 LD->getInnermostCapturedStmt()->getCapturedStmt(),
172 /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
173 [&CGF](unsigned Cnt, const Stmt *CurStmt) {
174 if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
175 if (const Stmt *Init = CXXFor->getInit())
176 CGF.EmitStmt(Init);
177 CGF.EmitStmt(CXXFor->getRangeStmt());
178 CGF.EmitStmt(CXXFor->getBeginStmt());
179 CGF.EmitStmt(CXXFor->getEndStmt());
180 }
181 return false;
182 });
183 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
184 for (const auto *E : LD->counters()) {
185 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
186 EmittedAsPrivate.insert(VD->getCanonicalDecl());
187 (void)PreCondVars.setVarAddr(
188 CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
189 }
190 // Mark private vars as undefs.
191 for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
192 for (const Expr *IRef : C->varlist()) {
193 const auto *OrigVD =
194 cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
195 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
196 QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
197 (void)PreCondVars.setVarAddr(
198 CGF, OrigVD,
199 Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
200 CGF.getContext().getPointerType(OrigVDTy))),
201 CGF.ConvertTypeForMem(OrigVDTy),
202 CGF.getContext().getDeclAlign(OrigVD)));
203 }
204 }
205 }
206 (void)PreCondVars.apply(CGF);
207 PreInits = LD->getPreInits();
208 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
209 PreInits = Tile->getPreInits();
210 } else if (const auto *Stripe = dyn_cast<OMPStripeDirective>(&S)) {
211 PreInits = Stripe->getPreInits();
212 } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
213 PreInits = Unroll->getPreInits();
214 } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
215 PreInits = Reverse->getPreInits();
216 } else if (const auto *Split = dyn_cast<OMPSplitDirective>(&S)) {
217 PreInits = Split->getPreInits();
218 } else if (const auto *Interchange =
219 dyn_cast<OMPInterchangeDirective>(&S)) {
220 PreInits = Interchange->getPreInits();
221 } else {
222 llvm_unreachable("Unknown loop-based directive kind.");
223 }
224 doEmitPreinits(PreInits);
225 PreCondVars.restore(CGF);
226 }
227
228 void
229 emitPreInitStmt(CodeGenFunction &CGF,
231 const Stmt *PreInits;
232 if (const auto *Fuse = dyn_cast<OMPFuseDirective>(&S)) {
233 PreInits = Fuse->getPreInits();
234 } else {
235 llvm_unreachable(
236 "Unknown canonical loop sequence transform directive kind.");
237 }
238 doEmitPreinits(PreInits);
239 }
240
241 void doEmitPreinits(const Stmt *PreInits) {
242 if (PreInits) {
243 // CompoundStmts and DeclStmts are used as lists of PreInit statements and
244 // declarations. Since declarations must be visible in the the following
245 // that they initialize, unpack the CompoundStmt they are nested in.
246 SmallVector<const Stmt *> PreInitStmts;
247 if (auto *PreInitCompound = dyn_cast<CompoundStmt>(PreInits))
248 llvm::append_range(PreInitStmts, PreInitCompound->body());
249 else
250 PreInitStmts.push_back(PreInits);
251
252 for (const Stmt *S : PreInitStmts) {
253 // EmitStmt skips any OMPCapturedExprDecls, but needs to be emitted
254 // here.
255 if (auto *PreInitDecl = dyn_cast<DeclStmt>(S)) {
256 for (Decl *I : PreInitDecl->decls())
257 CGF.EmitVarDecl(cast<VarDecl>(*I));
258 continue;
259 }
260 CGF.EmitStmt(S);
261 }
262 }
263 }
264
265public:
266 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
267 : CodeGenFunction::RunCleanupsScope(CGF) {
268 emitPreInitStmt(CGF, S);
269 }
270 OMPLoopScope(CodeGenFunction &CGF,
272 : CodeGenFunction::RunCleanupsScope(CGF) {
273 emitPreInitStmt(CGF, S);
274 }
275};
276
277class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
278 CodeGenFunction::OMPPrivateScope InlinedShareds;
279
280 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
281 return CGF.LambdaCaptureFields.lookup(VD) ||
282 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
283 (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
284 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
285 }
286
287public:
288 OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
289 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
290 InlinedShareds(CGF) {
291 for (const auto *C : S.clauses()) {
292 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
293 if (const auto *PreInit =
294 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
295 for (const auto *I : PreInit->decls()) {
296 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
297 CGF.EmitVarDecl(cast<VarDecl>(*I));
298 } else {
299 CodeGenFunction::AutoVarEmission Emission =
300 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
301 CGF.EmitAutoVarCleanups(Emission);
302 }
303 }
304 }
305 } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
306 for (const Expr *E : UDP->varlist()) {
307 const Decl *D = cast<DeclRefExpr>(E)->getDecl();
308 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
309 CGF.EmitVarDecl(*OED);
310 }
311 } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
312 for (const Expr *E : UDP->varlist()) {
313 const Decl *D = getBaseDecl(E);
314 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
315 CGF.EmitVarDecl(*OED);
316 }
317 }
318 }
320 CGF.EmitOMPPrivateClause(S, InlinedShareds);
321 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
322 if (const Expr *E = TG->getReductionRef())
323 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
324 }
325 // Temp copy arrays for inscan reductions should not be emitted as they are
326 // not used in simd only mode.
327 llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
328 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
329 if (C->getModifier() != OMPC_REDUCTION_inscan)
330 continue;
331 for (const Expr *E : C->copy_array_temps())
332 CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
333 }
334 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
335 while (CS) {
336 for (auto &C : CS->captures()) {
337 if (C.capturesVariable() || C.capturesVariableByCopy()) {
338 auto *VD = C.getCapturedVar();
339 if (CopyArrayTemps.contains(VD))
340 continue;
341 assert(VD == VD->getCanonicalDecl() &&
342 "Canonical decl must be captured.");
343 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
344 isCapturedVar(CGF, VD) ||
345 (CGF.CapturedStmtInfo &&
346 InlinedShareds.isGlobalVarCaptured(VD)),
348 C.getLocation());
349 InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
350 }
351 }
352 CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
353 }
354 (void)InlinedShareds.Privatize();
355 }
356};
357
358} // namespace
359
360// The loop directive with a bind clause will be mapped to a different
361// directive with corresponding semantics.
364 OpenMPDirectiveKind Kind = S.getDirectiveKind();
365 if (Kind != OMPD_loop)
366 return Kind;
367
369 if (const auto *C = S.getSingleClause<OMPBindClause>())
370 BindKind = C->getBindKind();
371
372 switch (BindKind) {
373 case OMPC_BIND_parallel:
374 return OMPD_for;
375 case OMPC_BIND_teams:
376 return OMPD_distribute;
377 case OMPC_BIND_thread:
378 return OMPD_simd;
379 default:
380 return OMPD_loop;
381 }
382}
383
385 const OMPExecutableDirective &S,
386 const RegionCodeGenTy &CodeGen);
387
389 if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
390 if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
391 OrigVD = OrigVD->getCanonicalDecl();
392 bool IsCaptured =
393 LambdaCaptureFields.lookup(OrigVD) ||
394 (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
395 (isa_and_nonnull<BlockDecl>(CurCodeDecl));
396 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
397 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
398 return EmitLValue(&DRE);
399 }
400 }
401 return EmitLValue(E);
402}
403
406 llvm::Value *Size = nullptr;
407 auto SizeInChars = C.getTypeSizeInChars(Ty);
408 if (SizeInChars.isZero()) {
409 // getTypeSizeInChars() returns 0 for a VLA.
410 while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
411 VlaSizePair VlaSize = getVLASize(VAT);
412 Ty = VlaSize.Type;
413 Size =
414 Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
415 }
416 SizeInChars = C.getTypeSizeInChars(Ty);
417 if (SizeInChars.isZero())
418 return llvm::ConstantInt::get(SizeTy, /*V=*/0);
419 return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
420 }
421 return CGM.getSize(SizeInChars);
422}
423
425 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
426 const RecordDecl *RD = S.getCapturedRecordDecl();
427 auto CurField = RD->field_begin();
428 auto CurCap = S.captures().begin();
430 E = S.capture_init_end();
431 I != E; ++I, ++CurField, ++CurCap) {
432 if (CurField->hasCapturedVLAType()) {
433 const VariableArrayType *VAT = CurField->getCapturedVLAType();
434 llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
435 CapturedVars.push_back(Val);
436 } else if (CurCap->capturesThis()) {
437 CapturedVars.push_back(CXXThisValue);
438 } else if (CurCap->capturesVariableByCopy()) {
439 llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
440
441 // If the field is not a pointer, we need to save the actual value
442 // and load it as a void pointer.
443 if (!CurField->getType()->isAnyPointerType()) {
444 ASTContext &Ctx = getContext();
446 Ctx.getUIntPtrType(),
447 Twine(CurCap->getCapturedVar()->getName(), ".casted"));
448 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
449
450 llvm::Value *SrcAddrVal = EmitScalarConversion(
451 DstAddr.emitRawPointer(*this),
453 Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
454 LValue SrcLV =
455 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
456
457 // Store the value using the source type pointer.
459
460 // Load the value using the destination type pointer.
461 CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
462 }
463 CapturedVars.push_back(CV);
464 } else {
465 assert(CurCap->capturesVariable() && "Expected capture by reference.");
466 CapturedVars.push_back(EmitLValue(*I).getAddress().emitRawPointer(*this));
467 }
468 }
469}
470
472 QualType DstType, StringRef Name,
473 LValue AddrLV) {
474 ASTContext &Ctx = CGF.getContext();
475
476 llvm::Value *CastedPtr = CGF.EmitScalarConversion(
477 AddrLV.getAddress().emitRawPointer(CGF), Ctx.getUIntPtrType(),
478 Ctx.getPointerType(DstType), Loc);
479 // FIXME: should the pointee type (DstType) be passed?
480 Address TmpAddr =
481 CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress();
482 return TmpAddr;
483}
484
486 if (T->isLValueReferenceType())
487 return C.getLValueReferenceType(
488 getCanonicalParamType(C, T.getNonReferenceType()),
489 /*SpelledAsLValue=*/false);
490 if (T->isPointerType())
491 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
492 if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
493 if (const auto *VLA = dyn_cast<VariableArrayType>(A))
494 return getCanonicalParamType(C, VLA->getElementType());
495 if (!A->isVariablyModifiedType())
496 return C.getCanonicalType(T);
497 }
498 return C.getCanonicalParamType(T);
499}
500
501namespace {
502/// Contains required data for proper outlined function codegen.
503struct FunctionOptions {
504 /// Captured statement for which the function is generated.
505 const CapturedStmt *S = nullptr;
506 /// true if cast to/from UIntPtr is required for variables captured by
507 /// value.
508 const bool UIntPtrCastRequired = true;
509 /// true if only casted arguments must be registered as local args or VLA
510 /// sizes.
511 const bool RegisterCastedArgsOnly = false;
512 /// Name of the generated function.
513 const StringRef FunctionName;
514 /// Location of the non-debug version of the outlined function.
515 SourceLocation Loc;
516 const bool IsDeviceKernel = false;
517 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
518 bool RegisterCastedArgsOnly, StringRef FunctionName,
519 SourceLocation Loc, bool IsDeviceKernel)
520 : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
521 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
522 FunctionName(FunctionName), Loc(Loc), IsDeviceKernel(IsDeviceKernel) {}
523};
524} // namespace
525
526static llvm::Function *emitOutlinedFunctionPrologue(
528 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
529 &LocalAddrs,
530 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
531 &VLASizes,
532 llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
533 const CapturedDecl *CD = FO.S->getCapturedDecl();
534 const RecordDecl *RD = FO.S->getCapturedRecordDecl();
535 assert(CD->hasBody() && "missing CapturedDecl body");
536
537 CXXThisValue = nullptr;
538 // Build the argument list.
539 CodeGenModule &CGM = CGF.CGM;
540 ASTContext &Ctx = CGM.getContext();
541 FunctionArgList TargetArgs;
542 Args.append(CD->param_begin(),
543 std::next(CD->param_begin(), CD->getContextParamPosition()));
544 TargetArgs.append(
545 CD->param_begin(),
546 std::next(CD->param_begin(), CD->getContextParamPosition()));
547 auto I = FO.S->captures().begin();
548 FunctionDecl *DebugFunctionDecl = nullptr;
549 if (!FO.UIntPtrCastRequired) {
551 QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, {}, EPI);
552 DebugFunctionDecl = FunctionDecl::Create(
553 Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
554 SourceLocation(), DeclarationName(), FunctionTy,
555 Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
556 /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
557 /*hasWrittenPrototype=*/false);
558 }
559 for (const FieldDecl *FD : RD->fields()) {
560 QualType ArgType = FD->getType();
561 IdentifierInfo *II = nullptr;
562 VarDecl *CapVar = nullptr;
563
564 // If this is a capture by copy and the type is not a pointer, the outlined
565 // function argument type should be uintptr and the value properly casted to
566 // uintptr. This is necessary given that the runtime library is only able to
567 // deal with pointers. We can pass in the same way the VLA type sizes to the
568 // outlined function.
569 if (FO.UIntPtrCastRequired &&
570 ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
571 I->capturesVariableArrayType()))
572 ArgType = Ctx.getUIntPtrType();
573
574 if (I->capturesVariable() || I->capturesVariableByCopy()) {
575 CapVar = I->getCapturedVar();
576 II = CapVar->getIdentifier();
577 } else if (I->capturesThis()) {
578 II = &Ctx.Idents.get("this");
579 } else {
580 assert(I->capturesVariableArrayType());
581 II = &Ctx.Idents.get("vla");
582 }
583 if (ArgType->isVariablyModifiedType())
584 ArgType = getCanonicalParamType(Ctx, ArgType);
585 VarDecl *Arg;
586 if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
587 Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
588 II, ArgType,
590 } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
592 Ctx, DebugFunctionDecl,
593 CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
594 CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
595 /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
596 } else {
597 Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
598 II, ArgType, ImplicitParamKind::Other);
599 }
600 Args.emplace_back(Arg);
601 // Do not cast arguments if we emit function with non-original types.
602 TargetArgs.emplace_back(
603 FO.UIntPtrCastRequired
604 ? Arg
605 : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
606 ++I;
607 }
608 Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
609 CD->param_end());
610 TargetArgs.append(
611 std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
612 CD->param_end());
613
614 // Create the function declaration.
615 const CGFunctionInfo &FuncInfo =
616 FO.IsDeviceKernel
618 TargetArgs)
620 TargetArgs);
621 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
622
623 auto *F =
624 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
625 FO.FunctionName, &CGM.getModule());
626 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
627
628 // Adjust the calling convention for SPIR-V targets to avoid mismatches
629 // between callee and caller.
630 if (CGM.getTriple().isSPIRV() && !FO.IsDeviceKernel)
631 F->setCallingConv(llvm::CallingConv::SPIR_FUNC);
632
633 if (CD->isNothrow())
634 F->setDoesNotThrow();
635 F->setDoesNotRecurse();
636
637 // Always inline the outlined function if optimizations are enabled.
638 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
639 F->removeFnAttr(llvm::Attribute::NoInline);
640 F->addFnAttr(llvm::Attribute::AlwaysInline);
641 }
642 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
643 F->addFnAttr("sample-profile-suffix-elision-policy", "selected");
644
645 // Generate the function.
646 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
647 FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
648 FO.UIntPtrCastRequired ? FO.Loc
649 : CD->getBody()->getBeginLoc());
650 unsigned Cnt = CD->getContextParamPosition();
651 I = FO.S->captures().begin();
652 for (const FieldDecl *FD : RD->fields()) {
653 // Do not map arguments if we emit function with non-original types.
654 Address LocalAddr(Address::invalid());
655 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
656 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
657 TargetArgs[Cnt]);
658 } else {
659 LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
660 }
661 // If we are capturing a pointer by copy we don't need to do anything, just
662 // use the value that we get from the arguments.
663 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
664 const VarDecl *CurVD = I->getCapturedVar();
665 if (!FO.RegisterCastedArgsOnly)
666 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
667 ++Cnt;
668 ++I;
669 continue;
670 }
671
672 LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
674 if (FD->hasCapturedVLAType()) {
675 if (FO.UIntPtrCastRequired) {
676 ArgLVal = CGF.MakeAddrLValue(
677 castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
678 Args[Cnt]->getName(), ArgLVal),
680 }
681 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
682 const VariableArrayType *VAT = FD->getCapturedVLAType();
683 VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
684 } else if (I->capturesVariable()) {
685 const VarDecl *Var = I->getCapturedVar();
686 QualType VarTy = Var->getType();
687 Address ArgAddr = ArgLVal.getAddress();
688 if (ArgLVal.getType()->isLValueReferenceType()) {
689 ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
690 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
691 assert(ArgLVal.getType()->isPointerType());
692 ArgAddr = CGF.EmitLoadOfPointer(
693 ArgAddr, ArgLVal.getType()->castAs<PointerType>());
694 }
695 if (!FO.RegisterCastedArgsOnly) {
696 LocalAddrs.insert(
697 {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
698 }
699 } else if (I->capturesVariableByCopy()) {
700 assert(!FD->getType()->isAnyPointerType() &&
701 "Not expecting a captured pointer.");
702 const VarDecl *Var = I->getCapturedVar();
703 LocalAddrs.insert({Args[Cnt],
704 {Var, FO.UIntPtrCastRequired
706 CGF, I->getLocation(), FD->getType(),
707 Args[Cnt]->getName(), ArgLVal)
708 : ArgLVal.getAddress()}});
709 } else {
710 // If 'this' is captured, load it into CXXThisValue.
711 assert(I->capturesThis());
712 CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
713 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
714 }
715 ++Cnt;
716 ++I;
717 }
718
719 return F;
720}
721
724 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
725 &LocalAddrs,
726 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
727 &VLASizes,
728 llvm::Value *&CXXThisValue, llvm::Value *&ContextV, const CapturedStmt &CS,
729 SourceLocation Loc, StringRef FunctionName) {
730 const CapturedDecl *CD = CS.getCapturedDecl();
731 const RecordDecl *RD = CS.getCapturedRecordDecl();
732
733 CXXThisValue = nullptr;
734 CodeGenModule &CGM = CGF.CGM;
735 ASTContext &Ctx = CGM.getContext();
736 Args.push_back(CD->getContextParam());
737
738 const CGFunctionInfo &FuncInfo =
740 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
741
742 auto *F =
743 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
744 FunctionName, &CGM.getModule());
745 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
746 if (CD->isNothrow())
747 F->setDoesNotThrow();
748 F->setDoesNotRecurse();
749
750 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
751 Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
752 ContextV = CGF.Builder.CreateLoad(ContextAddr);
753
754 // The runtime passes arguments as a flat array of promoted intptr_t values.
755 llvm::Type *IntPtrTy = CGF.IntPtrTy;
756 llvm::Type *PtrTy = CGF.Builder.getPtrTy();
757 llvm::Align PtrAlign = CGM.getDataLayout().getPointerABIAlignment(0);
758 CharUnits SlotAlign = CharUnits::fromQuantity(PtrAlign.value());
759
760 for (auto [FD, C, FieldIdx] :
761 llvm::zip(RD->fields(), CS.captures(),
762 llvm::seq<unsigned>(RD->getNumFields()))) {
763 llvm::Value *Slot =
764 CGF.Builder.CreateConstInBoundsGEP1_32(IntPtrTy, ContextV, FieldIdx);
765
766 // Generate the appropriate load from the GEP into the __context struct.
767 // This includes all of the user arguments as well as the implicit kernel
768 // argument pointer.
769 if (C.capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
770 const VarDecl *CurVD = C.getCapturedVar();
771 Slot->setName(CurVD->getName());
772 Address SlotAddr(Slot, PtrTy, SlotAlign);
773 LocalAddrs.insert({FD, {CurVD, SlotAddr}});
774 } else if (FD->hasCapturedVLAType()) {
775 // VLA size is stored as intptr_t directly in the slot.
776 Address SlotAddr(Slot, CGF.ConvertTypeForMem(FD->getType()), SlotAlign);
777 LValue ArgLVal =
778 CGF.MakeAddrLValue(SlotAddr, FD->getType(), AlignmentSource::Decl);
779 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, C.getLocation());
780 const VariableArrayType *VAT = FD->getCapturedVLAType();
781 VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg);
782 } else if (C.capturesVariable()) {
783 const VarDecl *Var = C.getCapturedVar();
784 QualType VarTy = Var->getType();
785
786 if (VarTy->isVariablyModifiedType() && VarTy->isPointerType()) {
787 Slot->setName(Var->getName() + ".addr");
788 Address SlotAddr(Slot, PtrTy, SlotAlign);
789 LocalAddrs.insert({FD, {Var, SlotAddr}});
790 } else {
791 llvm::Value *VarAddr = CGF.Builder.CreateAlignedLoad(
792 PtrTy, Slot, PtrAlign, Var->getName());
793 LocalAddrs.insert({FD,
794 {Var, Address(VarAddr, CGF.ConvertTypeForMem(VarTy),
795 Ctx.getDeclAlign(Var))}});
796 }
797 } else if (C.capturesVariableByCopy()) {
798 assert(!FD->getType()->isAnyPointerType() &&
799 "Not expecting a captured pointer.");
800 const VarDecl *Var = C.getCapturedVar();
801 QualType FieldTy = FD->getType();
802
803 // Scalar values are promoted and stored directly in the slot.
804 Address SlotAddr(Slot, CGF.ConvertTypeForMem(FieldTy), SlotAlign);
805 Address CopyAddr =
806 CGF.CreateMemTemp(FieldTy, Ctx.getDeclAlign(FD), Var->getName());
807 LValue SrcLVal =
808 CGF.MakeAddrLValue(SlotAddr, FieldTy, AlignmentSource::Decl);
809 LValue CopyLVal =
810 CGF.MakeAddrLValue(CopyAddr, FieldTy, AlignmentSource::Decl);
811
812 RValue ArgRVal = CGF.EmitLoadOfLValue(SrcLVal, C.getLocation());
813 CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal);
814
815 LocalAddrs.insert({FD, {Var, CopyAddr}});
816 } else {
817 assert(C.capturesThis() && "Default case expected to be CXX 'this'");
818 CXXThisValue =
819 CGF.Builder.CreateAlignedLoad(PtrTy, Slot, PtrAlign, "this");
820 Address SlotAddr(Slot, PtrTy, SlotAlign);
821 LocalAddrs.insert({FD, {nullptr, SlotAddr}});
822 }
823 }
824
825 return F;
826}
827
829 const CapturedStmt &S, const OMPExecutableDirective &D) {
830 SourceLocation Loc = D.getBeginLoc();
831 assert(
833 "CapturedStmtInfo should be set when generating the captured function");
834 const CapturedDecl *CD = S.getCapturedDecl();
835 // Build the argument list.
836 bool NeedWrapperFunction =
837 getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
838 FunctionArgList Args, WrapperArgs;
839 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
840 WrapperLocalAddrs;
841 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
842 WrapperVLASizes;
843 SmallString<256> Buffer;
844 llvm::raw_svector_ostream Out(Buffer);
845 Out << CapturedStmtInfo->getHelperName();
847 bool IsDeviceKernel = CGM.getOpenMPRuntime().isGPU() &&
849 D.getCapturedStmt(OMPD_target) == &S;
850 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
851 llvm::Function *WrapperF = nullptr;
852 if (NeedWrapperFunction) {
853 // Emit the final kernel early to allow attributes to be added by the
854 // OpenMP-IR-Builder.
855 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
856 /*RegisterCastedArgsOnly=*/true,
857 CapturedStmtInfo->getHelperName(), Loc,
858 IsDeviceKernel);
860 WrapperF =
861 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
862 WrapperCGF.CXXThisValue, WrapperFO);
863 Out << "_debug__";
864 }
865 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
866 Out.str(), Loc, !NeedWrapperFunction && IsDeviceKernel);
867 llvm::Function *F = emitOutlinedFunctionPrologue(
868 *this, WrapperArgs, WrapperLocalAddrs, WrapperVLASizes, CXXThisValue, FO);
869 CodeGenFunction::OMPPrivateScope LocalScope(*this);
870 for (const auto &LocalAddrPair : WrapperLocalAddrs) {
871 if (LocalAddrPair.second.first) {
872 LocalScope.addPrivate(LocalAddrPair.second.first,
873 LocalAddrPair.second.second);
874 }
875 }
876 (void)LocalScope.Privatize();
877 for (const auto &VLASizePair : WrapperVLASizes)
878 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
879 PGO->assignRegionCounters(GlobalDecl(CD), F);
880 CapturedStmtInfo->EmitBody(*this, CD->getBody());
881 LocalScope.ForceCleanup();
883 if (!NeedWrapperFunction)
884 return F;
885
886 // Reverse the order.
887 WrapperF->removeFromParent();
888 F->getParent()->getFunctionList().insertAfter(F->getIterator(), WrapperF);
889
891 auto *PI = F->arg_begin();
892 for (const auto *Arg : Args) {
893 llvm::Value *CallArg;
894 auto I = LocalAddrs.find(Arg);
895 if (I != LocalAddrs.end()) {
896 LValue LV = WrapperCGF.MakeAddrLValue(
897 I->second.second,
898 I->second.first ? I->second.first->getType() : Arg->getType(),
900 if (LV.getType()->isAnyComplexType())
901 LV.setAddress(LV.getAddress().withElementType(PI->getType()));
902 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
903 } else {
904 auto EI = VLASizes.find(Arg);
905 if (EI != VLASizes.end()) {
906 CallArg = EI->second.second;
907 } else {
908 LValue LV =
909 WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
911 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
912 }
913 }
914 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
915 ++PI;
916 }
917 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
918 WrapperCGF.FinishFunction();
919 return WrapperF;
920}
921
/// Emits the outlined function for captured statement \p S of directive \p D
/// in the form where captures are read from pointer-sized slots of a single
/// context value.
///
/// Two shapes are produced, depending on NeedWrapperFunction (debug info is
/// being generated and the code-gen options report reduced debug info):
///  - no wrapper: the body is emitted directly into the function created by
///    emitOutlinedFunctionPrologueAggregate, and every non-context parameter of
///    the CapturedDecl is loaded from the context slots that follow the record
///    fields;
///  - wrapper: the body is emitted into a "<helper-name>_debug__" function
///    created by emitOutlinedFunctionPrologue, and a wrapper function unpacks
///    each context slot and forwards it as a call argument.
///
/// Returns the wrapper function when one is needed, otherwise the function
/// that holds the body.
///
/// NOTE(review): the line carrying this function's name (original line 922)
/// and original lines 926, 938, 944, 1001 and 1011 are absent from this
/// listing; among other things the declaration of CallArgs and the call that
/// assigns WrapperF are not visible here.
923 const CapturedStmt &S, const OMPExecutableDirective &D) {
924 SourceLocation Loc = D.getBeginLoc();
925 assert(
927 "CapturedStmtInfo should be set when generating the captured function");
928 const CapturedDecl *CD = S.getCapturedDecl();
929 const RecordDecl *RD = S.getCapturedRecordDecl();
930 StringRef FunctionName = CapturedStmtInfo->getHelperName();
931 bool NeedWrapperFunction =
932 getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
933
// The wrapper gets its own CodeGenFunction so its prologue can be emitted
// before the body function is generated with *this.
934 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
935 llvm::Function *WrapperF = nullptr;
936 llvm::Value *WrapperContextV = nullptr;
937 if (NeedWrapperFunction) {
939 FunctionArgList WrapperArgs;
940 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
941 WrapperLocalAddrs;
942 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
943 WrapperVLASizes;
945 WrapperCGF, WrapperArgs, WrapperLocalAddrs, WrapperVLASizes,
946 WrapperCGF.CXXThisValue, WrapperContextV, S, Loc, FunctionName);
947 }
948
949 FunctionArgList Args;
950 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
951 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
952 llvm::Function *F;
953
954 if (NeedWrapperFunction) {
// The body function takes the "_debug__" suffix; the wrapper keeps the
// plain helper name.
955 SmallString<256> Buffer;
956 llvm::raw_svector_ostream Out(Buffer);
957 Out << FunctionName << "_debug__";
958
959 FunctionOptions FO(&S, /*UIntPtrCastRequired=*/false,
960 /*RegisterCastedArgsOnly=*/false, Out.str(), Loc,
961 /*IsDeviceKernel=*/false);
962 F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, VLASizes,
963 CXXThisValue, FO);
964 } else {
965 llvm::Value *ContextV = nullptr;
966 F = emitOutlinedFunctionPrologueAggregate(*this, Args, LocalAddrs, VLASizes,
967 CXXThisValue, ContextV, S, Loc,
968 FunctionName);
969
// Parameters of the CapturedDecl other than the context parameter are read
// from the context slots starting right after the captured record's fields.
// Each one is loaded as a pointer, spilled to a fresh temporary and
// registered in LocalAddrs so it is privatized below.
970 const RecordDecl *RD = S.getCapturedRecordDecl();
971 unsigned FieldIdx = RD->getNumFields();
972 for (unsigned I = 0; I < CD->getNumParams(); ++I) {
973 const ImplicitParamDecl *Param = CD->getParam(I);
974 if (Param == CD->getContextParam())
975 continue;
976 llvm::Value *ParamAddr = Builder.CreateConstInBoundsGEP1_32(
977 IntPtrTy, ContextV, FieldIdx, Twine(Param->getName()) + ".addr");
978 llvm::Value *ParamVal = Builder.CreateAlignedLoad(
979 Builder.getPtrTy(), ParamAddr,
980 CGM.getDataLayout().getPointerABIAlignment(0), Param->getName());
981 Address ParamLocalAddr =
982 CreateMemTemp(Param->getType(), Param->getName());
983 Builder.CreateStore(ParamVal, ParamLocalAddr);
984 LocalAddrs.insert({Param, {Param, ParamLocalAddr}});
985 ++FieldIdx;
986 }
987 }
988
// Map each captured variable to its local address, record VLA sizes, and
// emit the body of the captured statement.
989 CodeGenFunction::OMPPrivateScope LocalScope(*this);
990 for (const auto &LocalAddrPair : LocalAddrs) {
991 if (LocalAddrPair.second.first)
992 LocalScope.addPrivate(LocalAddrPair.second.first,
993 LocalAddrPair.second.second);
994 }
995 (void)LocalScope.Privatize();
996 for (const auto &VLASizePair : VLASizes)
997 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
998 PGO->assignRegionCounters(GlobalDecl(CD), F);
999 CapturedStmtInfo->EmitBody(*this, CD->getBody());
1000 (void)LocalScope.ForceCleanup();
1002
1003 if (!NeedWrapperFunction)
1004 return F;
1005
1006 // Reverse the order.
1007 WrapperF->removeFromParent();
1008 F->getParent()->getFunctionList().insertAfter(F->getIterator(), WrapperF);
1009
1010 llvm::Align PtrAlign = CGM.getDataLayout().getPointerABIAlignment(0);
1012 assert(CD->getContextParamPosition() == 0 &&
1013 "Expected context param at position 0 for target regions");
// Compare against the number of formal arguments of F. getNumOperands() is
// the llvm::User operand count of the Function, which is unrelated to its
// argument list.
1014 assert(RD->getNumFields() + 1 == F->arg_size() &&
1015 "Argument count mismatch");
1016
// Forward one call argument per captured field: load slot SlotIdx of the
// wrapper's context using the type of the matching parameter of F.
1017 for (auto [FD, InnerParam, SlotIdx] : llvm::zip(
1018 RD->fields(), F->args(), llvm::seq<unsigned>(RD->getNumFields()))) {
1019 llvm::Value *Slot = WrapperCGF.Builder.CreateConstInBoundsGEP1_32(
1020 WrapperCGF.IntPtrTy, WrapperContextV, SlotIdx);
1021 llvm::Value *Val = WrapperCGF.Builder.CreateAlignedLoad(
1022 InnerParam.getType(), Slot, PtrAlign, InnerParam.getName());
1023 CallArgs.push_back(Val);
1024 }
1025
1026 // Handle the load from the implicit dyn_ptr at the end of the __context.
1027 unsigned SlotIdx = RD->getNumFields();
1028 auto InnerParam = F->arg_begin() + SlotIdx;
1029 llvm::Value *Slot = WrapperCGF.Builder.CreateConstInBoundsGEP1_32(
1030 WrapperCGF.IntPtrTy, WrapperContextV, SlotIdx);
1031 llvm::Value *Val = WrapperCGF.Builder.CreateAlignedLoad(
1032 InnerParam->getType(), Slot, PtrAlign, InnerParam->getName());
1033 CallArgs.push_back(Val);
1034
1035 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
1036 WrapperCGF.FinishFunction();
1037 return WrapperF;
1038}
1039
1040//===----------------------------------------------------------------------===//
1041// OpenMP Directive Emission
1042//===----------------------------------------------------------------------===//
/// Emits a loop that walks two arrays of type \p OriginalType element by
/// element and invokes \p CopyGen on each (destination element, source
/// element) address pair.
///
/// \param DestAddr     address of the destination array.
/// \param SrcAddr      address of the source array; it is re-typed to the
///                     destination's element type before use.
/// \param OriginalType array type of both operands.
/// \param CopyGen      callback that emits the per-element copy.
///
/// The loop is skipped entirely when the destination is empty.
/// NOTE(review): the line carrying the function name (original line 1043) is
/// absent from this listing.
1044 Address DestAddr, Address SrcAddr, QualType OriginalType,
1045 const llvm::function_ref<void(Address, Address)> CopyGen) {
1046 // Perform element-by-element initialization.
1047 QualType ElementTy;
1048
1049 // Drill down to the base element type on both arrays.
1050 const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
1051 llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
1052 SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
1053
1054 llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
1055 llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
1056 // Cast from pointer to array type to pointer to single element.
1057 llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
1058 DestBegin, NumElements);
1059
1060 // The basic structure here is a while-do loop.
1061 llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
1062 llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
// Branch straight to the exit block when begin == end (no elements).
1063 llvm::Value *IsEmpty =
1064 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
1065 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
1066
1067 // Enter the loop body, making that address the current address.
1068 llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
1069 EmitBlock(BodyBB);
1070
1071 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
1072
// Current source element: a PHI fed by the array start on entry and by the
// advanced pointer on the back edge (added after the body is emitted).
1073 llvm::PHINode *SrcElementPHI =
1074 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
1075 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
1076 Address SrcElementCurrent =
1077 Address(SrcElementPHI, SrcAddr.getElementType(),
1078 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
1079
// Current destination element, built the same way as the source PHI.
1080 llvm::PHINode *DestElementPHI = Builder.CreatePHI(
1081 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
1082 DestElementPHI->addIncoming(DestBegin, EntryBB);
1083 Address DestElementCurrent =
1084 Address(DestElementPHI, DestAddr.getElementType(),
1085 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
1086
1087 // Emit copy.
1088 CopyGen(DestElementCurrent, SrcElementCurrent);
1089
1090 // Shift the address forward by one element.
1091 llvm::Value *DestElementNext =
1092 Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
1093 /*Idx0=*/1, "omp.arraycpy.dest.element");
1094 llvm::Value *SrcElementNext =
1095 Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
1096 /*Idx0=*/1, "omp.arraycpy.src.element");
1097 // Check whether we've reached the end.
1098 llvm::Value *Done =
1099 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
1100 Builder.CreateCondBr(Done, DoneBB, BodyBB);
// The back edge originates from whatever block is current after CopyGen,
// which need not be BodyBB if the callback emitted control flow.
1101 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
1102 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
1103
1104 // Done.
1105 EmitBlock(DoneBB, /*IsFinished=*/true);
1106}
1107
/// Emits a copy from \p SrcAddr to the destination address using the copy
/// expression \p Copy, which is written in terms of the pseudo variables
/// \p DestVD and \p SrcVD.
///
/// Arrays whose copy expression is a plain assignment are copied as one
/// aggregate assignment; other arrays take the element-by-element path;
/// non-array types remap the pseudo variables to the given addresses.
/// NOTE(review): original lines 1108, 1121, 1127, 1131, 1136 and 1141 are
/// absent from this listing. That covers the function name and first
/// parameters, the declarations of the Remap scopes, and the statements that
/// actually evaluate \p Copy.
1109 Address SrcAddr, const VarDecl *DestVD,
1110 const VarDecl *SrcVD, const Expr *Copy) {
1111 if (OriginalType->isArrayType()) {
1112 const auto *BO = dyn_cast<BinaryOperator>(Copy);
1113 if (BO && BO->getOpcode() == BO_Assign) {
1114 // Perform simple memcpy for simple copying.
1115 LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
1116 LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
1117 EmitAggregateAssign(Dest, Src, OriginalType);
1118 } else {
1119 // For arrays with complex element types perform element by element
1120 // copying.
1122 DestAddr, SrcAddr, OriginalType,
1123 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
1124 // Working with the single array element, so have to remap
1125 // destination and source variables to corresponding array
1126 // elements.
1128 Remap.addPrivate(DestVD, DestElement);
1129 Remap.addPrivate(SrcVD, SrcElement);
1130 (void)Remap.Privatize();
1132 });
1133 }
1134 } else {
1135 // Remap pseudo source variable to private copy.
1137 Remap.addPrivate(SrcVD, SrcAddr);
1138 Remap.addPrivate(DestVD, DestAddr);
1139 (void)Remap.Privatize();
1140 // Emit copying of the whole variable.
1142 }
1143}
1144
/// Emits the private copies required by the 'firstprivate' clauses of the
/// directive and registers them in \p PrivateScope.
///
/// Returns false immediately when there is no insertion point. Otherwise
/// returns true only if at least one variable that reached copy emission is
/// also listed in a 'lastprivate' clause and the set of handled variables is
/// non-empty.
///
/// NOTE(review): original lines 1145, 1149, 1151, 1157, 1161, 1206, 1229,
/// 1234, 1266 and 1271 are absent from this listing. That covers the function
/// name and first parameter, the definitions of EKind, CaptureRegions and CE,
/// and the condition that selects between the two array-copy strategies.
1146 OMPPrivateScope &PrivateScope) {
1147 if (!HaveInsertPoint())
1148 return false;
1150 bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
1152 bool FirstprivateIsLastprivate = false;
// Collect every lastprivate variable together with its clause modifier so
// firstprivate+lastprivate variables can be recognised below.
1153 llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
1154 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1155 for (const auto *D : C->varlist())
1156 Lastprivates.try_emplace(
1158 C->getKind());
1159 }
1160 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
1162 getOpenMPCaptureRegions(CaptureRegions, EKind);
1163 // Force emission of the firstprivate copy if the directive does not emit
1164 // outlined function, like omp for, omp simd, omp distribute etc.
1165 bool MustEmitFirstprivateCopy =
1166 CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
1167 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
// IRef and InitsRef are advanced in lock-step with the private_copies()
// iteration; every 'continue' below must bump both.
1168 const auto *IRef = C->varlist_begin();
1169 const auto *InitsRef = C->inits().begin();
1170 for (const Expr *IInit : C->private_copies()) {
1171 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1172 bool ThisFirstprivateIsLastprivate =
1173 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
1174 const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
1175 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
// No copy is emitted for a variable captured in a non-reference field,
// unless a copy is forced, the variable is also lastprivate, or the private
// copy carries an OMPAllocateDeclAttr.
1176 if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
1177 !FD->getType()->isReferenceType() &&
1178 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
1179 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
1180 ++IRef;
1181 ++InitsRef;
1182 continue;
1183 }
1184 // Do not emit copy for firstprivate constant variables in target regions,
1185 // captured by reference.
1186 if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
1187 FD && FD->getType()->isReferenceType() &&
1188 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
1189 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
1190 ++IRef;
1191 ++InitsRef;
1192 continue;
1193 }
1194 FirstprivateIsLastprivate =
1195 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
// Each canonical variable is privatized at most once, even if it appears in
// several firstprivate clauses.
1196 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
1197 const auto *VDInit =
1198 cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
1199 bool IsRegistered;
// Temporary reference to the original variable, used to obtain its lvalue.
1200 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1201 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
1202 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1203 LValue OriginalLVal;
1204 if (!FD) {
1205 // Check if the firstprivate variable is just a constant value.
1207 if (CE && !CE.isReference()) {
1208 // Constant value, no need to create a copy.
1209 ++IRef;
1210 ++InitsRef;
1211 continue;
1212 }
1213 if (CE && CE.isReference()) {
1214 OriginalLVal = CE.getReferenceLValue(*this, &DRE);
1215 } else {
1216 assert(!CE && "Expected non-constant firstprivate.");
1217 OriginalLVal = EmitLValue(&DRE);
1218 }
1219 } else {
1220 OriginalLVal = EmitLValue(&DRE);
1221 }
1222 QualType Type = VD->getType();
1223 if (Type->isArrayType()) {
1224 // Emit VarDecl with copy init for arrays.
1225 // Get the address of the original variable captured in current
1226 // captured region.
1227 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1228 const Expr *Init = VD->getInit();
1230 // Perform simple memcpy.
1231 LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
1232 EmitAggregateAssign(Dest, OriginalLVal, Type);
1233 } else {
1235 Emission.getAllocatedAddress(), OriginalLVal.getAddress(), Type,
1236 [this, VDInit, Init](Address DestElement, Address SrcElement) {
1237 // Clean up any temporaries needed by the
1238 // initialization.
1239 RunCleanupsScope InitScope(*this);
1240 // Emit initialization for single element.
1241 setAddrOfLocalVar(VDInit, SrcElement);
1242 EmitAnyExprToMem(Init, DestElement,
1243 Init->getType().getQualifiers(),
1244 /*IsInitializer*/ false);
// Drop the temporary mapping so the next element can rebind VDInit.
1245 LocalDeclMap.erase(VDInit);
1246 });
1247 }
1248 EmitAutoVarCleanups(Emission);
1249 IsRegistered =
1250 PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
1251 } else {
1252 Address OriginalAddr = OriginalLVal.getAddress();
1253 // Emit private VarDecl with copy init.
1254 // Remap temp VDInit variable to the address of the original
1255 // variable (for proper handling of captured global variables).
1256 setAddrOfLocalVar(VDInit, OriginalAddr);
1257 EmitDecl(*VD);
1258 LocalDeclMap.erase(VDInit);
1259 Address VDAddr = GetAddrOfLocalVar(VD);
1260 if (ThisFirstprivateIsLastprivate &&
1261 Lastprivates[OrigVD->getCanonicalDecl()] ==
1262 OMPC_LASTPRIVATE_conditional) {
1263 // Create/init special variable for lastprivate conditionals.
// The value already stored in the private copy is loaded, the runtime
// provides the replacement storage, and the value is stored into it before
// VD is rebound to the new address.
1264 llvm::Value *V =
1265 EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
1267 (*IRef)->getExprLoc());
1268 VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1269 *this, OrigVD);
1270 EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
1272 LocalDeclMap.erase(VD);
1273 setAddrOfLocalVar(VD, VDAddr);
1274 }
1275 IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
1276 }
1277 assert(IsRegistered &&
1278 "firstprivate var already registered as private");
1279 // Silence the warning about unused variable.
1280 (void)IsRegistered;
1281 }
1282 ++IRef;
1283 ++InitsRef;
1284 }
1285 }
1286 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
1287}
1288
/// Emits the private copies for every variable named in a 'private' clause of
/// directive \p D and registers each copy in \p PrivateScope.
///
/// Does nothing when there is no insertion point. A variable that appears in
/// more than one clause is emitted only once (keyed by canonical declaration).
/// NOTE(review): the line carrying the function name (original line 1289) is
/// absent from this listing.
1290 const OMPExecutableDirective &D,
1291 CodeGenFunction::OMPPrivateScope &PrivateScope) {
1292 if (!HaveInsertPoint())
1293 return;
1294 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
1295 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
// IRef walks the original variables in step with private_copies().
1296 auto IRef = C->varlist_begin();
1297 for (const Expr *IInit : C->private_copies()) {
1298 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1299 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
1300 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1301 EmitDecl(*VD);
1302 // Emit private VarDecl with copy init.
1303 bool IsRegistered =
1304 PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
1305 assert(IsRegistered && "private var already registered as private");
1306 // Silence the warning about unused variable.
1307 (void)IsRegistered;
1308 }
1309 ++IRef;
1310 }
1311 }
1312}
1313
/// Emits the copies required by the 'copyin' clauses of the directive: each
/// listed threadprivate variable is assigned from the master's instance.
///
/// The copies are guarded by a single address comparison emitted for the first
/// variable: if the master address equals the current thread's address, the
/// whole copy sequence is skipped.
///
/// Returns true if at least one copy was emitted, false otherwise (including
/// when there is no insertion point).
/// NOTE(review): the line carrying the function signature (original line
/// 1314) is absent from this listing.
1315 if (!HaveInsertPoint())
1316 return false;
1317 // threadprivate_var1 = master_threadprivate_var1;
1318 // operator=(threadprivate_var2, master_threadprivate_var2);
1319 // ...
1320 // __kmpc_barrier(&loc, global_tid);
1321 llvm::DenseSet<const VarDecl *> CopiedVars;
1322 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
1323 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
// The three iterators advance in lock-step with assignment_ops().
1324 auto IRef = C->varlist_begin();
1325 auto ISrcRef = C->source_exprs().begin();
1326 auto IDestRef = C->destination_exprs().begin();
1327 for (const Expr *AssignOp : C->assignment_ops()) {
1328 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1329 QualType Type = VD->getType();
1330 if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
1331 // Get the address of the master variable. If we are emitting code with
1332 // TLS support, the address is passed from the master as field in the
1333 // captured declaration.
1334 Address MasterAddr = Address::invalid();
1335 if (getLangOpts().OpenMPUseTLS &&
1336 getContext().getTargetInfo().isTLSSupported()) {
1337 assert(CapturedStmtInfo->lookup(VD) &&
1338 "Copyin threadprivates should have been captured!");
1339 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
1340 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1341 MasterAddr = EmitLValue(&DRE).getAddress();
// Forget the mapping created for VD by the lvalue emission above.
1342 LocalDeclMap.erase(VD);
1343 } else {
1344 MasterAddr =
1345 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
1346 : CGM.GetAddrOfGlobal(VD),
1347 CGM.getTypes().ConvertTypeForMem(VD->getType()),
1348 getContext().getDeclAlign(VD));
1349 }
1350 // Get the address of the threadprivate variable.
1351 Address PrivateAddr = EmitLValue(*IRef).getAddress();
// Only the first copied variable emits the master check; later variables
// are emitted inside the same guarded region.
1352 if (CopiedVars.size() == 1) {
1353 // At first check if current thread is a master thread. If it is, no
1354 // need to copy data.
1355 CopyBegin = createBasicBlock("copyin.not.master");
1356 CopyEnd = createBasicBlock("copyin.not.master.end");
1357 // TODO: Avoid ptrtoint conversion.
1358 auto *MasterAddrInt = Builder.CreatePtrToInt(
1359 MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
1360 auto *PrivateAddrInt = Builder.CreatePtrToInt(
1361 PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
1362 Builder.CreateCondBr(
1363 Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
1364 CopyEnd);
1365 EmitBlock(CopyBegin);
1366 }
1367 const auto *SrcVD =
1368 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1369 const auto *DestVD =
1370 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1371 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1372 }
1373 ++IRef;
1374 ++ISrcRef;
1375 ++IDestRef;
1376 }
1377 }
// CopyEnd is non-null exactly when at least one variable was copied.
1378 if (CopyEnd) {
1379 // Exit out of copying procedure for non-master thread.
1380 EmitBlock(CopyEnd, /*IsFinished=*/true);
1381 return true;
1382 }
1383 return false;
1384}
1385
/// Prepares the 'lastprivate' clauses of directive \p D: records the address
/// of each original variable under its destination pseudo variable and, where
/// needed, emits the private copy and registers it in \p PrivateScope.
///
/// Returns true if the directive has at least one 'lastprivate' clause, false
/// otherwise (including when there is no insertion point).
/// NOTE(review): original lines 1386, 1391 and 1397 are absent from this
/// listing. That covers the function name, the definition of EKind and the
/// argument of the SIMDLCVs.insert call.
1387 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1388 if (!HaveInsertPoint())
1389 return false;
1390 bool HasAtLeastOneLastprivate = false;
// For simd directives, gather the loop counters; no private copy is emitted
// here for a lastprivate variable that is one of them.
1392 llvm::DenseSet<const VarDecl *> SIMDLCVs;
1393 if (isOpenMPSimdDirective(EKind)) {
1394 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1395 for (const Expr *C : LoopDirective->counters()) {
1396 SIMDLCVs.insert(
1398 }
1399 }
1400 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1401 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1402 HasAtLeastOneLastprivate = true;
// For taskloop directives (outside simd-only mode) nothing more is emitted
// here; only the presence of the clause is reported.
1403 if (isOpenMPTaskLoopDirective(EKind) && !getLangOpts().OpenMPSimd)
1404 break;
1405 const auto *IRef = C->varlist_begin();
1406 const auto *IDestRef = C->destination_exprs().begin();
1407 for (const Expr *IInit : C->private_copies()) {
1408 // Keep the address of the original variable for future update at the end
1409 // of the loop.
1410 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1411 // Taskloops do not require additional initialization, it is done in
1412 // runtime support library.
1413 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1414 const auto *DestVD =
1415 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1416 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1417 /*RefersToEnclosingVariableOrCapture=*/
1418 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1419 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1420 PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
1421 // Check if the variable is also a firstprivate: in this case IInit is
1422 // not generated. Initialization of this variable will happen in codegen
1423 // for 'firstprivate' clause.
1424 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1425 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1426 Address VDAddr = Address::invalid();
// Conditional lastprivates use storage supplied by the OpenMP runtime
// helper; all others are emitted as ordinary local declarations.
1427 if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1428 VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1429 *this, OrigVD);
1430 setAddrOfLocalVar(VD, VDAddr);
1431 } else {
1432 // Emit private VarDecl with copy init.
1433 EmitDecl(*VD);
1434 VDAddr = GetAddrOfLocalVar(VD);
1435 }
1436 bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
1437 assert(IsRegistered &&
1438 "lastprivate var already registered as private");
1439 (void)IsRegistered;
1440 }
1441 }
1442 ++IRef;
1443 ++IDestRef;
1444 }
1445 }
1446 return HasAtLeastOneLastprivate;
1447}
1448
/// Emits the final copy-back for the 'lastprivate' clauses of directive \p D:
/// each private copy is assigned to its original variable.
///
/// \param D              directive whose clauses are processed.
/// \param NoFinals       when true, loop counters of a loop directive are
///                       treated as already handled and are not copied back;
///                       when false, each counter's final expression is
///                       evaluated before it is copied back.
/// \param IsLastIterCond optional condition; when non-null the copies are
///                       emitted under `if (IsLastIterCond)`.
///
/// Does nothing when there is no insertion point.
/// NOTE(review): the line carrying the function name (original line 1449) is
/// absent from this listing.
1450 const OMPExecutableDirective &D, bool NoFinals,
1451 llvm::Value *IsLastIterCond) {
1452 if (!HaveInsertPoint())
1453 return;
1454 // Emit following code:
1455 // if (<IsLastIterCond>) {
1456 // orig_var1 = private_orig_var1;
1457 // ...
1458 // orig_varn = private_orig_varn;
1459 // }
1460 llvm::BasicBlock *ThenBB = nullptr;
1461 llvm::BasicBlock *DoneBB = nullptr;
1462 if (IsLastIterCond) {
1463 // Emit implicit barrier if at least one lastprivate conditional is found
1464 // and this is not a simd mode.
1465 if (!getLangOpts().OpenMPSimd &&
1466 llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1467 [](const OMPLastprivateClause *C) {
1468 return C->getKind() == OMPC_LASTPRIVATE_conditional;
1469 })) {
1470 CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1471 OMPD_unknown,
1472 /*EmitChecks=*/false,
1473 /*ForceSimpleCall=*/true);
1474 }
1475 ThenBB = createBasicBlock(".omp.lastprivate.then");
1476 DoneBB = createBasicBlock(".omp.lastprivate.done");
1477 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1478 EmitBlock(ThenBB);
1479 }
1480 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1481 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
// For loop directives, pair each loop counter with its final-value
// expression, or mark the counter as already handled when NoFinals is set.
1482 if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1483 auto IC = LoopDirective->counters().begin();
1484 for (const Expr *F : LoopDirective->finals()) {
1485 const auto *D =
1486 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1487 if (NoFinals)
1488 AlreadyEmittedVars.insert(D);
1489 else
1490 LoopCountersAndUpdates[D] = F;
1491 ++IC;
1492 }
1493 }
1494 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
// The three iterators advance in lock-step with assignment_ops().
1495 auto IRef = C->varlist_begin();
1496 auto ISrcRef = C->source_exprs().begin();
1497 auto IDestRef = C->destination_exprs().begin();
1498 for (const Expr *AssignOp : C->assignment_ops()) {
1499 const auto *PrivateVD =
1500 cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1501 QualType Type = PrivateVD->getType();
1502 const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1503 if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1504 // If lastprivate variable is a loop control variable for loop-based
1505 // directive, update its value before copying it back to the original
1506 // variable.
1507 if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1508 EmitIgnoredExpr(FinalExpr);
1509 const auto *SrcVD =
1510 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1511 const auto *DestVD =
1512 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1513 // Get the address of the private variable.
1514 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
// For a reference-typed variable, load the stored pointer so the copy
// operates on the referenced object.
1515 if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1516 PrivateAddr = Address(
1517 Builder.CreateLoad(PrivateAddr),
1518 CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
1519 CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1520 // Store the last value to the private copy in the last iteration.
1521 if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1522 CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1523 *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1524 (*IRef)->getExprLoc());
1525 // Get the address of the original variable.
1526 Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1527 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1528 }
1529 ++IRef;
1530 ++ISrcRef;
1531 ++IDestRef;
1532 }
// A clause-level post-update expression, if present, is evaluated once after
// all of the clause's variables have been processed.
1533 if (const Expr *PostUpdate = C->getPostUpdateExpr())
1534 EmitIgnoredExpr(PostUpdate);
1535 }
1536 if (IsLastIterCond)
1537 EmitBlock(DoneBB, /*IsFinished=*/true);
1538}
1539
/// Emits the private reduction copies for the 'reduction' clauses of directive
/// \p D, initializes them, and registers the private copy plus the LHS/RHS
/// helper variables in \p PrivateScope.
///
/// \param D            directive whose reduction clauses are processed.
/// \param PrivateScope scope receiving the private mappings.
/// \param ForInscan    selects which clauses are handled: only clauses with
///                     the 'inscan' modifier when true, only clauses without
///                     it when false.
///
/// If any processed clause has the 'task' modifier, a task reduction
/// descriptor is additionally initialized through the OpenMP runtime and
/// stored into the directive's task-reduction reference variable.
///
/// NOTE(review): original lines 1540, 1545-1546, 1548-1552, 1607, 1636, 1676
/// and 1680 are absent from this listing. That covers the function name, the
/// declarations of Shareds, Privates, LHSs, RHSs, Data, TaskLHSs, TaskRHSs and
/// EKind, and parts of two conditions and two casts.
1541 const OMPExecutableDirective &D,
1542 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1543 if (!HaveInsertPoint())
1544 return;
1547 SmallVector<const Expr *, 4> ReductionOps;
// Flatten the per-clause expression lists into parallel vectors; entries at
// the same index describe the same reduction item.
1553 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1554 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1555 continue;
1556 Shareds.append(C->varlist_begin(), C->varlist_end());
1557 Privates.append(C->privates().begin(), C->privates().end());
1558 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1559 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1560 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
// Clauses with the 'task' modifier additionally feed the task-reduction
// data used after the main loop below.
1561 if (C->getModifier() == OMPC_REDUCTION_task) {
1562 Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1563 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1564 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1565 Data.ReductionOps.append(C->reduction_ops().begin(),
1566 C->reduction_ops().end());
1567 TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1568 TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1569 }
1570 }
1571 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
// Count indexes RedCG; ILHS/IRHS/IPriv advance in step with Shareds.
1572 unsigned Count = 0;
1573 auto *ILHS = LHSs.begin();
1574 auto *IRHS = RHSs.begin();
1575 auto *IPriv = Privates.begin();
1576 for (const Expr *IRef : Shareds) {
1577 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1578 // Emit private VarDecl with reduction init.
1579 RedCG.emitSharedOrigLValue(*this, Count);
1580 RedCG.emitAggregateType(*this, Count);
1581 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1582 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1583 RedCG.getSharedLValue(Count).getAddress(),
1584 [&Emission](CodeGenFunction &CGF) {
1585 CGF.EmitAutoVarInit(Emission);
1586 return true;
1587 });
1588 EmitAutoVarCleanups(Emission);
1589 Address BaseAddr = RedCG.adjustPrivateAddress(
1590 *this, Count, Emission.getAllocatedAddress());
1591 bool IsRegistered =
1592 PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
1593 assert(IsRegistered && "private var already registered as private");
1594 // Silence the warning about unused variable.
1595 (void)IsRegistered;
1596
// Bind the LHS helper variable to the shared (original) item and the RHS
// helper variable to the private copy. The three branches differ only in
// whether and how the addresses are re-typed.
1597 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1598 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1599 QualType Type = PrivateVD->getType();
1600 bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(IRef);
1601 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1602 // Store the address of the original variable associated with the LHS
1603 // implicit variable.
1604 PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
1605 PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
1606 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1608 // Store the address of the original variable associated with the LHS
1609 // implicit variable.
1610 PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
1611 PrivateScope.addPrivate(RHSVD,
1612 GetAddrOfLocalVar(PrivateVD).withElementType(
1613 ConvertTypeForMem(RHSVD->getType())));
1614 } else {
1615 QualType Type = PrivateVD->getType();
1616 bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1617 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
1618 // Store the address of the original variable associated with the LHS
1619 // implicit variable.
1620 if (IsArray) {
1621 OriginalAddr =
1622 OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
1623 }
1624 PrivateScope.addPrivate(LHSVD, OriginalAddr);
1625 PrivateScope.addPrivate(
1626 RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
1627 ConvertTypeForMem(RHSVD->getType()))
1628 : GetAddrOfLocalVar(PrivateVD));
1629 }
1630 ++ILHS;
1631 ++IRHS;
1632 ++IPriv;
1633 ++Count;
1634 }
// Task-modifier reductions: initialize the runtime's reduction descriptor
// and store it into the variable referenced by the directive's task
// reduction expression.
1635 if (!Data.ReductionVars.empty()) {
1637 Data.IsReductionWithTaskMod = true;
1638 Data.IsWorksharingReduction = isOpenMPWorksharingDirective(EKind);
1639 llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1640 *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1641 const Expr *TaskRedRef = nullptr;
// Only the directive kinds handled explicitly below provide a task
// reduction reference expression; every other kind is treated as
// unreachable.
1642 switch (EKind) {
1643 case OMPD_parallel:
1644 TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1645 break;
1646 case OMPD_for:
1647 TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1648 break;
1649 case OMPD_sections:
1650 TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1651 break;
1652 case OMPD_parallel_for:
1653 TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1654 break;
1655 case OMPD_parallel_master:
1656 TaskRedRef =
1657 cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1658 break;
1659 case OMPD_parallel_sections:
1660 TaskRedRef =
1661 cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1662 break;
1663 case OMPD_target_parallel:
1664 TaskRedRef =
1665 cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1666 break;
1667 case OMPD_target_parallel_for:
1668 TaskRedRef =
1669 cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1670 break;
1671 case OMPD_distribute_parallel_for:
1672 TaskRedRef =
1673 cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1674 break;
1675 case OMPD_teams_distribute_parallel_for:
1677 .getTaskReductionRefExpr();
1678 break;
1679 case OMPD_target_teams_distribute_parallel_for:
1681 .getTaskReductionRefExpr();
1682 break;
1683 case OMPD_simd:
1684 case OMPD_for_simd:
1685 case OMPD_section:
1686 case OMPD_single:
1687 case OMPD_master:
1688 case OMPD_critical:
1689 case OMPD_parallel_for_simd:
1690 case OMPD_task:
1691 case OMPD_taskyield:
1692 case OMPD_error:
1693 case OMPD_barrier:
1694 case OMPD_taskwait:
1695 case OMPD_taskgroup:
1696 case OMPD_flush:
1697 case OMPD_depobj:
1698 case OMPD_scan:
1699 case OMPD_ordered:
1700 case OMPD_atomic:
1701 case OMPD_teams:
1702 case OMPD_target:
1703 case OMPD_cancellation_point:
1704 case OMPD_cancel:
1705 case OMPD_target_data:
1706 case OMPD_target_enter_data:
1707 case OMPD_target_exit_data:
1708 case OMPD_taskloop:
1709 case OMPD_taskloop_simd:
1710 case OMPD_master_taskloop:
1711 case OMPD_master_taskloop_simd:
1712 case OMPD_parallel_master_taskloop:
1713 case OMPD_parallel_master_taskloop_simd:
1714 case OMPD_distribute:
1715 case OMPD_target_update:
1716 case OMPD_distribute_parallel_for_simd:
1717 case OMPD_distribute_simd:
1718 case OMPD_target_parallel_for_simd:
1719 case OMPD_target_simd:
1720 case OMPD_teams_distribute:
1721 case OMPD_teams_distribute_simd:
1722 case OMPD_teams_distribute_parallel_for_simd:
1723 case OMPD_target_teams:
1724 case OMPD_target_teams_distribute:
1725 case OMPD_target_teams_distribute_parallel_for_simd:
1726 case OMPD_target_teams_distribute_simd:
1727 case OMPD_declare_target:
1728 case OMPD_end_declare_target:
1729 case OMPD_threadprivate:
1730 case OMPD_allocate:
1731 case OMPD_declare_reduction:
1732 case OMPD_declare_mapper:
1733 case OMPD_declare_simd:
1734 case OMPD_requires:
1735 case OMPD_declare_variant:
1736 case OMPD_begin_declare_variant:
1737 case OMPD_end_declare_variant:
1738 case OMPD_unknown:
1739 default:
1740 llvm_unreachable("Unexpected directive with task reductions.");
1741 }
1742
// Emit the variable behind the task reduction reference and store the
// descriptor into it.
1743 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1744 EmitVarDecl(*VD);
1745 EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1746 /*Volatile=*/false, TaskRedRef->getType());
1747 }
1748}
1749
1751 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
// Finalizes the reduction clauses of D: gathers the expression lists of every
// non-inscan reduction clause and passes them to the OpenMP runtime's
// emitReduction. Nothing is emitted when there is no insert point or when no
// non-inscan reduction clause exists.
// NOTE(review): the declarations of Privates, LHSExprs, RHSExprs, ReductionOps
// and EKind are not visible in this listing - confirm against the full file.
1752 if (!HaveInsertPoint())
1753 return;
1758 llvm::SmallVector<bool, 8> IsPrivateVarReduction;
1759 bool HasAtLeastOneReduction = false;
1760 bool IsReductionWithTaskMod = false;
1761 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1762 // Do not emit for inscan reductions.
1763 if (C->getModifier() == OMPC_REDUCTION_inscan)
1764 continue;
1765 HasAtLeastOneReduction = true;
// Concatenate the per-clause lists so that a single emitReduction call
// below covers all clauses.
1766 Privates.append(C->privates().begin(), C->privates().end());
1767 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1768 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1769 IsPrivateVarReduction.append(C->private_var_reduction_flags().begin(),
1770 C->private_var_reduction_flags().end());
1771 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
// Remember whether any clause used the 'task' reduction modifier.
1772 IsReductionWithTaskMod =
1773 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1774 }
1775 if (HasAtLeastOneReduction) {
// A 'task' modifier on any clause requires the task-reduction finalization
// call before the reduction itself is emitted.
1777 if (IsReductionWithTaskMod) {
1778 CGM.getOpenMPRuntime().emitTaskReductionFini(
1779 *this, D.getBeginLoc(), isOpenMPWorksharingDirective(EKind));
1780 }
// A target-teams generic loop that can be emitted as a parallel-for is
// one of the conditions that select the nowait form.
1781 bool TeamsLoopCanBeParallel = false;
1782 if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
1783 TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
1784 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1786 TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
// simd reductions are flagged as "simple" (and, above, as nowait).
1787 bool SimpleReduction = ReductionKind == OMPD_simd;
1788 // Emit nowait reduction if nowait clause is present or directive is a
1789 // parallel directive (it always has implicit barrier).
1790 CGM.getOpenMPRuntime().emitReduction(
1791 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1792 {WithNowait, SimpleReduction, IsPrivateVarReduction, ReductionKind});
1793 }
1794}
1795
1798 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
// Emits the post-update expression of every reduction clause of D that has
// one. CondGen may return a condition value; when it does, all post-updates
// are emitted inside a block that is only entered if that condition holds.
// When CondGen returns null the post-updates are emitted unconditionally.
1799 if (!CGF.HaveInsertPoint())
1800 return;
// DoneBB doubles as the "conditional block already opened" flag.
1801 llvm::BasicBlock *DoneBB = nullptr;
1802 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1803 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
// While DoneBB is still null, CondGen is invoked again for each
// post-update expression that is encountered.
1804 if (!DoneBB) {
1805 if (llvm::Value *Cond = CondGen(CGF)) {
1806 // If the first post-update expression is found, emit conditional
1807 // block if it was requested.
1808 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1809 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1810 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1811 CGF.EmitBlock(ThenBB);
1812 }
1813 }
1814 CGF.EmitIgnoredExpr(PostUpdate);
1815 }
1816 }
// Close the conditional region, if one was opened.
1817 if (DoneBB)
1818 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1819}
1820
1821namespace {
1822/// Codegen lambda for appending distribute lower and upper bounds to outlined
1823/// parallel function. This is necessary for combined constructs such as
1824/// 'distribute parallel for'
1825typedef llvm::function_ref<void(CodeGenFunction &,
1826 const OMPExecutableDirective &,
1827 llvm::SmallVectorImpl<llvm::Value *> &)>
1828 CodeGenBoundParametersTy;
1829} // anonymous namespace
1830
// Collects into PrivateDecls the scalar variables that are named directly (as
// a DeclRefExpr, ignoring parentheses and implicit casts) in the reduction,
// lastprivate, linear and firstprivate clauses of S. Does nothing when the
// OpenMP language version is below 50.
// NOTE(review): the line carrying the function name, the statements that
// follow each insert in the first three loops, and the head of the final call
// that consumes PrivateDecls are not visible in this listing.
1831static void
1833 const OMPExecutableDirective &S) {
1834 if (CGF.getLangOpts().OpenMP < 50)
1835 return;
1836 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
// Reduction variables.
1837 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1838 for (const Expr *Ref : C->varlist()) {
// Only scalar list items that are plain variable references are handled.
1839 if (!Ref->getType()->isScalarType())
1840 continue;
1841 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1842 if (!DRE)
1843 continue;
1844 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1846 }
1847 }
// Lastprivate variables, same filtering as above.
1848 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1849 for (const Expr *Ref : C->varlist()) {
1850 if (!Ref->getType()->isScalarType())
1851 continue;
1852 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1853 if (!DRE)
1854 continue;
1855 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1857 }
1858 }
// Linear variables, same filtering as above.
1859 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1860 for (const Expr *Ref : C->varlist()) {
1861 if (!Ref->getType()->isScalarType())
1862 continue;
1863 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1864 if (!DRE)
1865 continue;
1866 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1868 }
1869 }
1870 // Privates need not be analyzed since they are not captured at all.
1871 // Task reductions may be skipped - tasks are ignored.
1872 // Firstprivates do not return value but may be passed by reference - no need
1873 // to check for updated lastprivate conditional.
1874 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1875 for (const Expr *Ref : C->varlist()) {
1876 if (!Ref->getType()->isScalarType())
1877 continue;
1878 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1879 if (!DRE)
1880 continue;
1881 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1882 }
1883 }
1885 CGF, S, PrivateDecls);
1886}
1887
1890 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1891 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
// Shared lowering for directives that contain a 'parallel' region: evaluates
// the num_threads / proc_bind / if clauses of S, collects the captured
// variables and emits the runtime parallel call on the outlined function.
// NOTE(review): the declarations of Modifier and CapturedVars, and the heads
// of several runtime calls, are not visible in this listing.
1892 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1893 llvm::Value *NumThreads = nullptr;
1895 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is as
1896 // if sev-level is fatal."
1897 OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal;
1898 clang::Expr *Message = nullptr;
1899 SourceLocation SeverityLoc = SourceLocation();
1900 SourceLocation MessageLoc = SourceLocation();
1901
1902 llvm::Function *OutlinedFn =
1904 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
1905 CodeGen);
1906
// The message and severity clauses are only consulted when a num_threads
// clause is present.
1907 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1908 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1909 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1910 /*IgnoreResultAssign=*/true);
1911 Modifier = NumThreadsClause->getModifier();
1912 if (const auto *MessageClause = S.getSingleClause<OMPMessageClause>()) {
1913 Message = MessageClause->getMessageString();
1914 MessageLoc = MessageClause->getBeginLoc();
1915 }
1916 if (const auto *SeverityClause = S.getSingleClause<OMPSeverityClause>()) {
1917 Severity = SeverityClause->getSeverityKind();
1918 SeverityLoc = SeverityClause->getBeginLoc();
1919 }
1921 CGF, NumThreads, NumThreadsClause->getBeginLoc(), Modifier, Severity,
1922 SeverityLoc, Message, MessageLoc);
1923 }
1924 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1925 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1927 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1928 }
// Use the first 'if' clause that has no name modifier or whose name modifier
// is 'parallel'; 'if' clauses naming other directives are ignored here.
1929 const Expr *IfCond = nullptr;
1930 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1931 if (C->getNameModifier() == OMPD_unknown ||
1932 C->getNameModifier() == OMPD_parallel) {
1933 IfCond = C->getCondition();
1934 break;
1935 }
1936 }
1937
1938 OMPParallelScope Scope(CGF, S);
1940 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1941 // lower and upper bounds with the pragma 'for' chunking mechanism.
1942 // The following lambda takes care of appending the lower and upper bound
1943 // parameters when necessary
// The bound parameters are appended before the regular captured variables.
1944 CodeGenBoundParameters(CGF, S, CapturedVars);
1945 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1946 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1947 CapturedVars, IfCond, NumThreads,
1948 Modifier, Severity, Message);
1949}
1950
1951static bool isAllocatableDecl(const VarDecl *VD) {
1952 const VarDecl *CVD = VD->getCanonicalDecl();
1953 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1954 return false;
1955 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1956 // Use the default allocation.
1957 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1958 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1959 !AA->getAllocator());
1960}
1961
1965
1967 const OMPExecutableDirective &S) {
// Emits the copyin clause(s) of S via CGF.EmitOMPCopyinClause and, when that
// call reports that something was emitted, follows it with a barrier.
1968 bool Copyins = CGF.EmitOMPCopyinClause(S);
1969 if (Copyins) {
1970 // Emit an implicit barrier to synchronize threads and avoid data races
1971 // while the master thread's values of threadprivate variables are
1972 // propagated to the local instances of those variables in all other threads.
1974 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1975 /*ForceSimpleCall=*/true);
1976 }
1977}
1978
1980 CodeGenFunction &CGF, const VarDecl *VD) {
// Allocates storage for VD through the OpenMPIRBuilder's createOMPAlloc using
// the allocator named in VD's OMPAllocateDeclAttr, and pairs it with a
// createOMPFree call that is handed to a cleanup. Returns Address::invalid()
// when VD is null or isAllocatableDecl() rejects it.
1981 CodeGenModule &CGM = CGF.CGM;
1982 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1983
1984 if (!VD)
1985 return Address::invalid();
1986 const VarDecl *CVD = VD->getCanonicalDecl();
1987 if (!isAllocatableDecl(CVD))
1988 return Address::invalid();
// Compute the allocation size, rounded up to a multiple of the declared
// alignment: at run time for variably modified types, as a constant otherwise.
1989 llvm::Value *Size;
1990 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1991 if (CVD->getType()->isVariablyModifiedType()) {
1992 Size = CGF.getTypeSize(CVD->getType());
1993 // Align the size: ((size + align - 1) / align) * align
1994 Size = CGF.Builder.CreateNUWAdd(
1995 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1996 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1997 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1998 } else {
1999 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
2000 Size = CGM.getSize(Sz.alignTo(Align));
2001 }
2002
2003 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2004 assert(AA->getAllocator() &&
2005 "Expected allocator expression for non-default allocator.");
2006 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
2007 // According to the standard, the original allocator type is an enum (integer).
2008 // Convert to pointer type, if required.
// A value that is neither an integer nor a pointer is passed on unchanged.
2009 if (Allocator->getType()->isIntegerTy())
2010 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
2011 else if (Allocator->getType()->isPointerTy())
2012 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
2013 CGM.VoidPtrTy);
2014
// Emit the allocation and the matching free call; the free call is passed to
// an OMPAllocateCleanupTy cleanup pushed for both normal and EH exits.
2015 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
2016 CGF.Builder, Size, Allocator,
2017 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
2018 llvm::CallInst *FreeCI =
2019 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
2020
2021 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
2023 Addr,
2024 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
2025 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
// The returned address uses the variable's in-memory type and declared
// alignment.
2026 return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
2027}
2028
2030 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
2031 SourceLocation Loc) {
// Returns the address to use for the threadprivate variable VD. When the
// OpenMPUseTLS language option is set and the target supports TLS, VDAddr is
// returned unchanged; otherwise the address comes from the OpenMPIRBuilder's
// createCachedThreadPrivate call. Loc is not used in the visible body.
2032 CodeGenModule &CGM = CGF.CGM;
2033 if (CGM.getLangOpts().OpenMPUseTLS &&
2034 CGM.getContext().getTargetInfo().isTLSSupported())
2035 return VDAddr;
2036
2037 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2038
// Arguments for the cached-threadprivate call: the variable's raw pointer
// cast to Int8PtrTy, the store size of its element type, and a cache name
// formed from the mangled name of VD followed by Suffix.
2039 llvm::Type *VarTy = VDAddr.getElementType();
2040 llvm::Value *Data =
2041 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy);
2042 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
2043 std::string Suffix = getNameWithSeparators({"cache", ""});
// NOTE(review): a Twine built from a temporary Twine is stored in a local
// here and used in a later statement; LLVM's Twine documentation warns
// against storing Twines - confirm the operand lifetimes are sufficient.
2044 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
2045
2046 llvm::CallInst *ThreadPrivateCacheCall =
2047 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
2048
// The result is wrapped with element type Int8Ty and the alignment of VDAddr.
2049 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
2050}
2051
2053 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
// Joins Parts into a single string: the first part is preceded by
// FirstSeparator and every later part by Separator. An empty Parts yields an
// empty string.
2054 SmallString<128> Buffer;
2055 llvm::raw_svector_ostream OS(Buffer);
2056 StringRef Sep = FirstSeparator;
2057 for (StringRef Part : Parts) {
2058 OS << Sep << Part;
// Every part after the first uses the regular separator.
2059 Sep = Separator;
2060 }
2061 return OS.str().str();
2062}
2063
2065 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
2066 InsertPointTy CodeGenIP, Twine RegionName) {
// Emits RegionBodyStmt at CodeGenIP under an InlinedRegionBodyRAII.
// NOTE(review): the declaration of Builder is not visible in this listing.
2068 Builder.restoreIP(CodeGenIP);
// FiniBB is the block returned by splitBBWithSuffix for the suffix
// ".<RegionName>.after"; no branch is created by the split itself.
2069 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
2070 "." + RegionName + ".after");
2071
// The RAII object is scoped so that it is destroyed right after the body has
// been emitted.
2072 {
2073 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
2074 CGF.EmitStmt(RegionBodyStmt);
2075 }
2076
// If the builder still has an insertion point, branch to FiniBB.
2077 if (Builder.saveIP().isSet())
2078 Builder.CreateBr(FiniBB);
2079}
2080
2082 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
2083 InsertPointTy CodeGenIP, Twine RegionName) {
// Emits RegionBodyStmt at CodeGenIP under an OutlinedRegionBodyRAII.
// NOTE(review): the declaration of Builder is not visible in this listing.
2085 Builder.restoreIP(CodeGenIP);
// FiniBB is the block returned by splitBBWithSuffix for the suffix
// ".<RegionName>.after"; no branch is created by the split itself.
2086 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
2087 "." + RegionName + ".after");
2088
// The RAII object is scoped so that it is destroyed right after the body has
// been emitted.
2089 {
2090 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
2091 CGF.EmitStmt(RegionBodyStmt);
2092 }
2093
// If the builder still has an insertion point, branch to FiniBB.
2094 if (Builder.saveIP().isSet())
2095 Builder.CreateBr(FiniBB);
2096}
2097
// Emits code for an OMPParallelDirective. When the OpenMPIRBuilder language
// option is enabled the region is built through OMPBuilder.createParallel and
// the function returns early; otherwise the region is emitted through
// emitCommonOMPParallelDirective with a CodeGen callback that handles the
// copyin, firstprivate, private and reduction clauses around the body.
2098void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
2099 if (CGM.getLangOpts().OpenMPIRBuilder) {
2100 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2101 // Check if we have any if clause associated with the directive.
2102 llvm::Value *IfCond = nullptr;
2103 if (const auto *C = S.getSingleClause<OMPIfClause>())
2104 IfCond = EmitScalarExpr(C->getCondition(),
2105 /*IgnoreResultAssign=*/true);
2106
// Optional num_threads value; stays null when the clause is absent.
2107 llvm::Value *NumThreads = nullptr;
2108 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
2109 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
2110 /*IgnoreResultAssign=*/true);
2111
// Optional proc_bind kind; OMP_PROC_BIND_default when the clause is absent.
2112 ProcBindKind ProcBind = OMP_PROC_BIND_default;
2113 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
2114 ProcBind = ProcBindClause->getProcBindKind();
2115
2116 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2117
2118 // The cleanup callback that finalizes all variables at the given location,
2119 // thus calls destructors etc.
2120 auto FiniCB = [this](InsertPointTy IP) {
2122 return llvm::Error::success();
2123 };
2124
2125 // Privatization callback that performs appropriate action for
2126 // shared/private/firstprivate/lastprivate/copyin/... variables.
2127 //
2128 // TODO: This defaults to shared right now.
2129 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
2130 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
2131 // The next line is appropriate only for variables (Val) with the
2132 // data-sharing attribute "shared".
2133 ReplVal = &Val;
2134
2135 return CodeGenIP;
2136 };
2137
2138 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
2139 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
2140
// Body callback: emits the statement captured by the parallel region under
// the region name "parallel".
2141 auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
2142 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
2144 *this, ParallelRegionBodyStmt, AllocIP, CodeGenIP, "parallel");
2145 return llvm::Error::success();
2146 };
2147
// Install captured-statement info for CS while createParallel runs, then
// continue emission at the insertion point it returns.
2148 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
2149 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
2150 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
2151 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
2152 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2153 cantFail(OMPBuilder.createParallel(
2154 Builder, AllocaIP, /*DeallocBlocks=*/{}, BodyGenCB, PrivCB, FiniCB,
2155 IfCond, NumThreads, ProcBind, S.hasCancel()));
2156 Builder.restoreIP(AfterIP);
2157 return;
2158 }
2159
2160 // Emit parallel region as a standalone region.
2161 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2162 Action.Enter(CGF);
// Set up copyin, firstprivate, private and reduction variables, emit the
// captured body, then finalize the reductions.
2163 OMPPrivateScope PrivateScope(CGF);
2164 emitOMPCopyinClause(CGF, S);
2165 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
2166 CGF.EmitOMPPrivateClause(S, PrivateScope);
2167 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
2168 (void)PrivateScope.Privatize();
2169 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
2170 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2171 };
// NOTE(review): parts of the statements in the scope below are not visible in
// this listing.
2172 {
2173 auto LPCRegion =
2175 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
2178 [](CodeGenFunction &) { return nullptr; });
2179 }
2180 // Check for outer lastprivate conditional update.
2182}
2183
2187
2188namespace {
2189/// RAII to handle scopes for loop transformation directives.
// The helper objects are heap-allocated in the constructor only when S is one
// of the two directive kinds tested there, and released in the destructor.
// Copying is disabled.
// NOTE(review): the declaration of CGSI and the statements that assign it are
// not visible in this listing.
2190class OMPTransformDirectiveScopeRAII {
2191 OMPLoopScope *Scope = nullptr;
2193 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
2194
2195 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
2196 delete;
2197 OMPTransformDirectiveScopeRAII &
2198 operator=(const OMPTransformDirectiveScopeRAII &) = delete;
2199
2200public:
2201 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
2202 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
2203 Scope = new OMPLoopScope(CGF, *Dir);
2205 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
2206 } else if (const auto *Dir =
2207 dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(
2208 S)) {
2209 // For simplicity we reuse the loop scope here, similarly to what
2210 // OMPCanonicalLoopNestTransformationDirective gets by being a subclass
2211 // of OMPLoopBasedDirective.
2212 Scope = new OMPLoopScope(CGF, *Dir);
2214 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
2215 }
2216 }
2217 ~OMPTransformDirectiveScopeRAII() {
// Scope is the first object created in the constructor, so a null Scope
// means nothing was allocated.
2218 if (!Scope)
2219 return;
// Scope, created first, is deleted last.
2220 delete CapInfoRAII;
2221 delete CGSI;
2222 delete Scope;
2223 }
2224};
2225} // namespace
2226
// Emits the statement S that forms (part of) the body of a loop nest.
// Compound statements are walked element by element. When the statement is
// NextLoop, only its body is descended into (up to MaxLevel nested loops)
// instead of emitting the loop statement itself. Anything else is emitted
// with CGF.EmitStmt.
//   S        - statement to emit.
//   NextLoop - the next inner loop to look through, or null.
//   MaxLevel - number of loop levels that may be looked through.
//   Level    - current depth; must be below MaxLevel.
2227static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
2228 int MaxLevel, int Level = 0) {
2229 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
2230 const Stmt *SimplifiedS = S->IgnoreContainers();
2231 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
2232 PrettyStackTraceLoc CrashInfo(
2233 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
2234 "LLVM IR generation of compound statement ('{}')");
2235
2236 // Keep track of the current cleanup stack depth, including debug scopes.
// Children of a compound statement are visited at the same Level.
2238 for (const Stmt *CurStmt : CS->body())
2239 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
2240 return;
2241 }
2242 if (SimplifiedS == NextLoop) {
// Peel loop-transformation and canonical-loop wrappers to reach the
// underlying for / range-based for statement.
2243 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
2244 SimplifiedS = Dir->getTransformedStmt();
2245 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
2246 SimplifiedS = CanonLoop->getLoopStmt();
2247 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
2248 S = For->getBody();
2249 } else {
2250 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
2251 "Expected canonical for loop or range-based for loop.");
2252 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
// For a range-based for, the loop variable declaration is emitted before
// its body.
2253 CGF.EmitStmt(CXXFor->getLoopVarStmt());
2254 S = CXXFor->getBody();
2255 }
// Descend one more level only if more loop levels may still be looked
// through; otherwise fall through and emit the body as-is.
2256 if (Level + 1 < MaxLevel) {
2257 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
2258 S, /*TryImperfectlyNestedLoops=*/true);
2259 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
2260 return;
2261 }
2262 }
2263 CGF.EmitStmt(S);
2264}
2265
// Emits one iteration's worth of the loop directive D: counter and linear
// updates, the range checks from finals_conditions(), optional inscan setup,
// the loop body itself, and the "omp.body.continue" block.
// NOTE(review): the signature (which declares D and LoopExit), the declaration
// of EKind and a few statements are not visible in this listing.
2268 RunCleanupsScope BodyScope(*this);
2269 // Update counters values on current iteration.
2270 for (const Expr *UE : D.updates())
2271 EmitIgnoredExpr(UE);
2272 // Update the linear variables.
2273 // In distribute directives only loop counters may be marked as linear, no
2274 // need to generate the code for them.
2276 if (!isOpenMPDistributeDirective(EKind)) {
2277 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2278 for (const Expr *UE : C->updates())
2279 EmitIgnoredExpr(UE);
2280 }
2281 }
2282
2283 // On a continue in the body, jump to the end.
2284 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
2285 BreakContinueStack.push_back(BreakContinue(D, LoopExit, Continue));
// Null entries in finals_conditions() are skipped; each non-null condition
// branches to the continue block when it is false.
2286 for (const Expr *E : D.finals_conditions()) {
2287 if (!E)
2288 continue;
2289 // Check that loop counter in non-rectangular nest fits into the iteration
2290 // space.
2291 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
2292 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
2293 getProfileCount(D.getBody()));
2294 EmitBlock(NextBB);
2295 }
2296
// Privatize inscan reduction variables; Privatize() reporting true marks this
// body as an inscan region.
2297 OMPPrivateScope InscanScope(*this);
2298 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
2299 bool IsInscanRegion = InscanScope.Privatize();
2300 if (IsInscanRegion) {
2301 // Need to remember the block before and after scan directive
2302 // to dispatch them correctly depending on the clause used in
2303 // this directive, inclusive or exclusive. For inclusive scan the natural
2304 // order of the blocks is used, for exclusive clause the blocks must be
2305 // executed in reverse order.
2306 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
2307 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
2308 // No need to allocate inscan exit block, in simd mode it is selected in the
2309 // codegen for the scan directive.
2310 if (EKind != OMPD_simd && !getLangOpts().OpenMPSimd)
2311 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
2312 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
2315 }
2316
2317 // Emit loop variables for C++ range loops.
2318 const Stmt *Body =
2319 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
2320 // Emit loop body.
2321 emitBody(*this, Body,
2322 OMPLoopBasedDirective::tryToFindNextInnerLoop(
2323 Body, /*TryImperfectlyNestedLoops=*/true),
2324 D.getLoopsNumber());
2325
2326 // Jump to the dispatcher at the end of the loop body.
2327 if (IsInscanRegion)
2329
2330 // The end (updates/cleanups).
2331 EmitBlock(Continue.getBlock());
2332 BreakContinueStack.pop_back();
2333}
2334
// An emitted closure: the generated function (first) paired with the pointer
// to its captured-variable struct (second).
2335using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
2336
2337/// Emit a captured statement and return the function as well as its captured
2338/// closure context.
2340 const CapturedStmt *S) {
// The capture struct is initialized in the parent function; the function body
// is generated with a separate CodeGenFunction that has its own
// CGCapturedStmtInfo installed for the duration of the generation.
2341 LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
2342 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
2343 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
2344 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
2345 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
2346 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
2347
// Pair the generated function with the pointer to the capture struct.
2348 return {F, CapStruct.getPointer(ParentCGF)};
2349}
2350
2351/// Emit a call to a previously captured closure.
// The call passes Args followed by the closure context (Cap.second) as the
// last argument, and targets Cap.first.
// NOTE(review): the parameter list is not visible in this listing.
2352static llvm::CallInst *
2355 // Append the closure context to the argument.
2356 SmallVector<llvm::Value *> EffectiveArgs;
2357 EffectiveArgs.reserve(Args.size() + 1);
2358 llvm::append_range(EffectiveArgs, Args);
2359 EffectiveArgs.push_back(Cap.second);
2360
2361 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
2362}
2363
// Emits S and returns the CanonicalLoopInfo that S left on top of
// OMPLoopNestStack, popping Depth entries and restoring the previous
// ExpectedOMPLoopDepth afterwards. Only Depth == 1 is supported (asserted).
// NOTE(review): the line with the function name and parameters is not visible
// in this listing.
2364llvm::CanonicalLoopInfo *
2366 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
2367
2368 // The caller is processing the loop-associated directive processing the \p
2369 // Depth loops nested in \p S. Put the previous pending loop-associated
2370 // directive to the stack. If the current loop-associated directive is a loop
2371 // transformation directive, it will push its generated loops onto the stack
2372 // such that together with the loops left here they form the combined loop
2373 // nest for the parent loop-associated directive.
2374 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
2375 ExpectedOMPLoopDepth = Depth;
2376
2377 EmitStmt(S);
2378 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
2379
2380 // The last added loop is the outermost one.
2381 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
2382
2383 // Pop the \p Depth loops requested by the call from that stack and restore
2384 // the previous context.
2385 OMPLoopNestStack.pop_back_n(Depth);
2386 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
2387
2388 return Result;
2389}
2390
// Emits an OMPCanonicalLoop. Without the OpenMPIRBuilder language option the
// wrapped loop statement is emitted as-is. Otherwise the loop's distance and
// loop-variable functions are emitted as closures, the trip count is computed
// by calling the distance closure, and the loop is built with
// OMPBuilder.createCanonicalLoop. The resulting CanonicalLoopInfo is pushed
// onto OMPLoopNestStack for the enclosing directive.
2391void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
2392 const Stmt *SyntacticalLoop = S->getLoopStmt();
2393 if (!getLangOpts().OpenMPIRBuilder) {
2394 // Ignore if OpenMPIRBuilder is not enabled.
2395 EmitStmt(SyntacticalLoop);
2396 return;
2397 }
2398
2399 LexicalScope ForScope(*this, S->getSourceRange());
2400
2401 // Emit init statements. The Distance/LoopVar funcs may reference variable
2402 // declarations they contain.
2403 const Stmt *BodyStmt;
2404 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
2405 if (const Stmt *InitStmt = For->getInit())
2406 EmitStmt(InitStmt);
2407 BodyStmt = For->getBody();
2408 } else if (const auto *RangeFor =
2409 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
// Range-based for: emit the range, begin, end and loop-variable declarations
// that are present, in that order.
2410 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2411 EmitStmt(RangeStmt);
2412 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2413 EmitStmt(BeginStmt);
2414 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2415 EmitStmt(EndStmt);
2416 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2417 EmitStmt(LoopVarStmt);
2418 BodyStmt = RangeFor->getBody();
2419 } else
2420 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2421
2422 // Emit closure for later use. By-value captures will be captured here.
2423 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2424 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
2425 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2426 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
2427
2428 // Call the distance function to get the number of iterations of the loop to
2429 // come.
// The count is written by the distance closure into a temporary whose type is
// derived from the closure's first parameter, then loaded back.
2430 QualType LogicalTy = DistanceFunc->getCapturedDecl()
2431 ->getParam(0)
2432 ->getType()
2434 RawAddress CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
2435 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
2436 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
2437
2438 // Emit the loop structure.
2439 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2440 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2441 llvm::Value *IndVar) {
2442 Builder.restoreIP(CodeGenIP);
2443
2444 // Emit the loop body: Convert the logical iteration number to the loop
2445 // variable and emit the body.
2446 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2447 LValue LCVal = EmitLValue(LoopVarRef);
2448 Address LoopVarAddress = LCVal.getAddress();
2449 emitCapturedStmtCall(*this, LoopVarClosure,
2450 {LoopVarAddress.emitRawPointer(*this), IndVar});
2451
// The body runs in its own cleanup scope.
2452 RunCleanupsScope BodyScope(*this);
2453 EmitStmt(BodyStmt);
2454 return llvm::Error::success();
2455 };
2456
2457 llvm::CanonicalLoopInfo *CL =
2458 cantFail(OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal));
2459
2460 // Finish up the loop.
2461 Builder.restoreIP(CL->getAfterIP());
2462 ForScope.ForceCleanup();
2463
2464 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2465 OMPLoopNestStack.push_back(CL);
2466}
2467
2469 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2470 const Expr *IncExpr,
2471 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2472 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
// Emits the control flow of an inner loop for directive S:
//   omp.inner.for.cond  - evaluates LoopCond,
//   omp.inner.for.body  - runs BodyGen,
//   omp.inner.for.inc   - evaluates IncExpr, runs PostIncGen, branches back,
//   omp.inner.for.end   - fall-through block after the loop.
// When RequiresCleanup is set, the exit edge goes through an extra
// "omp.inner.for.cond.cleanup" block.
// NOTE(review): a few statements are not visible in this listing.
2473 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2474
2475 // Start the loop with a block that tests the condition.
2476 auto CondBlock = createBasicBlock("omp.inner.for.cond");
2477 EmitBlock(CondBlock);
2478 const SourceRange R = S.getSourceRange();
2479
2480 // If attributes are attached, push to the basic block with them.
2481 const auto &OMPED = cast<OMPExecutableDirective>(S);
2482 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2483 const Stmt *SS = ICS->getCapturedStmt();
2484 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
// Any pending canonical-loop entries are discarded before the loop is pushed.
2485 OMPLoopNestStack.clear();
2486 if (AS)
2487 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2488 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2489 SourceLocToDebugLoc(R.getEnd()));
2490 else
2491 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2492 SourceLocToDebugLoc(R.getEnd()));
2493
2494 // If there are any cleanups between here and the loop-exit scope,
2495 // create a block to stage a loop exit along.
2496 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2497 if (RequiresCleanup)
2498 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2499
2500 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2501
2502 // Emit condition.
2503 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
// Only emit the staging block when one was created above.
2504 if (ExitBlock != LoopExit.getBlock()) {
2505 EmitBlock(ExitBlock);
2507 }
2508
2509 EmitBlock(LoopBody);
2511
2512 // Create a block for the increment.
2513 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2514 BreakContinueStack.push_back(BreakContinue(S, LoopExit, Continue));
2515
2516 BodyGen(*this);
2517
2518 // Emit "IV = IV + 1" and a back-edge to the condition block.
2519 EmitBlock(Continue.getBlock());
2520 EmitIgnoredExpr(IncExpr);
2521 PostIncGen(*this);
2522 BreakContinueStack.pop_back();
2523 EmitBranch(CondBlock);
2524 LoopStack.pop();
2525 // Emit the fall-through block.
2526 EmitBlock(LoopExit.getBlock());
2527}
2528
// Emits the initialization of the variables in the linear clauses of D and
// the pre-computation of their steps. Returns true if at least one linear
// init expression was seen, false otherwise (including when there is no
// insert point).
// NOTE(review): the signature and the head of one call are not visible in
// this listing.
2530 if (!HaveInsertPoint())
2531 return false;
2532 // Emit inits for the linear variables.
2533 bool HasLinears = false;
2534 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2535 for (const Expr *Init : C->inits()) {
2536 HasLinears = true;
2537 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
// When the initializer is a plain reference to another variable, a fresh
// DeclRefExpr to that variable is built, marked as referring to a captured
// variable if CapturedStmtInfo has an entry for it, and used to initialize
// the new alloca. Otherwise the declaration is emitted normally.
2538 if (const auto *Ref =
2539 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2540 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2541 const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2542 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2543 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2544 VD->getInit()->getType(), VK_LValue,
2545 VD->getInit()->getExprLoc());
2547 &DRE, VD,
2548 MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
2549 /*capturedByInit=*/false);
2550 EmitAutoVarCleanups(Emission);
2551 } else {
2552 EmitVarDecl(*VD);
2553 }
2554 }
2555 // Emit the linear steps for the linear clauses.
2556 // If a step is not constant, it is pre-calculated before the loop.
// The variable on the left-hand side of the step computation is declared
// first, then the computation itself is emitted.
2557 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2558 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2559 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2560 // Emit calculation of the linear step.
2561 EmitIgnoredExpr(CS);
2562 }
2563 }
2564 return HasLinears;
2565}
2566
2568 const OMPLoopDirective &D,
2569 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
// Emits the final-value expressions of the variables in the linear clauses of
// D, followed by each clause's post-update expression if it has one. CondGen
// may return a condition value; when it does, everything is emitted inside a
// block that is only entered if that condition holds.
2570 if (!HaveInsertPoint())
2571 return;
// DoneBB doubles as the "conditional block already opened" flag.
2572 llvm::BasicBlock *DoneBB = nullptr;
2573 // Emit the final values of the linear variables.
2574 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
// IC walks the clause's variable list in step with finals().
2575 auto IC = C->varlist_begin();
2576 for (const Expr *F : C->finals()) {
2577 if (!DoneBB) {
2578 if (llvm::Value *Cond = CondGen(*this)) {
2579 // If the first post-update expression is found, emit conditional
2580 // block if it was requested.
2581 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2582 DoneBB = createBasicBlock(".omp.linear.pu.done");
2583 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2584 EmitBlock(ThenBB);
2585 }
2586 }
// Map the original variable to its own address for the duration of the
// final expression, so that F is evaluated against the original storage.
2587 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2588 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2589 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2590 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2591 Address OrigAddr = EmitLValue(&DRE).getAddress();
2592 CodeGenFunction::OMPPrivateScope VarScope(*this);
2593 VarScope.addPrivate(OrigVD, OrigAddr);
2594 (void)VarScope.Privatize();
2595 EmitIgnoredExpr(F);
2596 ++IC;
2597 }
2598 if (const Expr *PostUpdate = C->getPostUpdateExpr())
2599 EmitIgnoredExpr(PostUpdate);
2600 }
// Close the conditional region, if one was opened.
2601 if (DoneBB)
2602 EmitBlock(DoneBB, /*IsFinished=*/true);
2603}
2604
2606 const OMPExecutableDirective &D) {
// Processes every `aligned` clause of D: determines the alignment to use for
// each listed expression and, when that alignment is non-zero, evaluates the
// expression and hands the pointer together with the alignment constant to a
// follow-up call.
// NOTE(review): the callee receiving PtrValue is on a line absent from this
// listing; presumably it emits an alignment assumption - confirm.
2607 if (!CGF.HaveInsertPoint())
2608 return;
2609 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
// Zero means "no explicit alignment given on the clause".
2610 llvm::APInt ClauseAlignment(64, 0);
2611 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
// cast<> requires the emitted alignment expression to be a ConstantInt.
2612 auto *AlignmentCI =
2613 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2614 ClauseAlignment = AlignmentCI->getValue();
2615 }
2616 for (const Expr *E : Clause->varlist()) {
2617 llvm::APInt Alignment(ClauseAlignment);
2618 if (Alignment == 0) {
2619 // OpenMP [2.8.1, Description]
2620 // If no optional parameter is specified, implementation-defined default
2621 // alignments for SIMD instructions on the target platforms are assumed.
// The default is computed per variable from the pointee type of E.
2622 Alignment =
2623 CGF.getContext()
2625 E->getType()->getPointeeType()))
2626 .getQuantity();
2627 }
2628 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2629 "alignment is not power of 2");
// A zero alignment (still possible after the default lookup) emits nothing.
2630 if (Alignment != 0) {
2631 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2633 PtrValue, E, /*No second loc needed*/ SourceLocation(),
2634 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2635 }
2636 }
2637 }
2638 }
2639
// Registers private storage for the loop counters of S in LoopScope, and
// for the extra counters of `ordered(n)` clauses. Nothing is privatized
// here; the mappings are only added to LoopScope.
2642 if (!HaveInsertPoint())
2643 return;
// I walks private_counters() in lockstep with counters().
2644 auto I = S.private_counters().begin();
2645 for (const Expr *E : S.counters()) {
2646 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2647 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2648 // Emit var without initialization.
2649 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2650 EmitAutoVarCleanups(VarEmission);
// Drop the direct PrivateVD entry from LocalDeclMap; PrivateVD is re-mapped
// through LoopScope below.
2651 LocalDeclMap.erase(PrivateVD);
// The original counter is redirected to the freshly allocated storage.
2652 (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress());
// If the original counter is a known local, a captured variable or a global,
// PrivateVD is mapped to the original counter's address; otherwise it shares
// the new allocation.
2653 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2654 VD->hasGlobalStorage()) {
2655 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2656 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2657 E->getType(), VK_LValue, E->getExprLoc());
2658 (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress());
2659 } else {
2660 (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress());
2661 }
2662 ++I;
2663 }
2664 // Privatize extra loop counters used in loops for ordered(n) clauses.
2665 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
// Clauses without a loop count have no extra counters to handle.
2666 if (!C->getNumForLoops())
2667 continue;
// Only the counters beyond the directive's own loops (index >=
// S.getLoopsNumber()) are considered here.
2668 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2669 I < E; ++I) {
2670 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2671 const auto *VD = cast<VarDecl>(DRE->getDecl());
2672 // Override only those variables that can be captured to avoid re-emission
2673 // of the variables declared within the loops.
2674 if (DRE->refersToEnclosingVariableOrCapture()) {
2675 (void)LoopScope.addPrivate(
2676 VD, CreateMemTemp(DRE->getType(), VD->getName()));
2677 }
2678 }
2679 }
2680 }
2681
2683 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2684 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
// Emits the "loop runs at least once" check for S: the counters are
// initialized inside a short-lived private scope, temporaries are set up for
// dependent counters, and finally a branch on Cond to TrueBlock / FalseBlock
// is emitted with TrueCount forwarded to EmitBranchOnBoolExpr.
2685 if (!CGF.HaveInsertPoint())
2686 return;
2687 {
// PreCondScope only lives for this brace block.
2688 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2689 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2690 (void)PreCondScope.Privatize();
2691 // Get initial values of real counters.
2692 for (const Expr *I : S.inits()) {
2693 CGF.EmitIgnoredExpr(I);
2694 }
2695 }
2696 // Create temp loop control variables with their init values to support
2697 // non-rectangular loops.
2698 CodeGenFunction::OMPMapVars PreCondVars;
2699 for (const Expr *E : S.dependent_counters()) {
// Null entries are skipped.
2700 if (!E)
2701 continue;
2702 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2703 "dependent counter must not be an iterator.");
2704 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
// NOTE(review): the initializer of CounterAddr is on a line absent from this
// listing.
2705 Address CounterAddr =
2707 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2708 }
2709 (void)PreCondVars.apply(CGF);
2710 for (const Expr *E : S.dependent_inits()) {
2711 if (!E)
2712 continue;
2713 CGF.EmitIgnoredExpr(E);
2714 }
2715 // Check that loop is executed at least one time.
2716 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
// Undo the temporary variable mapping applied above.
2717 PreCondVars.restore(CGF);
2718 }
2719
2721 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
// Emits the private copies of the variables listed in the `linear` clauses
// of D. A variable is registered in PrivateScope unless it was recorded in
// SIMDLCVs, in which case its private copy is emitted without registration.
2722 if (!HaveInsertPoint())
2723 return;
// Filled from the loop counters when EKind is a simd directive.
// NOTE(review): the definition of EKind and the expression inserted into the
// set are on lines absent from this listing; the lookup below uses
// getCanonicalDecl(), so presumably canonical declarations are stored.
2724 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2726 if (isOpenMPSimdDirective(EKind)) {
2727 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2728 for (const Expr *C : LoopDirective->counters()) {
2729 SIMDLCVs.insert(
2731 }
2732 }
2733 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
// CurPrivate walks privates() in lockstep with varlist().
2734 auto CurPrivate = C->privates().begin();
2735 for (const Expr *E : C->varlist()) {
2736 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2737 const auto *PrivateVD =
2738 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2739 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2740 // Emit private VarDecl with copy init.
2741 EmitVarDecl(*PrivateVD);
2742 bool IsRegistered =
2743 PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
2744 assert(IsRegistered && "linear var already registered as private");
2745 // Silence the warning about unused variable.
2746 (void)IsRegistered;
2747 } else {
// Recorded in SIMDLCVs: emit the private declaration only.
2748 EmitVarDecl(*PrivateVD);
2749 }
2750 ++CurPrivate;
2751 }
2752 }
2753 }
2754
2756 const OMPExecutableDirective &D) {
// Translates the `simdlen` / `safelen` clauses of D into LoopStack settings.
// `simdlen` takes precedence: its value becomes the vectorize width and the
// loop is marked parallel only when no `safelen` clause is present. With
// `safelen` alone, its value becomes the vectorize width and parallel marking
// is switched off. With neither clause, LoopStack is left untouched.
2757 if (!CGF.HaveInsertPoint())
2758 return;
2759 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2760 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2761 /*ignoreResult=*/true);
// cast<> requires the emitted clause value to be a ConstantInt.
2762 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2763 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2764 // In presence of finite 'safelen', it may be unsafe to mark all
2765 // the memory instructions parallel, because loop-carried
2766 // dependences of 'safelen' iterations are possible.
2767 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2768 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2769 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2770 /*ignoreResult=*/true);
2771 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2772 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2773 // In presence of finite 'safelen', it may be unsafe to mark all
2774 // the memory instructions parallel, because loop-carried
2775 // dependences of 'safelen' iterations are possible.
2776 CGF.LoopStack.setParallel(/*Enable=*/false);
2777 }
2778 }
2779
2780// Check for the presence of an `OMPOrderedDirective`,
2781// i.e., `ordered` in `#pragma omp ordered simd`.
2782//
2783// Consider the following source code:
2784// ```
2785// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
2786// {
2787// for (int r = 1; r < ARRAY_SIZE; ++r) {
2788// for (int c = 1; c < ARRAY_SIZE; ++c) {
2789// #pragma omp simd
2790// for (int k = 2; k < ARRAY_SIZE; ++k) {
2791// #pragma omp ordered simd
2792// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
2793// }
2794// }
2795// }
2796// }
2797// ```
2798//
2799// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
2800// &D)`. By examining `D.dump()` we have the following AST containing
2801// `OMPOrderedDirective`:
2802//
2803// ```
2804// OMPSimdDirective 0x1c32950
2805// `-CapturedStmt 0x1c32028
2806// |-CapturedDecl 0x1c310e8
2807// | |-ForStmt 0x1c31e30
2808// | | |-DeclStmt 0x1c31298
2809// | | | `-VarDecl 0x1c31208 used k 'int' cinit
2810// | | | `-IntegerLiteral 0x1c31278 'int' 2
2811// | | |-<<<NULL>>>
2812// | | |-BinaryOperator 0x1c31308 'int' '<'
2813// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
2814// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
2815// | | | `-IntegerLiteral 0x1c312d0 'int' 256
2816// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
2817// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
2818// | | `-CompoundStmt 0x1c31e18
2819// | | `-OMPOrderedDirective 0x1c31dd8
2820// | | |-OMPSimdClause 0x1c31380
2821// | | `-CapturedStmt 0x1c31cd0
2822// ```
2823//
2824// Note the presence of `OMPOrderedDirective` above:
2825// It's (transitively) nested in a `CapturedStmt` representing the pragma
2826// annotated compound statement. Thus, we need to consider this nesting and
2827// include checking the `getCapturedStmt` in this case.
2828 static bool hasOrderedDirective(const Stmt *S) {
// Recursive search over the statement tree rooted at S; returns true as soon
// as a match is found and false if the whole tree is exhausted.
// NOTE(review): the condition guarding the first `return true` and the body
// of the CapturedStmt branch are on lines absent from this listing.
2830 return true;
2831
2832 if (const auto *CS = dyn_cast<CapturedStmt>(S))
2834
// Null children are skipped before recursing.
2835 for (const Stmt *Child : S->children()) {
2836 if (Child && hasOrderedDirective(Child))
2837 return true;
2838 }
2839
2840 return false;
2841 }
2842
// Turns off the parallel marking on LoopStack when AssociatedStmt contains
// an ordered directive (as reported by hasOrderedDirective); otherwise
// LoopStack is left unchanged.
2843 static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
2845 // Check for the presence of an `OMPOrderedDirective`
2846 // i.e., `ordered` in `#pragma omp ordered simd`
2847 bool HasOrderedDirective = hasOrderedDirective(&AssociatedStmt);
2848 // If present then conservatively disable loop vectorization
2849 // analogously to how `emitSimdlenSafelenClause` does.
2850 if (HasOrderedDirective)
2851 LoopStack.setParallel(/*Enable=*/false);
2852 }
2853
// Configures LoopStack for a simd loop of D. The statements below run in
// order and later setParallel() calls override earlier ones: parallel and
// vectorization are enabled first, simdlen/safelen are applied, an
// `order(concurrent)` clause re-enables parallel, and an `inscan` reduction
// finally disables it.
2855 // Walk clauses and process safelen/lastprivate.
2856 LoopStack.setParallel(/*Enable=*/true);
2857 LoopStack.setVectorizeEnable();
// NOTE(review): the use of AssociatedStmt and the definition of EKind are on
// lines absent from this listing.
2858 const Stmt *AssociatedStmt = D.getAssociatedStmt();
2860 emitSimdlenSafelenClause(*this, D);
2861 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2862 if (C->getKind() == OMPC_ORDER_concurrent)
2863 LoopStack.setParallel(/*Enable=*/true);
// Applies to a plain `simd` directive, or to any simd directive when the
// OpenMPSimd language option is set, provided some reduction clause uses the
// `inscan` modifier.
2865 if ((EKind == OMPD_simd ||
2866 (getLangOpts().OpenMPSimd && isOpenMPSimdDirective(EKind))) &&
2867 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2868 [](const OMPReductionClause *C) {
2869 return C->getModifier() == OMPC_REDUCTION_inscan;
2870 }))
2871 // Disable parallel access in case of prefix sum.
2872 LoopStack.setParallel(/*Enable=*/false);
2873 }
2874
2876 const OMPLoopDirective &D,
2877 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
// Emits the final-value expressions (`finals()`) of the loop counters of D.
// A final expression is emitted only for counters that are known locals,
// captured variables, globals or OMPCapturedExprDecls. CondGen is called
// before such an expression is emitted as long as no guard block exists yet;
// the first non-null value it returns becomes the condition of a branch that
// guards everything emitted afterwards.
2878 if (!HaveInsertPoint())
2879 return;
// Stays null until CondGen has produced a condition.
2880 llvm::BasicBlock *DoneBB = nullptr;
// IC and IPC walk counters() and private_counters() in lockstep with finals().
2881 auto IC = D.counters().begin();
2882 auto IPC = D.private_counters().begin();
2883 for (const Expr *F : D.finals()) {
2884 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2885 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2886 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2887 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2888 OrigVD->hasGlobalStorage() || CED) {
2889 if (!DoneBB) {
2890 if (llvm::Value *Cond = CondGen(*this)) {
2891 // If the first post-update expression is found, emit conditional
2892 // block if it was requested.
2893 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2894 DoneBB = createBasicBlock(".omp.final.done");
2895 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2896 EmitBlock(ThenBB);
2897 }
2898 }
// For a captured-expression declaration the address comes from its
// initializer expression; otherwise it is the address obtained through a
// reference to PrivateVD.
2899 Address OrigAddr = Address::invalid();
2900 if (CED) {
2901 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
2902 } else {
2903 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2904 /*RefersToEnclosingVariableOrCapture=*/false,
2905 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2906 OrigAddr = EmitLValue(&DRE).getAddress();
2907 }
// While F is emitted, OrigVD is mapped to OrigAddr; VarScope is local to
// this iteration.
2908 OMPPrivateScope VarScope(*this);
2909 VarScope.addPrivate(OrigVD, OrigAddr);
2910 (void)VarScope.Privatize();
2911 EmitIgnoredExpr(F);
2912 }
// The iterators advance even when the counter was skipped.
2913 ++IC;
2914 ++IPC;
2915 }
2916 if (DoneBB)
2917 EmitBlock(DoneBB, /*IsFinished=*/true);
2918 }
2919
2926
2927 /// Emit a helper variable and return corresponding lvalue.
/// The variable is the VarDecl that \p Helper refers to; it is emitted first
/// and the returned lvalue is the one produced for \p Helper itself.
2929 const DeclRefExpr *Helper) {
// cast<> requires the referenced declaration to be a VarDecl.
2930 auto VDecl = cast<VarDecl>(Helper->getDecl());
2931 CGF.EmitVarDecl(*VDecl);
2932 return CGF.EmitLValue(Helper);
2933 }
2934
// SimdInitGen sets up the simd-specific loop state; BodyCodeGen emits the
// loop itself. When a matching `if` clause is found, both a "then" version
// (simd init + body) and an "else" version (vectorization disabled + body)
// are handed to the runtime's emitIfClause; without one only the "then"
// version is emitted.
// NOTE(review): the first signature line, the definition of EKind and one
// statement in each lambda are on lines absent from this listing.
2936 const RegionCodeGenTy &SimdInitGen,
2937 const RegionCodeGenTy &BodyCodeGen) {
2938 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2939 PrePostActionTy &) {
// NontemporalsRegion lives for the duration of this lambda body.
2940 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2942 SimdInitGen(CGF);
2943
2944 BodyCodeGen(CGF);
2945 };
2946 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2948 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2949
2950 BodyCodeGen(CGF);
2951 };
// Remains null unless a suitable `if` clause is found below.
2952 const Expr *IfCond = nullptr;
2954 if (isOpenMPSimdDirective(EKind)) {
// The first `if` clause with no name modifier or with the `simd` modifier
// wins, and only when the OpenMP language version is at least 50.
2955 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2956 if (CGF.getLangOpts().OpenMP >= 50 &&
2957 (C->getNameModifier() == OMPD_unknown ||
2958 C->getNameModifier() == OMPD_simd)) {
2959 IfCond = C->getCondition();
2960 break;
2961 }
2962 }
2963 }
2964 if (IfCond) {
2965 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2966 } else {
2967 RegionCodeGenTy ThenRCG(ThenGen);
2968 ThenRCG(CGF);
2969 }
2970 }
2971
2973 PrePostActionTy &Action) {
// Emits the body of a simd region for S: an optional precondition check,
// the iteration variable and iteration count, clause initialization inside a
// private scope, the inner loop, and the final updates for counters,
// lastprivate, reduction and linear variables.
// NOTE(review): several lines of this function are absent from this listing
// (after the DKind test, before the precondition block, and around the
// reduction / loop emission calls); the comments below describe only what is
// visible.
2974 Action.Enter(CGF);
2975 OMPLoopScope PreInitScope(CGF, S);
2976 // if (PreCond) {
2977 // for (IV in 0..LastIteration) BODY;
2978 // <Final counter/linear vars updates>;
2979 // }
2980
2981 // The presence of lower/upper bound variable depends on the actual directive
2982 // kind in the AST node. The variables must be emitted because some of the
2983 // expressions associated with the loop will use them.
2984 OpenMPDirectiveKind DKind = S.getDirectiveKind();
2985 if (isOpenMPDistributeDirective(DKind) ||
2988 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2989 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2990 }
2991
2993 // Emit: if (PreCond) - begin.
2994 // If the condition constant folds and can be elided, avoid emitting the
2995 // whole loop.
2996 bool CondConstant;
// Non-null only when a runtime precondition branch was emitted; used at the
// end of the function to close the `if`.
2997 llvm::BasicBlock *ContBlock = nullptr;
2998 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
// Precondition is a compile-time false: nothing to emit.
2999 if (!CondConstant)
3000 return;
3001 } else {
3002 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
3003 ContBlock = CGF.createBasicBlock("simd.if.end");
3004 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
3005 CGF.getProfileCount(&S));
3006 CGF.EmitBlock(ThenBlock);
3008 }
3009
3010 // Emit the loop iteration variable.
3011 const Expr *IVExpr = S.getIterationVariable();
3012 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
3013 CGF.EmitVarDecl(*IVDecl);
3014 CGF.EmitIgnoredExpr(S.getInit());
3015
3016 // Emit the iterations count variable.
3017 // If it is not a variable, Sema decided to calculate iterations count on
3018 // each iteration (e.g., it is foldable into a constant).
3019 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3020 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3021 // Emit calculation of the iterations count.
3022 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
3023 }
3024
3025 emitAlignedClause(CGF, S);
// The returned "has linear variables" flag is not needed here.
3026 (void)CGF.EmitOMPLinearClauseInit(S);
3027 {
// All clause-related private mappings are collected in LoopScope and become
// active at Privatize().
3028 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3029 CGF.EmitOMPPrivateClause(S, LoopScope);
3030 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
3031 CGF.EmitOMPLinearClause(S, LoopScope);
3032 CGF.EmitOMPReductionClauseInit(S, LoopScope);
3034 CGF, S, CGF.EmitLValue(S.getIterationVariable()));
3035 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3036 (void)LoopScope.Privatize();
3039
3041 CGF, S,
// First callback: simd-specific loop setup.
3042 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3043 CGF.EmitOMPSimdInit(S);
3044 },
// Second callback: the inner loop over S.getCond() / S.getInc(); the
// post-increment callback is empty.
3045 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3046 CGF.EmitOMPInnerLoop(
3047 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
3048 [&S](CodeGenFunction &CGF) {
3049 emitOMPLoopBodyWithStopPoint(CGF, S,
3050 CodeGenFunction::JumpDest());
3051 },
3052 [](CodeGenFunction &) {});
3053 });
// A callback returning nullptr means the final updates are emitted without
// a guarding condition.
3054 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
3055 // Emit final copy of the lastprivate variables at the end of loops.
3056 if (HasLastprivateClause)
3057 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
3058 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
3060 [](CodeGenFunction &) { return nullptr; });
// The private mapping is dropped before the linear finals are emitted.
3061 LoopScope.restoreMap();
3062 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
3063 }
3064 // Emit: if (PreCond) - end.
3065 if (ContBlock) {
3066 CGF.EmitBranch(ContBlock);
3067 CGF.EmitBlock(ContBlock, true);
3068 }
3069 }
3070
3071// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
3072// available for "loop bind(thread)", which maps to "simd".
// Returns false when S carries a clause outside the supported set or when an
// ordered directive is found directly inside the loop body; returns true
// otherwise.
3074 // Check for unsupported clauses
3075 for (OMPClause *C : S.clauses()) {
3076 // Currently only order, simdlen and safelen clauses are supported
// NOTE(review): the clause-kind test guarding this return is on lines absent
// from this listing.
3079 return false;
3080 }
3081
3082 // Check if we have a statement with the ordered directive.
3083 // Visit the statement hierarchy to find a compound statement
3084 // with an ordered directive in it.
// The search is shallow: it only inspects CompoundStmts that are direct
// children of the canonical loop's loop statement, and only the direct
// children of those CompoundStmts. If the raw statement is not an
// OMPCanonicalLoop, no search is performed.
3085 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
3086 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
3087 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
3088 if (!SubStmt)
3089 continue;
3090 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
3091 for (const Stmt *CSSubStmt : CS->children()) {
3092 if (!CSSubStmt)
3093 continue;
3094 if (isa<OMPOrderedDirective>(CSSubStmt)) {
3095 return false;
3096 }
3097 }
3098 }
3099 }
3100 }
3101 }
3102 return true;
3103 }
3104
// Builds a map from each pointer value named in an `aligned` clause of S to
// a 64-bit constant holding its alignment. Unlike emitAlignedClause, an entry
// is recorded even when the resolved alignment is zero. If the same pointer
// value occurs more than once, the later alignment overwrites the earlier.
3105 static llvm::MapVector<llvm::Value *, llvm::Value *>
3107 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
3108 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
// Zero means "no explicit alignment given on the clause".
3109 llvm::APInt ClauseAlignment(64, 0);
3110 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
// cast<> requires the emitted alignment expression to be a ConstantInt.
3111 auto *AlignmentCI =
3112 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
3113 ClauseAlignment = AlignmentCI->getValue();
3114 }
3115 for (const Expr *E : Clause->varlist()) {
3116 llvm::APInt Alignment(ClauseAlignment);
3117 if (Alignment == 0) {
3118 // OpenMP [2.8.1, Description]
3119 // If no optional parameter is specified, implementation-defined default
3120 // alignments for SIMD instructions on the target platforms are assumed.
// The default is computed per variable from the pointee type of E.
3121 Alignment =
3122 CGF.getContext()
3124 E->getType()->getPointeeType()))
3125 .getQuantity();
3126 }
3127 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
3128 "alignment is not power of 2");
3129 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
3130 AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
3131 }
3132 }
3133 return AlignedVars;
3134 }
3135
3136// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
3137// available for "loop bind(thread)", which maps to "simd".
// Two code paths: when the OpenMPIRBuilder language option is on and
// isSimdSupportedByOpenMPIRBuilder(S) holds, the loop is emitted as a
// canonical loop and annotated through OpenMPIRBuilder::applySimd; otherwise
// the region is emitted through emitOMPSimdRegion.
// NOTE(review): the signature and several statements (including the final
// emission call of the second path) are on lines absent from this listing.
3140 bool UseOMPIRBuilder =
3141 CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S);
3142 if (UseOMPIRBuilder) {
3143 auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF,
3144 PrePostActionTy &) {
3145 // Use the OpenMPIRBuilder if enabled.
// This lambda is only created inside the enclosing `if (UseOMPIRBuilder)`,
// so the captured flag is always true here.
3146 if (UseOMPIRBuilder) {
3147 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
3148 GetAlignedMapping(S, CGF);
3149 // Emit the associated statement and get its loop representation.
3150 const Stmt *Inner = S.getRawStmt();
3151 llvm::CanonicalLoopInfo *CLI =
3152 CGF.EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3153
3154 llvm::OpenMPIRBuilder &OMPBuilder =
3156 // Add SIMD specific metadata
// Each of Simdlen / Safelen stays null when the corresponding clause is
// absent.
3157 llvm::ConstantInt *Simdlen = nullptr;
3158 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
3159 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
3160 /*ignoreResult=*/true);
3161 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
3162 Simdlen = Val;
3163 }
3164 llvm::ConstantInt *Safelen = nullptr;
3165 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
3166 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
3167 /*ignoreResult=*/true);
3168 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
3169 Safelen = Val;
3170 }
// Only `order(concurrent)` is translated; any other order kind leaves the
// value at OMP_ORDER_unknown.
3171 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
3172 if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3173 if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
3174 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
3175 }
3176 }
3177 // Add simd metadata to the collapsed loop. Do not generate
3178 // another loop for if clause. Support for if clause is done earlier.
3179 OMPBuilder.applySimd(CLI, AlignedVars,
3180 /*IfCond*/ nullptr, Order, Simdlen, Safelen);
3181 return;
3182 }
3183 };
3184 {
// LPCRegion and Scope are confined to this brace block.
3185 auto LPCRegion =
3187 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
3188 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
3189 CodeGenIRBuilder);
3190 }
3191 return;
3192 }
3193
// Fallback path (no OpenMPIRBuilder).
3195 CGF.OMPFirstScanLoop = true;
3196 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3197 emitOMPSimdRegion(CGF, S, Action);
3198 };
3199 {
3200 auto LPCRegion =
3202 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
3204 }
3205 // Check for outer lastprivate conditional update.
3207 }
3208
// Member entry point for `simd` directives: forwards S together with this
// CodeGenFunction and its CGM to the file-local emitOMPSimdDirective helper.
3209 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
3210 emitOMPSimdDirective(S, *this, CGM);
3211 }
3212
3214 // Emit the de-sugared statement.
// TileScope is constructed from this CodeGenFunction and &S and stays alive
// until the closing brace below.
// NOTE(review): the signature and the emitting statement are on lines absent
// from this listing.
3215 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
3217 }
3218
3220 // Emit the de-sugared statement.
// StripeScope is constructed from this CodeGenFunction and &S and stays alive
// until the closing brace below.
// NOTE(review): the signature and the emitting statement are on lines absent
// from this listing.
3221 OMPTransformDirectiveScopeRAII StripeScope(*this, &S);
3223 }
3224
3226 // Emit the de-sugared statement.
// ReverseScope is constructed from this CodeGenFunction and &S and stays
// alive until the closing brace below.
// NOTE(review): the signature and the emitting statement are on lines absent
// from this listing.
3227 OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
3229 }
3230
3232 // Emit the de-sugared statement (the split loops).
// SplitScope is constructed from this CodeGenFunction and &S and stays alive
// until the closing brace below.
// NOTE(review): the signature and the emitting statement are on lines absent
// from this listing.
3233 OMPTransformDirectiveScopeRAII SplitScope(*this, &S);
3235 }
3236
3238 const OMPInterchangeDirective &S) {
3239 // Emit the de-sugared statement.
// InterchangeScope is constructed from this CodeGenFunction and &S and stays
// alive until the closing brace below.
// NOTE(review): the first signature line and the emitting statement are on
// lines absent from this listing.
3240 OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
3242 }
3243
3245 // Emit the de-sugared statement.
// FuseScope is constructed from this CodeGenFunction and &S and stays alive
// until the closing brace below.
// NOTE(review): the signature and the emitting statement are on lines absent
// from this listing.
3246 OMPTransformDirectiveScopeRAII FuseScope(*this, &S);
3248 }
3249
// Emits an unroll directive S. With the OpenMPIRBuilder language option the
// associated loop is emitted as a canonical loop and unrolled through the
// builder (full, partial or heuristic, depending on the clauses). Otherwise
// unroll state is recorded on LoopStack and the associated statement is
// emitted normally.
3251 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
3252
3253 if (UseOMPIRBuilder) {
3254 auto DL = SourceLocToDebugLoc(S.getBeginLoc());
3255 const Stmt *Inner = S.getRawStmt();
3256
3257 // Consume nested loop. Clear the entire remaining loop stack because a
3258 // fully unrolled loop is non-transformable. For partial unrolling the
3259 // generated outer loop is pushed back to the stack.
3260 llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3261 OMPLoopNestStack.clear();
3262
3263 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3264
// An unrolled loop handle is requested only when an enclosing construct
// expects at least one loop (ExpectedOMPLoopDepth >= 1).
3265 bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
3266 llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
3267
3268 if (S.hasClausesOfKind<OMPFullClause>()) {
3269 assert(ExpectedOMPLoopDepth == 0);
3270 OMPBuilder.unrollLoopFull(DL, CLI);
3271 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
// Factor stays 0 when the `partial` clause has no factor expression; that
// value is passed to unrollLoopPartial unchanged.
3272 uint64_t Factor = 0;
3273 if (Expr *FactorExpr = PartialClause->getFactor()) {
3274 Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
3275 assert(Factor >= 1 && "Only positive factors are valid");
3276 }
3277 OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
3278 NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
3279 } else {
// Neither `full` nor `partial`: let the builder decide.
3280 OMPBuilder.unrollLoopHeuristic(DL, CLI);
3281 }
3282
3283 assert((!NeedsUnrolledCLI || UnrolledCLI) &&
3284 "NeedsUnrolledCLI implies UnrolledCLI to be set");
3285 if (UnrolledCLI)
3286 OMPLoopNestStack.push_back(UnrolledCLI);
3287
3288 return;
3289 }
3290
3291 // This function is only called if the unrolled loop is not consumed by any
3292 // other loop-associated construct. Such a loop-associated construct will have
3293 // used the transformed AST.
3294
3295 // Set the unroll metadata for the next emitted loop.
// Enable is the default; it is replaced by Full for a `full` clause, and a
// `partial` clause with a factor additionally sets the unroll count.
3296 LoopStack.setUnrollState(LoopAttributes::Enable);
3297
3298 if (S.hasClausesOfKind<OMPFullClause>()) {
3299 LoopStack.setUnrollState(LoopAttributes::Full);
3300 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
3301 if (Expr *FactorExpr = PartialClause->getFactor()) {
3302 uint64_t Factor =
3303 FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
3304 assert(Factor >= 1 && "Only positive factors are valid");
3305 LoopStack.setUnrollCount(Factor);
3306 }
3307 }
3308
3309 EmitStmt(S.getAssociatedStmt());
3310 }
3311
// Emits the outer ("dispatch") loop around the inner worksharing loop of S.
// Blocks created here: `omp.dispatch.cond`, `omp.dispatch.body`,
// `omp.dispatch.inc`, `omp.dispatch.end`, and `omp.dispatch.cleanup` when
// LoopScope requires cleanups. With DynamicOrOrdered false the bounds are
// advanced by the expressions in LoopArgs (EUB/Init/Cond, then
// NextLB/NextUB); otherwise the loop condition is the value returned by
// RT.emitForNext.
// NOTE(review): one parameter line, the definitions of RT and EKind, the
// cleanup branch and the head of the call that receives the two lambdas are
// on lines absent from this listing.
3312 void CodeGenFunction::EmitOMPOuterLoop(
3313 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
3315 const CodeGenFunction::OMPLoopArguments &LoopArgs,
3316 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
3317 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
3319
3320 const Expr *IVExpr = S.getIterationVariable();
3321 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3322 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3323
3324 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
3325
3326 // Start the loop with a block that tests the condition.
3327 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
3328 EmitBlock(CondBlock);
3329 const SourceRange R = S.getSourceRange();
3330 OMPLoopNestStack.clear();
3331 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
3332 SourceLocToDebugLoc(R.getEnd()));
3333
3334 llvm::Value *BoolCondVal = nullptr;
3335 if (!DynamicOrOrdered) {
3336 // UB = min(UB, GlobalUB) or
3337 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
3338 // 'distribute parallel for')
3339 EmitIgnoredExpr(LoopArgs.EUB);
3340 // IV = LB
3341 EmitIgnoredExpr(LoopArgs.Init);
3342 // IV < UB
3343 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
3344 } else {
// The runtime call yields the loop condition and receives IL, LB, UB and ST.
3345 BoolCondVal =
3346 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
3347 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
3348 }
3349
3350 // If there are any cleanups between here and the loop-exit scope,
3351 // create a block to stage a loop exit along.
3352 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
3353 if (LoopScope.requiresCleanups())
3354 ExitBlock = createBasicBlock("omp.dispatch.cleanup");
3355
3356 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
3357 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
// Only taken when a separate cleanup staging block was created above.
3358 if (ExitBlock != LoopExit.getBlock()) {
3359 EmitBlock(ExitBlock);
3361 }
3362 EmitBlock(LoopBody);
3363
3364 // Emit "IV = LB" (in case of static schedule, we have already calculated new
3365 // LB for loop condition and emitted it above).
3366 if (DynamicOrOrdered)
3367 EmitIgnoredExpr(LoopArgs.Init);
3368
3369 // Create a block for the increment.
3370 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
3371 BreakContinueStack.push_back(BreakContinue(S, LoopExit, Continue));
3372
3375 *this, S,
// First callback: loop metadata setup. Non-simd directives get parallel
// marking from IsMonotonic (overridden by `order(concurrent)`); simd
// directives go through EmitOMPSimdInit.
3376 [&S, IsMonotonic, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
3377 // Generate !llvm.loop.parallel metadata for loads and stores for loops
3378 // with dynamic/guided scheduling and without ordered clause.
3379 if (!isOpenMPSimdDirective(EKind)) {
3380 CGF.LoopStack.setParallel(!IsMonotonic);
3381 if (const auto *C = S.getSingleClause<OMPOrderClause>())
3382 if (C->getKind() == OMPC_ORDER_concurrent)
3383 CGF.LoopStack.setParallel(/*Enable=*/true);
3384 } else {
3385 CGF.EmitOMPSimdInit(S);
3386 }
3387 },
// Second callback: the inner loop; CodeGenLoop emits the body and
// CodeGenOrdered runs as the post-increment step.
3388 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
3389 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3390 SourceLocation Loc = S.getBeginLoc();
3391 // when 'distribute' is not combined with a 'for':
3392 // while (idx <= UB) { BODY; ++idx; }
3393 // when 'distribute' is combined with a 'for'
3394 // (e.g. 'distribute parallel for')
3395 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3396 CGF.EmitOMPInnerLoop(
3397 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
3398 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3399 CodeGenLoop(CGF, S, LoopExit);
3400 },
3401 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
3402 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
3403 });
3404 });
3405
3406 EmitBlock(Continue.getBlock());
3407 BreakContinueStack.pop_back();
3408 if (!DynamicOrOrdered) {
3409 // Emit "LB = LB + Stride", "UB = UB + Stride".
3410 EmitIgnoredExpr(LoopArgs.NextLB);
3411 EmitIgnoredExpr(LoopArgs.NextUB);
3412 }
3413
// Back-edge to the condition block.
3414 EmitBranch(CondBlock);
3415 OMPLoopNestStack.clear();
3416 LoopStack.pop();
3417 // Emit the fall-through block.
3418 EmitBlock(LoopExit.getBlock());
3419
3420 // Tell the runtime we are done.
// The static-finish call is emitted only for the non-dynamic, non-ordered
// case; the lambda is handed to OMPCancelStack.emitExit.
3421 auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
3422 if (!DynamicOrOrdered)
3423 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3424 LoopArgs.DKind);
3425 };
3426 OMPCancelStack.emitExit(*this, EKind, CodeGen);
3427 }
3428
3429void CodeGenFunction::EmitOMPForOuterLoop(
3430 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
3431 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
3432 const OMPLoopArguments &LoopArgs,
3433 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3434 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3435
3436 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
3437 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);
3438
3439 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
3440 LoopArgs.Chunk != nullptr)) &&
3441 "static non-chunked schedule does not need outer loop");
3442
3443 // Emit outer loop.
3444 //
3445 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3446 // When schedule(dynamic,chunk_size) is specified, the iterations are
3447 // distributed to threads in the team in chunks as the threads request them.
3448 // Each thread executes a chunk of iterations, then requests another chunk,
3449 // until no chunks remain to be distributed. Each chunk contains chunk_size
3450 // iterations, except for the last chunk to be distributed, which may have
3451 // fewer iterations. When no chunk_size is specified, it defaults to 1.
3452 //
3453 // When schedule(guided,chunk_size) is specified, the iterations are assigned
3454 // to threads in the team in chunks as the executing threads request them.
3455 // Each thread executes a chunk of iterations, then requests another chunk,
3456 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
3457 // each chunk is proportional to the number of unassigned iterations divided
3458 // by the number of threads in the team, decreasing to 1. For a chunk_size
3459 // with value k (greater than 1), the size of each chunk is determined in the
3460 // same way, with the restriction that the chunks do not contain fewer than k
3461 // iterations (except for the last chunk to be assigned, which may have fewer
3462 // than k iterations).
3463 //
3464 // When schedule(auto) is specified, the decision regarding scheduling is
3465 // delegated to the compiler and/or runtime system. The programmer gives the
3466 // implementation the freedom to choose any possible mapping of iterations to
3467 // threads in the team.
3468 //
3469 // When schedule(runtime) is specified, the decision regarding scheduling is
3470 // deferred until run time, and the schedule and chunk size are taken from the
3471 // run-sched-var ICV. If the ICV is set to auto, the schedule is
3472 // implementation defined.
3473 //
3474 // __kmpc_dispatch_init();
3475 // while(__kmpc_dispatch_next(&LB, &UB)) {
3476 // idx = LB;
3477 // while (idx <= UB) { BODY; ++idx;
3478 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
3479 // } // inner loop
3480 // }
3481 // __kmpc_dispatch_deinit();
3482 //
3483 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3484 // When schedule(static, chunk_size) is specified, iterations are divided into
3485 // chunks of size chunk_size, and the chunks are assigned to the threads in
3486 // the team in a round-robin fashion in the order of the thread number.
3487 //
3488 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
3489 // while (idx <= UB) { BODY; ++idx; } // inner loop
3490 // LB = LB + ST;
3491 // UB = UB + ST;
3492 // }
3493 //
3494
3495 const Expr *IVExpr = S.getIterationVariable();
3496 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3497 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3498
3499 if (DynamicOrOrdered) {
3500 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
3501 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
3502 llvm::Value *LBVal = DispatchBounds.first;
3503 llvm::Value *UBVal = DispatchBounds.second;
3504 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
3505 LoopArgs.Chunk};
3506 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
3507 IVSigned, Ordered, DipatchRTInputValues);
3508 } else {
3509 CGOpenMPRuntime::StaticRTInput StaticInit(
3510 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3511 LoopArgs.ST, LoopArgs.Chunk);
3513 RT.emitForStaticInit(*this, S.getBeginLoc(), EKind, ScheduleKind,
3514 StaticInit);
3515 }
3516
3517 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3518 const unsigned IVSize,
3519 const bool IVSigned) {
3520 if (Ordered) {
3521 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3522 IVSigned);
3523 }
3524 };
3525
3526 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3527 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3528 OuterLoopArgs.IncExpr = S.getInc();
3529 OuterLoopArgs.Init = S.getInit();
3530 OuterLoopArgs.Cond = S.getCond();
3531 OuterLoopArgs.NextLB = S.getNextLowerBound();
3532 OuterLoopArgs.NextUB = S.getNextUpperBound();
3533 OuterLoopArgs.DKind = LoopArgs.DKind;
3534 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
3535 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3536 if (DynamicOrOrdered) {
3537 RT.emitForDispatchDeinit(*this, S.getBeginLoc());
3538 }
3539}
3540
// No-op definition: the body is empty, so IVSize and IVSigned are unused.
// NOTE(review): the line carrying this definition's name and leading
// parameters is absent from this listing — confirm against the full file.
3542 const unsigned IVSize, const bool IVSigned) {}
3543
/// Emits the outer loop for a 'distribute' schedule: asks the OpenMP runtime
/// to initialize the static distribute schedule, gathers the loop helper
/// expressions into an OMPLoopArguments, and forwards them to
/// EmitOMPOuterLoop with DynamicOrOrdered and IsMonotonic both false.
///
/// \param ScheduleKind Schedule kind forwarded to emitDistributeStaticInit.
/// \param S Loop directive supplying the iteration variable and the
/// init/cond/inc/bound expressions.
/// \param LoopScope Private scope forwarded to EmitOMPOuterLoop.
/// \param LoopArgs Supplies LB, UB, ST, IL and Chunk for the runtime call and
/// for the outer-loop arguments.
/// \param CodeGenLoopContent Callback forwarded to EmitOMPOuterLoop.
3544void CodeGenFunction::EmitOMPDistributeOuterLoop(
3545 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3546 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3547 const CodeGenLoopTy &CodeGenLoopContent) {
3548
3549 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3550
3551 // Emit outer loop.
3552 // Same behavior as EmitOMPForOuterLoop, except that the schedule cannot be
3553 // dynamic.
3554 //
3555
3556 const Expr *IVExpr = S.getIterationVariable();
3557 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3558 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
// NOTE(review): EKind is used below but its declaration is not visible here
// (a line is missing from this listing at this point).
3560
3561 CGOpenMPRuntime::StaticRTInput StaticInit(
3562 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3563 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3564 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
3565
3566 // For combined 'distribute' and 'for', the increment expression of
3567 // 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
3568 Expr *IncExpr;
// NOTE(review): the condition selecting between the two assignments below is
// on a line missing from this listing.
3570 IncExpr = S.getDistInc();
3571 else
3572 IncExpr = S.getInc();
3573
3574 // This routine is shared by 'omp distribute parallel for' and
3575 // 'omp distribute': select the right EUB expression depending on the
3576 // directive.
3577 OMPLoopArguments OuterLoopArgs;
3578 OuterLoopArgs.LB = LoopArgs.LB;
3579 OuterLoopArgs.UB = LoopArgs.UB;
3580 OuterLoopArgs.ST = LoopArgs.ST;
3581 OuterLoopArgs.IL = LoopArgs.IL;
3582 OuterLoopArgs.Chunk = LoopArgs.Chunk;
// Each expression below uses the "Combined" variant when EKind is a loop
// bound sharing directive, and the plain variant otherwise.
3583 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(EKind)
3584 ? S.getCombinedEnsureUpperBound()
3585 : S.getEnsureUpperBound();
3586 OuterLoopArgs.IncExpr = IncExpr;
3587 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(EKind)
3588 ? S.getCombinedInit()
3589 : S.getInit();
3590 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(EKind)
3591 ? S.getCombinedCond()
3592 : S.getCond();
3593 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(EKind)
3594 ? S.getCombinedNextLowerBound()
3595 : S.getNextLowerBound();
3596 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(EKind)
3597 ? S.getCombinedNextUpperBound()
3598 : S.getNextUpperBound();
3599 OuterLoopArgs.DKind = OMPD_distribute;
3600
// NOTE(review): the final argument line of this call is missing from this
// listing.
3601 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3602 LoopScope, OuterLoopArgs, CodeGenLoopContent,
3604}
3605
/// Emits the lower/upper bound helper variables of the loop directive and
/// seeds them with the previous schedule's bounds (PrevLowerBound and
/// PrevUpperBound), converted to the iteration variable's type.
/// \returns the {LB, UB} lvalues.
// NOTE(review): the line naming this function and the line declaring LS are
// absent from this listing; LS is used below as the loop directive.
3606static std::pair<LValue, LValue>
3608 const OMPExecutableDirective &S) {
3610 LValue LB =
3611 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3612 LValue UB =
3613 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3614
3615 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3616 // parallel for') we need to use the 'distribute'
3617 // chunk lower and upper bounds rather than the whole loop iteration
3618 // space. These are parameters to the outlined function for 'parallel'
3619 // and we copy the bounds of the previous schedule into
3620 // the current ones.
3621 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
3622 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
// Load each previous bound, then convert it from its own type to the
// iteration variable's type before storing it.
3623 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3624 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
3625 PrevLBVal = CGF.EmitScalarConversion(
3626 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
3627 LS.getIterationVariable()->getType(),
3628 LS.getPrevLowerBoundVariable()->getExprLoc());
3629 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3630 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
3631 PrevUBVal = CGF.EmitScalarConversion(
3632 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
3633 LS.getIterationVariable()->getType(),
3634 LS.getPrevUpperBoundVariable()->getExprLoc());
3635
3636 CGF.EmitStoreOfScalar(PrevLBVal, LB);
3637 CGF.EmitStoreOfScalar(PrevUBVal, UB);
3638
3639 return {LB, UB};
3640}
3641
3642/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3643/// we need to use the LB and UB expressions generated by the worksharing
3644/// code generation support, whereas in non-combined situations we would
3645/// just emit 0 and the LastIteration expression.
3646/// This function is necessary due to the difference of the LB and UB
3647/// types for the RT emission routines for 'for_static_init' and
3648/// 'for_dispatch_init'.
/// \param LB Address the lower bound value is loaded from.
/// \param UB Address the upper bound value is loaded from.
/// \returns the {LB, UB} values, loaded with the iteration variable's type.
// NOTE(review): the line naming this function and the line declaring LS are
// absent from this listing.
3649static std::pair<llvm::Value *, llvm::Value *>
3651 const OMPExecutableDirective &S,
3652 Address LB, Address UB) {
3654 const Expr *IVExpr = LS.getIterationVariable();
3655 // When implementing a dynamic schedule for a 'for' combined with a
3656 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3657 // is not normalized as each team only executes its own assigned
3658 // distribute chunk.
3659 QualType IteratorTy = IVExpr->getType();
3660 llvm::Value *LBVal =
3661 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3662 llvm::Value *UBVal =
3663 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3664 return {LBVal, UBVal};
3665}
3666
// Loads the directive's combined lower and upper bound variables, casts each
// to CGF.SizeTy as an unsigned value, and appends them to CapturedVars in the
// order lower bound, upper bound.
// NOTE(review): the signature lines of this definition (including the
// declaration of CapturedVars) are absent from this listing.
3670 const auto &Dir = cast<OMPLoopDirective>(S);
3671 LValue LB =
3672 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3673 llvm::Value *LBCast = CGF.Builder.CreateIntCast(
3674 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
3675 CapturedVars.push_back(LBCast);
3676 LValue UB =
3677 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3678
3679 llvm::Value *UBCast = CGF.Builder.CreateIntCast(
3680 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
3681 CapturedVars.push_back(UBCast);
3682}
3683
// Builds a region generator (CGInlinedWorksharingLoop) that emits the
// worksharing loop of a combined directive, and passes it on together with
// OMPD_for_simd (simd directive kinds) or OMPD_for (all others).
// NOTE(review): the line naming this function, the remaining parameters, the
// declaration of EKind and the head of the final call are absent from this
// listing.
3684static void
3686 const OMPLoopDirective &S,
3689 auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF,
3690 PrePostActionTy &Action) {
3691 Action.Enter(CGF);
// HasCancel stays false for simd directive kinds; otherwise it is read from
// whichever of the three directive classes below S actually is.
3692 bool HasCancel = false;
3693 if (!isOpenMPSimdDirective(EKind)) {
3694 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
3695 HasCancel = D->hasCancel();
3696 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
3697 HasCancel = D->hasCancel();
3698 else if (const auto *D =
3699 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
3700 HasCancel = D->hasCancel();
3701 }
3702 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
// The ensure-upper-bound expression passed here is the "Prev" variant.
3703 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
3706 };
3707
3709 CGF, S, isOpenMPSimdDirective(EKind) ? OMPD_for_simd : OMPD_for,
3710 CGInlinedWorksharingLoop,
3712}
3713
// Emits the directive as an inlined OMPD_distribute region, inside an
// OMPLexicalScope created with OMPD_parallel.
// NOTE(review): the signature and the head of the call inside the lambda are
// absent from this listing; only its last argument, S.getDistInc(), is visible.
3716 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3718 S.getDistInc());
3719 };
3720 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3721 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3722}
3723
// Emits the directive as an inlined OMPD_distribute region, inside an
// OMPLexicalScope created with OMPD_parallel.
// NOTE(review): the signature and the head of the call inside the lambda are
// absent from this listing; only its last argument, S.getDistInc(), is visible.
3726 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3728 S.getDistInc());
3729 };
3730 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3731 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3732}
3733
// Emits a 'distribute simd' directive as an inlined OMPD_simd region, inside
// an OMPLexicalScope created with OMPD_unknown.
// NOTE(review): the first signature line and the lambda's body are absent
// from this listing.
3735 const OMPDistributeSimdDirective &S) {
3736 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3738 };
3739 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3740 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3741}
3742
// Emits the outlined target function for a 'target simd' directive as an
// offload entry, and asserts that both the function and its address constant
// were produced.
// NOTE(review): the first signature line is absent from this listing.
3744 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3745 // Region body generator: emits the simd region through emitOMPSimdRegion.
3746 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3747 emitOMPSimdRegion(CGF, S, Action);
3748 };
// Fn and Addr are filled in by emitTargetOutlinedFunction below.
3749 llvm::Function *Fn;
3750 llvm::Constant *Addr;
3751 // Emit target region as a standalone region.
3752 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3753 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3754 assert(Fn && Addr && "Target device function emission failed.");
3755}
3756
// Builds the region generator for a 'target simd' directive; the generator
// emits the region through emitOMPSimdRegion.
// NOTE(review): the first signature line and the statement that consumes
// CodeGen are absent from this listing.
3758 const OMPTargetSimdDirective &S) {
3759 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3760 emitOMPSimdRegion(CGF, S, Action);
3761 };
3763}
3764
3765namespace {
/// Groups a schedule clause kind with two modifier values; the constructor
/// stores its arguments in the members Kind, M1 and M2.
// NOTE(review): the member declarations and part of the constructor's
// parameter list are absent from this listing.
3766struct ScheduleKindModifiersTy {
3770 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3773 : Kind(Kind), M1(M1), M2(M2) {}
3774};
3775} // namespace
3776
// Emits a worksharing loop for the loop directive S.
//
// Overall shape, as visible below:
//   1. emit the iteration variable and, when it is a variable, the
//      iteration-count variable;
//   2. evaluate the precondition (constant-folded, or as a branch to
//      "omp.precond.then" / "omp.precond.end");
//   3. set up ordered/doacross state, aligned and linear clauses, the bound
//      helper variables and the privatization scope;
//   4. pick the schedule and chunk, then emit either the single static loop
//      (static non-chunked, or static chunk-one on a loop bound sharing
//      directive, and not ordered) or the runtime-driven outer loop;
//   5. emit the final/post-update code for simd, reduction, lastprivate and
//      linear clauses.
//
// EUB is forwarded into the OMPLoopArguments of the outer-loop path.
// CodeGenLoopBounds is called to obtain the {LB, UB} lvalues.
// CGDispatchBounds is forwarded to EmitOMPForOuterLoop.
//
// Returns false when the precondition constant-folds to false; otherwise
// returns the result of EmitOMPLastprivateClauseInit.
//
// NOTE(review): the first signature line and several interior lines (mostly
// declarations and call heads, e.g. of EKind, LoopExit, Result and
// StaticInit) are absent from this listing.
3778 const OMPLoopDirective &S, Expr *EUB,
3779 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3780 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3781 // Emit the loop iteration variable.
3782 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3783 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3784 EmitVarDecl(*IVDecl);
3785
3786 // Emit the iterations count variable.
3787 // If it is not a variable, Sema decided to calculate iterations count on each
3788 // iteration (e.g., it is foldable into a constant).
3789 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3790 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3791 // Emit calculation of the iterations count.
3792 EmitIgnoredExpr(S.getCalcLastIteration());
3793 }
3794
3795 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3796
// Deliberately left uninitialized here: it is assigned from
// EmitOMPLastprivateClauseInit on every path that reaches the final return;
// the only other exit is the early "return false" below.
3797 bool HasLastprivateClause;
3798 // Check pre-condition.
3799 {
3800 OMPLoopScope PreInitScope(*this, S);
3801 // Skip the entire loop if we don't meet the precondition.
3802 // If the condition constant folds and can be elided, avoid emitting the
3803 // whole loop.
3804 bool CondConstant;
// ContBlock stays null when the precondition is a constant; the branch to
// the continuation block at the end is emitted only when it is set.
3805 llvm::BasicBlock *ContBlock = nullptr;
3806 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3807 if (!CondConstant)
3808 return false;
3809 } else {
3810 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3811 ContBlock = createBasicBlock("omp.precond.end");
3812 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3813 getProfileCount(&S));
3814 EmitBlock(ThenBlock);
3816 }
3817
3818 RunCleanupsScope DoacrossCleanupScope(*this);
// An 'ordered' clause that reports a loop count triggers doacross
// initialization; one without a loop count marks the loop as Ordered.
3819 bool Ordered = false;
3820 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3821 if (OrderedClause->getNumForLoops())
3822 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3823 else
3824 Ordered = true;
3825 }
3826
3827 emitAlignedClause(*this, S);
3828 bool HasLinears = EmitOMPLinearClauseInit(S);
3829 // Emit helper vars inits.
3830
3831 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3832 LValue LB = Bounds.first;
3833 LValue UB = Bounds.second;
3834 LValue ST =
3835 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3836 LValue IL =
3837 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3838
3839 // Emit 'then' code.
3840 {
3842 OMPPrivateScope LoopScope(*this);
3843 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
3844 // Emit implicit barrier to synchronize threads and avoid data races on
3845 // initialization of firstprivate variables and post-update of
3846 // lastprivate variables.
3847 CGM.getOpenMPRuntime().emitBarrierCall(
3848 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3849 /*ForceSimpleCall=*/true);
3850 }
3851 EmitOMPPrivateClause(S, LoopScope);
3853 *this, S, EmitLValue(S.getIterationVariable()));
3854 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3855 EmitOMPReductionClauseInit(S, LoopScope);
3856 EmitOMPPrivateLoopCounters(S, LoopScope);
3857 EmitOMPLinearClause(S, LoopScope);
3858 (void)LoopScope.Privatize();
3860 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3861
3862 // Detect the loop schedule kind and chunk.
3863 const Expr *ChunkExpr = nullptr;
3864 OpenMPScheduleTy ScheduleKind;
3865 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3866 ScheduleKind.Schedule = C->getScheduleKind();
3867 ScheduleKind.M1 = C->getFirstScheduleModifier();
3868 ScheduleKind.M2 = C->getSecondScheduleModifier();
3869 ChunkExpr = C->getChunkSize();
3870 } else {
3871 // Default behaviour for schedule clause.
3872 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3873 *this, S, ScheduleKind.Schedule, ChunkExpr);
3874 }
// HasChunkSizeOne is set only when the chunk expression evaluates to an
// integer constant equal to 1; Chunk holds the emitted chunk value converted
// to the iteration variable's type.
3875 bool HasChunkSizeOne = false;
3876 llvm::Value *Chunk = nullptr;
3877 if (ChunkExpr) {
3878 Chunk = EmitScalarExpr(ChunkExpr);
3879 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3880 S.getIterationVariable()->getType(),
3881 S.getBeginLoc());
3883 if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3884 llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3885 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3886 }
3887 }
3888 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3889 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3890 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3891 // If the static schedule kind is specified or if the ordered clause is
3892 // specified, and if no monotonic modifier is specified, the effect will
3893 // be as if the monotonic modifier was specified.
// (The rule quoted above is what the IsMonotonic computation further down
// implements.) StaticChunkedOne: static chunked schedule with a constant
// chunk of 1 on a loop bound sharing directive.
3894 bool StaticChunkedOne =
3895 RT.isStaticChunked(ScheduleKind.Schedule,
3896 /* Chunked */ Chunk != nullptr) &&
3897 HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(EKind);
3898 // GPU combined `distribute parallel for`: emit a single
3899 // for_static_init with the fused distr_static_chunk + static_chunkone
3900 // schedule (enum 93). The surrounding EmitOMPDistributeLoop must skip
3901 // its distribute_static_init under the same conditions.
3902 if (StaticChunkedOne && canEmitGPUFusedDistSchedule(CGM, S, EKind))
3903 ScheduleKind.UseFusedDistChunkSchedule = true;
// Monotonic when ordered, when the schedule is static without a
// nonmonotonic modifier, or when a monotonic modifier is given explicitly.
3904 bool IsMonotonic =
3905 Ordered ||
3906 (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3907 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3908 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3909 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3910 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
// Static path: no outer loop is emitted; a single static init is followed by
// one inner loop.
3911 if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3912 /* Chunked */ Chunk != nullptr) ||
3913 StaticChunkedOne) &&
3914 !Ordered) {
3918 *this, S,
3919 [&S, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
3920 if (isOpenMPSimdDirective(EKind)) {
3921 CGF.EmitOMPSimdInit(S);
3922 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3923 if (C->getKind() == OMPC_ORDER_concurrent)
3924 CGF.LoopStack.setParallel(/*Enable=*/true);
3925 }
3926 },
3927 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3928 &S, ScheduleKind, LoopExit, EKind,
3929 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3930 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3931 // When no chunk_size is specified, the iteration space is divided
3932 // into chunks that are approximately equal in size, and at most
3933 // one chunk is distributed to each thread. Note that the size of
3934 // the chunks is unspecified in this case.
// The chunk value is handed to the static init only in the chunk-one case.
3936 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
3937 UB.getAddress(), ST.getAddress(),
3938 StaticChunkedOne ? Chunk : nullptr);
3940 CGF, S.getBeginLoc(), EKind, ScheduleKind, StaticInit);
3941 // UB = min(UB, GlobalUB);
3942 if (!StaticChunkedOne)
3943 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3944 // IV = LB;
3945 CGF.EmitIgnoredExpr(S.getInit());
3946 // For unchunked static schedule generate:
3947 //
3948 // while (idx <= UB) {
3949 // BODY;
3950 // ++idx;
3951 // }
3952 //
3953 // For static schedule with chunk one:
3954 //
3955 // while (IV <= PrevUB) {
3956 // BODY;
3957 // IV += ST;
3958 // }
3959 CGF.EmitOMPInnerLoop(
3960 S, LoopScope.requiresCleanups(),
3961 StaticChunkedOne ? S.getCombinedParForInDistCond()
3962 : S.getCond(),
3963 StaticChunkedOne ? S.getDistInc() : S.getInc(),
3964 [&S, LoopExit](CodeGenFunction &CGF) {
3965 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3966 },
3967 [](CodeGenFunction &) {});
3968 });
3969 EmitBlock(LoopExit.getBlock());
3970 // Tell the runtime we are done.
3971 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3972 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3973 OMPD_for);
3974 };
3975 OMPCancelStack.emitExit(*this, EKind, CodeGen);
3976 } else {
3977 // Emit the outer loop, which requests its work chunk [LB..UB] from
3978 // runtime and runs the inner loop to process it.
3979 OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
3980 ST.getAddress(), IL.getAddress(), Chunk,
3981 EUB);
3982 LoopArguments.DKind = OMPD_for;
3983 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3984 LoopArguments, CGDispatchBounds);
3985 }
// Each finalization step below receives a condition computed as
// "IL loaded as a scalar is not null".
3986 if (isOpenMPSimdDirective(EKind)) {
3987 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3988 return CGF.Builder.CreateIsNotNull(
3989 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3990 });
3991 }
3993 S, /*ReductionKind=*/isOpenMPSimdDirective(EKind)
3994 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3995 : /*Parallel only*/ OMPD_parallel);
3996 // Emit post-update of the reduction variables if IsLastIter != 0.
3998 *this, S, [IL, &S](CodeGenFunction &CGF) {
3999 return CGF.Builder.CreateIsNotNull(
4000 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4001 });
4002 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4003 if (HasLastprivateClause)
4005 S, isOpenMPSimdDirective(EKind),
4006 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
4007 LoopScope.restoreMap();
4008 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
4009 return CGF.Builder.CreateIsNotNull(
4010 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4011 });
4012 }
4013 DoacrossCleanupScope.ForceCleanup();
4014 // We're now done with the loop, so jump to the continuation block.
4015 if (ContBlock) {
4016 EmitBranch(ContBlock);
4017 EmitBlock(ContBlock, /*IsFinished=*/true);
4018 }
4019 }
4020 return HasLastprivateClause;
4021}
4022
4023/// The following two functions generate expressions for the loop lower
4024/// and upper bounds in case of static and dynamic (dispatch) schedule
4025/// of the associated 'for' or 'distribute' loop.
///
/// This one emits the directive's lower and upper bound helper variables and
/// returns them as {LB, UB} lvalues.
// NOTE(review): the line carrying this function's name and parameters is
// absent from this listing.
4026static std::pair<LValue, LValue>
4028 const auto &LS = cast<OMPLoopDirective>(S);
4029 LValue LB =
4030 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
4031 LValue UB =
4032 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
4033 return {LB, UB};
4034}
4035
4036/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
4037/// consider the lower and upper bound expressions generated by the
4038/// worksharing loop support, but we use 0 and the iteration space size as
4039/// constants.
///
/// The LB and UB address parameters are not read by this function.
/// \returns {0, LastIteration}: an integer zero as wide as the iteration
/// variable's type, and the emitted value of the last-iteration expression.
// NOTE(review): the line carrying this function's name and leading parameters
// is absent from this listing.
4040static std::pair<llvm::Value *, llvm::Value *>
4042 Address LB, Address UB) {
4043 const auto &LS = cast<OMPLoopDirective>(S);
4044 const Expr *IVExpr = LS.getIterationVariable();
4045 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
4046 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
4047 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
4048 return {LBVal, UBVal};
4049}
4050
4051/// Emits internal temp array declarations for the directive with inscan
4052/// reductions.
4053/// The code is the following:
4054/// \code
4055/// size num_iters = <num_iters>;
4056/// <type> buffer[num_iters];
4057/// \endcode
///
/// \param NumIteratorsGen Callback producing the iteration count; its result
/// is cast to CGF.SizeTy as an unsigned value.
// NOTE(review): the line naming this function, the declarations of Shareds
// and Privates, and the head of the statement that consumes the array size
// expression are absent from this listing.
4059 CodeGenFunction &CGF, const OMPLoopDirective &S,
4060 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
4061 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
4062 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
4065 SmallVector<const Expr *, 4> ReductionOps;
4066 SmallVector<const Expr *, 4> CopyArrayTemps;
// Collect the per-variable expressions of every reduction clause; all of
// them are required to carry the inscan modifier.
4067 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4068 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
4069 "Only inscan reductions are expected.");
4070 Shareds.append(C->varlist_begin(), C->varlist_end());
4071 Privates.append(C->privates().begin(), C->privates().end());
4072 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
4073 CopyArrayTemps.append(C->copy_array_temps().begin(),
4074 C->copy_array_temps().end());
4075 }
4076 {
4077 // Emit buffers for each reduction variable.
4078 // ReductionCodeGen is required to emit correctly the code for array
4079 // reductions.
4080 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
// Count indexes RedCG and ITA walks CopyArrayTemps; both advance once per
// private, in lock step.
4081 unsigned Count = 0;
4082 auto *ITA = CopyArrayTemps.begin();
4083 for (const Expr *IRef : Privates) {
4084 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
4085 // Emit variably modified arrays, used for arrays/array sections
4086 // reductions.
4087 if (PrivateVD->getType()->isVariablyModifiedType()) {
4088 RedCG.emitSharedOrigLValue(CGF, Count);
4089 RedCG.emitAggregateType(CGF, Count);
4090 }
// The temp buffer's variable-array size expression is paired with the
// iteration count here.
4092 CGF,
4094 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
4095 ->getSizeExpr()),
4096 RValue::get(OMPScanNumIterations));
4097 // Emit temp buffer.
4098 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
4099 ++ITA;
4100 ++Count;
4101 }
4102 }
4103}
4104
4105/// Copies final inscan reductions values to the original variables.
4106/// The code is the following:
4107/// \code
4108/// <orig_var> = buffer[num_iters-1];
4109/// \endcode
///
/// \param NumIteratorsGen Callback producing the iteration count; its result
/// is cast to CGF.SizeTy as an unsigned value.
// NOTE(review): the line naming this function, several SmallVector
// declarations (Shareds, LHSs, RHSs, Privates, CopyOps) and the head of the
// statement that consumes the subscript index are absent from this listing.
4111 CodeGenFunction &CGF, const OMPLoopDirective &S,
4112 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
4113 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
4114 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
4120 SmallVector<const Expr *, 4> CopyArrayElems;
// Collect the per-variable expressions of every reduction clause; all of
// them are required to carry the inscan modifier.
4121 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4122 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
4123 "Only inscan reductions are expected.");
4124 Shareds.append(C->varlist_begin(), C->varlist_end());
4125 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4126 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4127 Privates.append(C->privates().begin(), C->privates().end());
4128 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
4129 CopyArrayElems.append(C->copy_array_elems().begin(),
4130 C->copy_array_elems().end());
4131 }
4132 // Create temp var and copy LHS value to this temp value.
4133 // LHS = TMP[LastIter];
// OMPLast is the index of the last buffer element (iteration count minus 1).
4134 llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
4135 OMPScanNumIterations,
4136 llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
4137 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4138 const Expr *PrivateExpr = Privates[I];
4139 const Expr *OrigExpr = Shareds[I];
4140 const Expr *CopyArrayElem = CopyArrayElems[I];
// The subscript index of the copy-array element is paired with OMPLast here,
// so the element emitted below is the last one.
4142 CGF,
4144 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4145 RValue::get(OMPLast));
4146 LValue DestLVal = CGF.EmitLValue(OrigExpr);
4147 LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
4148 CGF.EmitOMPCopy(
4149 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
4150 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4151 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
4152 }
4153}
4154
4155/// Emits the code for the directive with inscan reductions.
4156/// The code is the following:
4157/// \code
4158/// #pragma omp ...
4159/// for (i: 0..<num_iters>) {
4160/// <input phase>;
4161/// buffer[i] = red;
4162/// }
4163/// #pragma omp master // in parallel region
4164/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
4165/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
4166/// buffer[cnt] op= buffer[cnt-pow(2,k)];
4167/// #pragma omp barrier // in parallel region
4168/// #pragma omp ...
4169/// for (0..<num_iters>) {
4170/// red = InclusiveScan ? buffer[i] : buffer[i-1];
4171/// <scan phase>;
4172/// }
4173/// \endcode
///
/// \param NumIteratorsGen Callback producing the iteration count.
/// \param FirstGen Emits the first loop; called with OMPFirstScanLoop set to
/// true.
/// \param SecondGen Emits the second loop; called with OMPFirstScanLoop set to
/// false.
// NOTE(review): the line naming this function, several SmallVector
// declarations (Privates, LHSs, RHSs), the declaration of EKind and a few
// call heads are absent from this listing.
4175 CodeGenFunction &CGF, const OMPLoopDirective &S,
4176 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
4177 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
4178 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
4179 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
4180 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
4182 SmallVector<const Expr *, 4> ReductionOps;
4185 SmallVector<const Expr *, 4> CopyArrayElems;
4186 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4187 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
4188 "Only inscan reductions are expected.");
4189 Privates.append(C->privates().begin(), C->privates().end());
4190 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
4191 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4192 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4193 CopyArrayElems.append(C->copy_array_elems().begin(),
4194 C->copy_array_elems().end());
4195 }
4197 {
4198 // Emit loop with input phase:
4199 // #pragma omp ...
4200 // for (i: 0..<num_iters>) {
4201 // <input phase>;
4202 // buffer[i] = red;
4203 // }
4204 CGF.OMPFirstScanLoop = true;
4206 FirstGen(CGF);
4207 }
4208 // #pragma omp barrier // in parallel region
4209 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
4210 &ReductionOps,
4211 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
4212 Action.Enter(CGF);
4213 // Emit prefix reduction:
4214 // #pragma omp master // in parallel region
4215 // for (int k = 0; k != ceil(log2(n)); ++k)
4216 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
4217 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
4218 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
// LogVal = (unsigned) ceil(log2((double) num_iters)), computed through the
// log2 and ceil intrinsics on double.
4219 llvm::Function *F =
4220 CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
4221 llvm::Value *Arg =
4222 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
4223 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
4224 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
4225 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
4226 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
4227 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
4228 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
4229 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
4230 CGF.EmitBlock(LoopBB);
// Outer loop state: Counter is k (starts at 0), Pow2K is 2^k (starts at 1).
4231 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
4232 // size pow2k = 1;
4233 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
4234 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
4235 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
4236 // for (size i = n - 1; i >= 2 ^ k; --i)
4237 // tmp[i] op= tmp[i-pow2k];
4238 llvm::BasicBlock *InnerLoopBB =
4239 CGF.createBasicBlock("omp.inner.log.scan.body");
4240 llvm::BasicBlock *InnerExitBB =
4241 CGF.createBasicBlock("omp.inner.log.scan.exit");
// The inner loop is skipped entirely when n - 1 < 2^k.
4242 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
4243 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4244 CGF.EmitBlock(InnerLoopBB);
4245 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
4246 IVal->addIncoming(NMin1, LoopBB);
4247 {
4248 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
4249 auto *ILHS = LHSs.begin();
4250 auto *IRHS = RHSs.begin();
// For every reduction variable, remap its LHS to buffer[IVal] and its RHS to
// buffer[IVal - Pow2K] before emitting the reduction.
4251 for (const Expr *CopyArrayElem : CopyArrayElems) {
4252 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4253 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4254 Address LHSAddr = Address::invalid();
4255 {
4257 CGF,
4259 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4260 RValue::get(IVal));
4261 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress();
4262 }
4263 PrivScope.addPrivate(LHSVD, LHSAddr);
4264 Address RHSAddr = Address::invalid();
4265 {
4266 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
4268 CGF,
4270 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4271 RValue::get(OffsetIVal));
4272 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress();
4273 }
4274 PrivScope.addPrivate(RHSVD, RHSAddr);
4275 ++ILHS;
4276 ++IRHS;
4277 }
4278 PrivScope.Privatize();
4279 CGF.CGM.getOpenMPRuntime().emitReduction(
4280 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
4281 {/*WithNowait=*/true, /*SimpleReduction=*/true,
4282 /*IsPrivateVarReduction*/ {}, OMPD_unknown});
4283 }
// Inner loop step: decrement the index and continue while it is still
// >= 2^k.
4284 llvm::Value *NextIVal =
4285 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
4286 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
4287 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
4288 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4289 CGF.EmitBlock(InnerExitBB);
4290 llvm::Value *Next =
4291 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
4292 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
4293 // pow2k <<= 1;
4294 llvm::Value *NextPow2K =
4295 CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
4296 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
// NOTE(review): the outer loop is bottom-tested and compares the already
// incremented counter (first value 1) against LogVal, so this test cannot
// succeed on the first pass when LogVal == 0 — presumably this code is never
// reached with a single-iteration loop; confirm.
4297 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
4298 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
4299 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
4300 CGF.EmitBlock(ExitBB);
4301 };
// For parallel directive kinds the prefix reduction is emitted inside a
// master region; otherwise CodeGen is run directly.
4303 if (isOpenMPParallelDirective(EKind)) {
4304 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4306 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4307 /*ForceSimpleCall=*/true);
4308 } else {
4309 RegionCodeGenTy RCG(CodeGen);
4310 RCG(CGF);
4311 }
4312
4313 CGF.OMPFirstScanLoop = false;
4314 SecondGen(CGF);
4315}
4316
// Emits the worksharing loop of S and returns the lastprivate flag reported by
// CodeGenFunction::EmitOMPWorksharingLoop.
// NOTE(review): this listing omits the first line of the signature (presumably
// emitWorksharingDirective, which is called with (CGF, S, HasCancel) elsewhere
// in this file), the definition of EKind, and the trailing arguments of every
// EmitOMPWorksharingLoop call -- confirm against the full source.
4318 const OMPLoopDirective &S,
4319 bool HasCancel) {
4320 bool HasLastprivates;
// A reduction clause with the 'inscan' modifier routes the directive through
// emitScanBasedDirective, which receives two loop generators.
4322 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4323 [](const OMPReductionClause *C) {
4324 return C->getModifier() == OMPC_REDUCTION_inscan;
4325 })) {
// Yields the iteration count of S as a scalar value.
4326 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4328 OMPLoopScope LoopScope(CGF, S);
4329 return CGF.EmitScalarExpr(S.getNumIterations());
4330 };
// First generator: the loop's result is discarded and a barrier follows it.
4331 const auto &&FirstGen = [&S, HasCancel, EKind](CodeGenFunction &CGF) {
4332 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4333 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
4336 // Emit an implicit barrier at the end.
4337 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
4338 OMPD_for);
4339 };
// Second generator: records whether the loop reported lastprivates.
4340 const auto &&SecondGen = [&S, HasCancel, EKind,
4341 &HasLastprivates](CodeGenFunction &CGF) {
4342 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4343 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
4346 };
// For parallel directives the scan declarations/finals are not emitted here;
// the combined 'parallel for' emitters in this file emit them around the
// parallel region instead.
4347 if (!isOpenMPParallelDirective(EKind))
4348 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
4349 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
4350 if (!isOpenMPParallelDirective(EKind))
4351 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
4352 } else {
// No inscan reduction: a single worksharing loop is emitted.
4353 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4354 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
4357 }
4358 return HasLastprivates;
4359 }
4360
4361 // Pass OMPLoopDirective (instead of OMPForDirective) to make this check
4362 // available for "loop bind(parallel)", which maps to "for".
// Returns true only if the directive has no cancellation and every clause on
// S is accepted by the checks below; the result gates the OpenMPIRBuilder
// path of the 'for' emitter in this file.
// NOTE(review): the first line of the signature is absent from this listing;
// the caller in this file names it isForSupportedByOpenMPIRBuilder(S, HasCancel).
4364 bool HasCancel) {
// Directives with cancellation are rejected outright.
4365 if (HasCancel)
4366 return false;
4367 for (OMPClause *C : S.clauses()) {
// NOTE(review): the condition guarding this `continue` (listing line 4368) is
// not shown -- presumably it skips clause kinds that need no further checks.
4369 continue;
4370
4371 if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
// A schedule clause carrying either modifier is not accepted.
4372 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4373 return false;
4374 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4375 return false;
4376 switch (SC->getScheduleKind()) {
4377 case OMPC_SCHEDULE_auto:
4378 case OMPC_SCHEDULE_dynamic:
4379 case OMPC_SCHEDULE_runtime:
4380 case OMPC_SCHEDULE_guided:
4381 case OMPC_SCHEDULE_static:
// These schedule kinds are accepted; move on to the next clause.
4382 continue;
// NOTE(review): the label preceding this return (listing line 4383) is not
// shown -- presumably the remaining schedule kind.
4384 return false;
4385 }
4386 }
4387
// Any clause not handled above disqualifies the directive.
4388 return false;
4389 }
4390
4391 return true;
4392 }
4393
// Translates a clause-level schedule kind (OMPC_SCHEDULE_*) into the matching
// llvm::omp::ScheduleKind enumerator.
// NOTE(review): the line with the function name and parameter, and the first
// case label (the one returning OMP_SCHEDULE_Default), are absent from this
// listing; the caller in this file names it convertClauseKindToSchedKind.
4394 static llvm::omp::ScheduleKind
4396 switch (ScheduleClauseKind) {
4398 return llvm::omp::OMP_SCHEDULE_Default;
4399 case OMPC_SCHEDULE_auto:
4400 return llvm::omp::OMP_SCHEDULE_Auto;
4401 case OMPC_SCHEDULE_dynamic:
4402 return llvm::omp::OMP_SCHEDULE_Dynamic;
4403 case OMPC_SCHEDULE_guided:
4404 return llvm::omp::OMP_SCHEDULE_Guided;
4405 case OMPC_SCHEDULE_runtime:
4406 return llvm::omp::OMP_SCHEDULE_Runtime;
4407 case OMPC_SCHEDULE_static:
4408 return llvm::omp::OMP_SCHEDULE_Static;
4409 }
// Reached only if no case above returned.
4410 llvm_unreachable("Unhandled schedule kind");
4411 }
4412
4413 // Pass OMPLoopDirective (instead of OMPForDirective) to make this function
4414 // available for "loop bind(parallel)", which maps to "for".
// Emits a 'for' worksharing loop for S, either through the OpenMPIRBuilder
// (when the OpenMPIRBuilder language option is set and S passes
// isForSupportedByOpenMPIRBuilder) or through emitWorksharingDirective.
// NOTE(review): the first signature line and listing lines 4438, 4441, 4456,
// 4458 and 4468 are absent from this listing -- confirm against the full source.
4416 CodeGenModule &CGM, bool HasCancel) {
4417 bool HasLastprivates = false;
4418 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder &&
4419 isForSupportedByOpenMPIRBuilder(S, HasCancel);
4420 auto &&CodeGen = [&S, &CGM, HasCancel, &HasLastprivates,
4421 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
4422 // Use the OpenMPIRBuilder if enabled.
4423 if (UseOMPIRBuilder) {
// The builder is asked to emit the barrier itself unless 'nowait' is present.
4424 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
4425
// Schedule kind and chunk size come from the schedule clause when present;
// otherwise the default kind and a null chunk size are used.
4426 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
4427 llvm::Value *ChunkSize = nullptr;
4428 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
4429 SchedKind =
4430 convertClauseKindToSchedKind(SchedClause->getScheduleKind());
4431 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
4432 ChunkSize = CGF.EmitScalarExpr(ChunkSizeExpr);
4433 }
4434
4435 // Emit the associated statement and get its loop representation.
4436 const Stmt *Inner = S.getRawStmt();
4437 llvm::CanonicalLoopInfo *CLI =
4439
4440 llvm::OpenMPIRBuilder &OMPBuilder =
4442 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4443 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
// All modifier/ordered flags are passed as false here.
4444 cantFail(OMPBuilder.applyWorkshareLoop(
4445 CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
4446 SchedKind, ChunkSize, /*HasSimdModifier=*/false,
4447 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
4448 /*HasOrderedClause=*/false));
4449 return;
4450 }
4451
4452 HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel);
4453 };
4454 {
4455 auto LPCRegion =
4457 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
4459 HasCancel);
4460 }
4461
// On the OpenMPIRBuilder path NeedsBarrier was handed to applyWorkshareLoop,
// so the explicit barrier below is emitted only on the other path.
4462 if (!UseOMPIRBuilder) {
4463 // Emit an implicit barrier at the end.
4464 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
4465 CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), OMPD_for);
4466 }
4467 // Check for outer lastprivate conditional update.
4469 }
4470
// Entry point for the 'for' directive: forwards to emitOMPForDirective with
// this CodeGenFunction, its CodeGenModule and the directive's cancel flag.
4471 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
4472 return emitOMPForDirective(S, *this, CGM, S.hasCancel());
4473 }
4474
// Emits the 'for simd' directive: the loop is produced by
// emitWorksharingDirective (with cancellation disabled) as an inlined
// OMPD_simd region, then a barrier is emitted unless 'nowait' is present and
// no lastprivates were reported.
// NOTE(review): listing lines 4483 and 4492 are absent from this listing.
4475 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
4476 bool HasLastprivates = false;
4477 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
4478 PrePostActionTy &) {
4479 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4480 };
4481 {
4482 auto LPCRegion =
4484 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4485 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
4486 }
4487
4488 // Emit an implicit barrier at the end.
4489 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
4490 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
4491 // Check for outer lastprivate conditional update.
4493 }
4494
// Creates a memory temporary of type Ty named Name and returns it as an
// LValue; when Init is non-null it is stored into the temporary as an
// initialization.
// NOTE(review): the first line of the signature is absent from this listing;
// callers in this file name it createSectionLVal(CGF, Ty, Name, Init).
4496 const Twine &Name,
4497 llvm::Value *Init = nullptr) {
4498 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
4499 if (Init)
4500 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
4501 return LVal;
4502 }
4503
// Emits the sections of directive S as a statically scheduled loop over
// 32-bit section indices whose body is a switch dispatching to each section
// statement. Also handles firstprivate/private/lastprivate/reduction clauses
// and the trailing barrier needed for lastprivates under 'nowait'.
// NOTE(review): listing lines 4508 (presumably the definition of EKind), 4590
// and 4626 are absent from this listing -- confirm against the full source.
4504 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
4505 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
// CS is null when the captured statement is not a compound statement; the
// code below then treats the whole captured statement as a single section.
4506 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
4507 bool HasLastprivates = false;
4509 auto &&CodeGen = [&S, CapturedStmt, CS, EKind,
4510 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
4511 const ASTContext &C = CGF.getContext();
4512 QualType KmpInt32Ty =
4513 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4514 // Emit helper vars inits.
4515 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
4516 CGF.Builder.getInt32(0));
// Global upper bound: index of the last section (0 for a single section).
4517 llvm::ConstantInt *GlobalUBVal = CS != nullptr
4518 ? CGF.Builder.getInt32(CS->size() - 1)
4519 : CGF.Builder.getInt32(0);
4520 LValue UB =
4521 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
4522 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
4523 CGF.Builder.getInt32(1));
4524 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
4525 CGF.Builder.getInt32(0));
4526 // Loop counter.
4527 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
// Opaque expressions bound to IV and UB let the loop condition and increment
// be expressed as AST nodes.
4528 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4529 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
4530 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4531 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
4532 // Generate condition for loop.
4533 BinaryOperator *Cond = BinaryOperator::Create(
4534 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
4535 S.getBeginLoc(), FPOptionsOverride());
4536 // Increment for loop counter.
4537 UnaryOperator *Inc = UnaryOperator::Create(
4538 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
4539 S.getBeginLoc(), true, FPOptionsOverride());
4540 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
4541 // Iterate through all sections and emit a switch construct:
4542 // switch (IV) {
4543 // case 0:
4544 // <SectionStmt[0]>;
4545 // break;
4546 // ...
4547 // case <NumSection> - 1:
4548 // <SectionStmt[<NumSection> - 1]>;
4549 // break;
4550 // }
4551 // .omp.sections.exit:
4552 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
4553 llvm::SwitchInst *SwitchStmt =
4554 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
4555 ExitBB, CS == nullptr ? 1 : CS->size());
4556 if (CS) {
// One case per child statement, numbered in order of appearance.
4557 unsigned CaseNumber = 0;
4558 for (const Stmt *SubStmt : CS->children()) {
4559 auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
4560 CGF.EmitBlock(CaseBB);
4561 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
4562 CGF.EmitStmt(SubStmt);
4563 CGF.EmitBranch(ExitBB);
4564 ++CaseNumber;
4565 }
4566 } else {
// Single section: the captured statement itself becomes case 0.
4567 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
4568 CGF.EmitBlock(CaseBB);
4569 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
4570 CGF.EmitStmt(CapturedStmt);
4571 CGF.EmitBranch(ExitBB);
4572 }
4573 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
4574 };
4575
4576 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
4577 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
4578 // Emit implicit barrier to synchronize threads and avoid data races on
4579 // initialization of firstprivate variables and post-update of lastprivate
4580 // variables.
4581 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4582 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4583 /*ForceSimpleCall=*/true);
4584 }
4585 CGF.EmitOMPPrivateClause(S, LoopScope);
4586 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
4587 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4588 CGF.EmitOMPReductionClauseInit(S, LoopScope);
4589 (void)LoopScope.Privatize();
4591 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4592
4593 // Emit static non-chunked loop.
4594 OpenMPScheduleTy ScheduleKind;
4595 ScheduleKind.Schedule = OMPC_SCHEDULE_static;
4596 CGOpenMPRuntime::StaticRTInput StaticInit(
4597 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
4598 LB.getAddress(), UB.getAddress(), ST.getAddress());
4599 CGF.CGM.getOpenMPRuntime().emitForStaticInit(CGF, S.getBeginLoc(), EKind,
4600 ScheduleKind, StaticInit);
4601 // UB = min(UB, GlobalUB);
4602 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
4603 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
4604 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
4605 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
4606 // IV = LB;
4607 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
4608 // while (idx <= UB) { BODY; ++idx; }
4609 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
4610 [](CodeGenFunction &) {});
4611 // Tell the runtime we are done.
// Note: this inner CodeGen shadows the enclosing lambda of the same name.
4612 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
4613 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
4614 OMPD_sections);
4615 };
4616 CGF.OMPCancelStack.emitExit(CGF, EKind, CodeGen);
4617 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4618 // Emit post-update of the reduction variables if IsLastIter != 0.
4619 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
4620 return CGF.Builder.CreateIsNotNull(
4621 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4622 });
4623
4624 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4625 if (HasLastprivates)
4627 S, /*NoFinals=*/false,
4628 CGF.Builder.CreateIsNotNull(
4629 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
4630 };
4631
// Only the 'sections' and 'parallel sections' directive classes supply a
// cancel flag here; for any other directive kind HasCancel stays false.
4632 bool HasCancel = false;
4633 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
4634 HasCancel = OSD->hasCancel();
4635 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
4636 HasCancel = OPSD->hasCancel();
4637 OMPCancelStackRAII CancelRegion(*this, EKind, HasCancel);
4638 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
4639 HasCancel);
4640 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4641 // clause. Otherwise the barrier will be generated by the codegen for the
4642 // directive.
4643 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
// NOTE(review): the guard above tests HasLastprivates, so the mention of
// "firstprivate" in the comment below looks stale -- confirm.
4644 // Emit implicit barrier to synchronize threads and avoid data races on
4645 // initialization of firstprivate variables.
4646 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4647 OMPD_unknown);
4648 }
4649 }
4650
// Emits the 'scope' directive as an inlined OMPD_scope region: applies the
// firstprivate, private and reduction clauses, emits the captured statement,
// finalizes reductions, and then emits a barrier unless 'nowait' is present.
// NOTE(review): listing lines 4665 and 4674 are absent from this listing.
4651 void CodeGenFunction::EmitOMPScopeDirective(const OMPScopeDirective &S) {
4652 {
4653 // Emit code for 'scope' region
4654 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4655 Action.Enter(CGF);
4656 OMPPrivateScope PrivateScope(CGF);
4657 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4658 CGF.EmitOMPPrivateClause(S, PrivateScope);
4659 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4660 (void)PrivateScope.Privatize();
4661 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4662 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4663 };
4664 auto LPCRegion =
4666 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4667 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_scope, CodeGen);
4668 }
4669 // Emit an implicit barrier at the end.
4670 if (!S.getSingleClause<OMPNowaitClause>()) {
4671 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_scope);
4672 }
4673 // Check for outer lastprivate conditional update.
4675 }
4676
// Emits the 'sections' directive. With the OpenMPIRBuilder language option
// set, one body callback per section is collected and handed to
// OMPBuilder.createSections; otherwise EmitSections is used, followed by a
// barrier unless 'nowait' is present.
// NOTE(review): listing lines 4692 (presumably the declaration of
// SectionCBVector), 4708, 4741 and 4751 are absent from this listing.
4677 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
4678 if (CGM.getLangOpts().OpenMPIRBuilder) {
4679 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4680 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4681 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4682
4683 auto FiniCB = [](InsertPointTy IP) {
4684 // Don't FinalizeOMPRegion because this is done inside of OMPIRBuilder for
4685 // sections.
4686 return llvm::Error::success();
4687 };
4688
4689 const CapturedStmt *ICS = S.getInnermostCapturedStmt();
4690 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
// CS is null when the captured statement is not a compound statement; it is
// then emitted as one section.
4691 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
4693 if (CS) {
4694 for (const Stmt *SubStmt : CS->children()) {
4695 auto SectionCB = [this, SubStmt](
4696 InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4697 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4698 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(*this, SubStmt, AllocIP,
4699 CodeGenIP, "section");
4700 return llvm::Error::success();
4701 };
4702 SectionCBVector.push_back(SectionCB);
4703 }
4704 } else {
4705 auto SectionCB =
4706 [this, CapturedStmt](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4707 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4709 *this, CapturedStmt, AllocIP, CodeGenIP, "section");
4710 return llvm::Error::success();
4711 };
4712 SectionCBVector.push_back(SectionCB);
4713 }
4714
4715 // Privatization callback that performs appropriate action for
4716 // shared/private/firstprivate/lastprivate/copyin/... variables.
4717 //
4718 // TODO: This defaults to shared right now.
4719 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4720 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
4721 // The next line is appropriate only for variables (Val) with the
4722 // data-sharing attribute "shared".
4723 ReplVal = &Val;
4724
4725 return CodeGenIP;
4726 };
4727
4728 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
4729 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
4730 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4731 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
4732 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4733 cantFail(OMPBuilder.createSections(
4734 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
4735 S.getSingleClause<OMPNowaitClause>()));
// Continue emitting at the insertion point returned by the builder.
4736 Builder.restoreIP(AfterIP);
4737 return;
4738 }
4739 {
4740 auto LPCRegion =
4742 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4743 EmitSections(S);
4744 }
4745 // Emit an implicit barrier at the end.
4746 if (!S.getSingleClause<OMPNowaitClause>()) {
4747 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4748 OMPD_sections);
4749 }
4750 // Check for outer lastprivate conditional update.
4752 }
4753
// Emits a single 'section' directive. With the OpenMPIRBuilder language
// option set, the associated statement is emitted through
// OMPBuilder.createSection; otherwise it is emitted directly inside a lexical
// scope after a stop point.
// NOTE(review): listing lines 4761 and 4768 are absent from this listing.
4754 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4755 if (CGM.getLangOpts().OpenMPIRBuilder) {
4756 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4757 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4758
4759 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4760 auto FiniCB = [this](InsertPointTy IP) {
4762 return llvm::Error::success();
4763 };
4764
4765 auto BodyGenCB = [SectionRegionBodyStmt,
4766 this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4767 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4769 *this, SectionRegionBodyStmt, AllocIP, CodeGenIP, "section");
4770 return llvm::Error::success();
4771 };
4772
4773 LexicalScope Scope(*this, S.getSourceRange());
4774 EmitStopPoint(&S);
4775 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4776 cantFail(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
// Continue emitting at the insertion point returned by the builder.
4777 Builder.restoreIP(AfterIP);
4778
4779 return;
4780 }
4781 LexicalScope Scope(*this, S.getSourceRange());
4782 EmitStopPoint(&S);
4783 EmitStmt(S.getAssociatedStmt());
4784 }
4785
// Emits the 'single' directive: gathers the copyprivate variables and their
// helper expressions, emits the region through the runtime's
// emitSingleRegion, and emits a trailing barrier when neither 'nowait' nor
// any copyprivate variable is present.
// NOTE(review): listing lines 4788-4790 (presumably the declarations of
// DestExprs, SrcExprs and AssignmentOps), 4806-4808, 4814 and 4828 are absent
// from this listing -- confirm against the full source.
4786 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4787 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4791 // Check if there are any 'copyprivate' clauses associated with this
4792 // 'single' construct.
4793 // Build a list of copyprivate variables along with helper expressions
4794 // (<source>, <destination>, <destination>=<source> expressions)
4795 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4796 CopyprivateVars.append(C->varlist_begin(), C->varlist_end());
4797 DestExprs.append(C->destination_exprs().begin(),
4798 C->destination_exprs().end());
4799 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
4800 AssignmentOps.append(C->assignment_ops().begin(),
4801 C->assignment_ops().end());
4802 }
4803 // Emit code for 'single' region along with 'copyprivate' clauses
4804 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4805 Action.Enter(CGF);
4809 (void)SingleScope.Privatize();
4810 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4811 };
4812 {
4813 auto LPCRegion =
4815 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4816 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4817 CopyprivateVars, DestExprs,
4818 SrcExprs, AssignmentOps);
4819 }
4820 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4821 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4822 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
// NOTE(review): inside this branch 'nowait' is already known to be absent, so
// the conditional below can only select OMPD_single.
4823 CGM.getOpenMPRuntime().emitBarrierCall(
4824 *this, S.getBeginLoc(),
4825 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4826 }
4827 // Check for outer lastprivate conditional update.
4829 }
4830
// Emits the raw statement of S inside a master region created by the OpenMP
// runtime's emitMasterRegion.
// NOTE(review): the signature line is absent from this listing; callers in
// this file name it emitMaster(CGF, S).
4832 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4833 Action.Enter(CGF);
4834 CGF.EmitStmt(S.getRawStmt());
4835 };
4836 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4837 }
4838
// Emits the 'master' directive. With the OpenMPIRBuilder language option set
// the associated statement is emitted through OMPBuilder.createMaster;
// otherwise the emitMaster helper is used.
// NOTE(review): listing lines 4847 and 4854 are absent from this listing.
4839 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4840 if (CGM.getLangOpts().OpenMPIRBuilder) {
4841 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4842 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4843
4844 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4845
4846 auto FiniCB = [this](InsertPointTy IP) {
4848 return llvm::Error::success();
4849 };
4850
4851 auto BodyGenCB = [MasterRegionBodyStmt,
4852 this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4853 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4855 *this, MasterRegionBodyStmt, AllocIP, CodeGenIP, "master");
4856 return llvm::Error::success();
4857 };
4858
4859 LexicalScope Scope(*this, S.getSourceRange());
4860 EmitStopPoint(&S);
4861 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4862 cantFail(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
// Continue emitting at the insertion point returned by the builder.
4863 Builder.restoreIP(AfterIP);
4864
4865 return;
4866 }
4867 LexicalScope Scope(*this, S.getSourceRange());
4868 EmitStopPoint(&S);
4869 emitMaster(*this, S);
4870 }
4871
// Emits the raw statement of S inside a masked region created by the OpenMP
// runtime's emitMaskedRegion, passing the thread-id expression of the
// 'filter' clause when one is present (null otherwise).
// NOTE(review): the signature line is absent from this listing; callers in
// this file name it emitMasked(CGF, S).
4873 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4874 Action.Enter(CGF);
4875 CGF.EmitStmt(S.getRawStmt());
4876 };
4877 Expr *Filter = nullptr;
4878 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4879 Filter = FilterClause->getThreadID();
4880 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4881 Filter);
4882 }
4883
// Emits a masked region. With the OpenMPIRBuilder language option set the
// associated statement is emitted through OMPBuilder.createMasked with a
// 32-bit filter value; otherwise the emitMasked helper is used.
// NOTE(review): the signature line (presumably the 'masked' directive entry
// point of CodeGenFunction) and listing lines 4898 and 4905 are absent from
// this listing.
4885 if (CGM.getLangOpts().OpenMPIRBuilder) {
4886 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4887 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4888
4889 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4890 const Expr *Filter = nullptr;
4891 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4892 Filter = FilterClause->getThreadID();
// Without a 'filter' clause the filter value is the constant 0.
4893 llvm::Value *FilterVal = Filter
4894 ? EmitScalarExpr(Filter, CGM.Int32Ty)
4895 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
4896
4897 auto FiniCB = [this](InsertPointTy IP) {
4899 return llvm::Error::success();
4900 };
4901
4902 auto BodyGenCB = [MaskedRegionBodyStmt,
4903 this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4904 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4906 *this, MaskedRegionBodyStmt, AllocIP, CodeGenIP, "masked");
4907 return llvm::Error::success();
4908 };
4909
4910 LexicalScope Scope(*this, S.getSourceRange());
4911 EmitStopPoint(&S);
4912 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
4913 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
// Continue emitting at the insertion point returned by the builder.
4914 Builder.restoreIP(AfterIP);
4915
4916 return;
4917 }
4918 LexicalScope Scope(*this, S.getSourceRange());
4919 EmitStopPoint(&S);
4920 emitMasked(*this, S);
4921 }
4922
// Emits the 'critical' directive, keyed by the directive's name and an
// optional 'hint' clause. With the OpenMPIRBuilder language option set the
// region is built by OMPBuilder.createCritical; otherwise by the OpenMP
// runtime's emitCriticalRegion.
// NOTE(review): listing lines 4942 and 4949 are absent from this listing.
4923 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4924 if (CGM.getLangOpts().OpenMPIRBuilder) {
4925 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4926 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4927
4928 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4929 const Expr *Hint = nullptr;
4930 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4931 Hint = HintClause->getHint();
4932
4933 // TODO: This is slightly different from what's currently being done in
4934 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4935 // about typing is final.
// HintInst stays null when no 'hint' clause is present; otherwise the hint is
// cast to Int32Ty with the signedness argument set to false.
4936 llvm::Value *HintInst = nullptr;
4937 if (Hint)
4938 HintInst =
4939 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
4940
4941 auto FiniCB = [this](InsertPointTy IP) {
4943 return llvm::Error::success();
4944 };
4945
4946 auto BodyGenCB = [CriticalRegionBodyStmt,
4947 this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4948 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4950 *this, CriticalRegionBodyStmt, AllocIP, CodeGenIP, "critical");
4951 return llvm::Error::success();
4952 };
4953
4954 LexicalScope Scope(*this, S.getSourceRange());
4955 EmitStopPoint(&S);
4956 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4957 cantFail(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB,
4958 S.getDirectiveName().getAsString(),
4959 HintInst));
// Continue emitting at the insertion point returned by the builder.
4960 Builder.restoreIP(AfterIP);
4961
4962 return;
4963 }
4964
// Non-OpenMPIRBuilder path: the body and the (possibly null) hint expression
// are handed to the runtime.
4965 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4966 Action.Enter(CGF);
4967 CGF.EmitStmt(S.getAssociatedStmt());
4968 };
4969 const Expr *Hint = nullptr;
4970 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4971 Hint = HintClause->getHint();
4972 LexicalScope Scope(*this, S.getSourceRange());
4973 EmitStopPoint(&S);
4974 CGM.getOpenMPRuntime().emitCriticalRegion(*this,
4975 S.getDirectiveName().getAsString(),
4976 CodeGen, S.getBeginLoc(), Hint);
4977 }
4978
// Emits the combined 'parallel for' directive through
// emitCommonOMPParallelDirective with OMPD_for as the inner kind. When a
// reduction clause carries the 'inscan' modifier, the scan declarations and
// finals are emitted before and after the parallel region.
// NOTE(review): the first signature line and listing lines 4990-4991, 5003,
// 5005 and 5010 are absent from this listing -- confirm against the full source.
4980 const OMPParallelForDirective &S) {
4981 // Emit directive as a combined directive that consists of two implicit
4982 // directives: 'parallel' with 'for' directive.
4983 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4984 Action.Enter(CGF);
4985 emitOMPCopyinClause(CGF, S);
4986 (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4987 };
4988 {
// Yields the iteration count of S as a scalar value.
4989 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4992 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4993 OMPLoopScope LoopScope(CGF, S);
4994 return CGF.EmitScalarExpr(S.getNumIterations());
4995 };
4996 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4997 [](const OMPReductionClause *C) {
4998 return C->getModifier() == OMPC_REDUCTION_inscan;
4999 });
5000 if (IsInscan)
5001 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
5002 auto LPCRegion =
5004 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
5006 if (IsInscan)
5007 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
5008 }
5009 // Check for outer lastprivate conditional update.
5011 }
5012
// Emits the combined 'parallel for simd' directive through
// emitCommonOMPParallelDirective with OMPD_for_simd as the inner kind; the
// loop itself is emitted with cancellation disabled. When a reduction clause
// carries the 'inscan' modifier, the scan declarations and finals are emitted
// before and after the parallel region.
// NOTE(review): the first signature line and listing lines 5024-5025, 5037,
// 5039 and 5044 are absent from this listing -- confirm against the full source.
5014 const OMPParallelForSimdDirective &S) {
5015 // Emit directive as a combined directive that consists of two implicit
5016 // directives: 'parallel' with 'for' directive.
5017 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5018 Action.Enter(CGF);
5019 emitOMPCopyinClause(CGF, S);
5020 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
5021 };
5022 {
// Yields the iteration count of S as a scalar value.
5023 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
5026 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
5027 OMPLoopScope LoopScope(CGF, S);
5028 return CGF.EmitScalarExpr(S.getNumIterations());
5029 };
5030 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
5031 [](const OMPReductionClause *C) {
5032 return C->getModifier() == OMPC_REDUCTION_inscan;
5033 });
5034 if (IsInscan)
5035 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
5036 auto LPCRegion =
5038 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
5040 if (IsInscan)
5041 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
5042 }
5043 // Check for outer lastprivate conditional update.
5045 }
5046
// Emits the combined 'parallel master' directive: inside the parallel region
// the copyin, firstprivate, private and reduction clauses are applied, the
// master region is emitted via emitMaster, and reductions are finalized.
// NOTE(review): the first signature line and listing lines 5064, 5066-5067
// and 5071 are absent from this listing -- confirm against the full source.
5048 const OMPParallelMasterDirective &S) {
5049 // Emit directive as a combined directive that consists of two implicit
5050 // directives: 'parallel' with 'master' directive.
5051 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5052 Action.Enter(CGF);
5053 OMPPrivateScope PrivateScope(CGF);
5054 emitOMPCopyinClause(CGF, S);
5055 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5056 CGF.EmitOMPPrivateClause(S, PrivateScope);
5057 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5058 (void)PrivateScope.Privatize();
5059 emitMaster(CGF, S);
5060 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
5061 };
5062 {
5063 auto LPCRegion =
5065 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
5068 [](CodeGenFunction &) { return nullptr; });
5069 }
5070 // Check for outer lastprivate conditional update.
5072 }
5073
// Emits the combined 'parallel masked' directive: inside the parallel region
// the copyin, firstprivate, private and reduction clauses are applied, the
// masked region is emitted via emitMasked, and reductions are finalized.
// NOTE(review): the first signature line and listing lines 5091, 5093-5094
// and 5098 are absent from this listing -- confirm against the full source.
5075 const OMPParallelMaskedDirective &S) {
5076 // Emit directive as a combined directive that consists of two implicit
5077 // directives: 'parallel' with 'masked' directive.
5078 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5079 Action.Enter(CGF);
5080 OMPPrivateScope PrivateScope(CGF);
5081 emitOMPCopyinClause(CGF, S);
5082 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
5083 CGF.EmitOMPPrivateClause(S, PrivateScope);
5084 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
5085 (void)PrivateScope.Privatize();
5086 emitMasked(CGF, S);
5087 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
5088 };
5089 {
5090 auto LPCRegion =
5092 emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
5095 [](CodeGenFunction &) { return nullptr; });
5096 }
5097 // Check for outer lastprivate conditional update.
5099 }
5100
// Emits the combined 'parallel sections' directive: inside the parallel
// region the copyin clause is applied and the sections are emitted via
// EmitSections.
// NOTE(review): the first signature line and listing lines 5112, 5114 and
// 5117 are absent from this listing -- confirm against the full source.
5102 const OMPParallelSectionsDirective &S) {
5103 // Emit directive as a combined directive that consists of two implicit
5104 // directives: 'parallel' with 'sections' directive.
5105 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5106 Action.Enter(CGF);
5107 emitOMPCopyinClause(CGF, S);
5108 CGF.EmitSections(S);
5109 };
5110 {
5111 auto LPCRegion =
5113 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
5115 }
5116 // Check for outer lastprivate conditional update.
5118 }
5119
5120 namespace {
5121 /// Get the list of variables declared in the context of the untied tasks.
// Statement visitor that collects, into PrivateDecls, every variable with
// local storage declared by a DeclStmt it visits.
// NOTE(review): listing line 5124 (presumably the declaration of the
// PrivateDecls member) is absent from this listing.
5122 class CheckVarsEscapingUntiedTaskDeclContext final
5123 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
5125
5126 public:
5127 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
5128 ~CheckVarsEscapingUntiedTaskDeclContext() = default;
// Records the local-storage variables declared by S; null S is ignored.
5129 void VisitDeclStmt(const DeclStmt *S) {
5130 if (!S)
5131 return;
5132 // Need to privatize only local vars, static locals can be processed as is.
5133 for (const Decl *D : S->decls()) {
5134 if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
5135 if (VD->hasLocalStorage())
5136 PrivateDecls.push_back(VD);
5137 }
5138 }
// Empty visitors: nothing is collected from, and the traversal does not
// descend into, nested OpenMP directives, captured statements, lambdas or
// block expressions.
5139 void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
5140 void VisitCapturedStmt(const CapturedStmt *) {}
5141 void VisitLambdaExpr(const LambdaExpr *) {}
5142 void VisitBlockExpr(const BlockExpr *) {}
// Generic fallback: visits every non-null child of S.
5143 void VisitStmt(const Stmt *S) {
5144 if (!S)
5145 return;
5146 for (const Stmt *Child : S->children())
5147 if (Child)
5148 Visit(Child);
5149 }
5150
5151 /// Returns a read-only view of the variables collected so far.
5152 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
5153 };
5154 } // anonymous namespace
5155
// Collects the 'depend' clauses of S into Data.Dependences. An
// 'omp_all_memory' dependence, if any, is added first; 'out'/'inout'
// dependences are then skipped because they are overridden by it.
// NOTE(review): the signature (listing lines 5156-5157) and the lines that
// bind DD (5170 and 5184) are absent from this listing; DD is presumably a
// reference to the entry just emplaced into Data.Dependences -- confirm
// against the full source.
5158
5159 // First look for 'omp_all_memory' and add this first.
5160 bool OmpAllMemory = false;
5161 if (llvm::any_of(
5162 S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
5163 return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
5164 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
5165 })) {
5166 OmpAllMemory = true;
5167 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
5168 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
5169 // simplify.
5171 Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
5172 /*IteratorExpr=*/nullptr);
5173 // Add a nullptr Expr to simplify the codegen in emitDependData.
5174 DD.DepExprs.push_back(nullptr);
5175 }
5176 // Add remaining dependences skipping any 'out' or 'inout' if they are
5177 // overridden by 'omp_all_memory'.
5178 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
5179 OpenMPDependClauseKind Kind = C->getDependencyKind();
// The all-memory kinds were already handled by the single entry added above.
5180 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
5181 continue;
5182 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
5183 continue;
5185 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
5186 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
5187 }
5188 }
5189
// Common lowering path for task-like directives. Visible steps:
//  1. evaluate the 'final' and 'priority' clauses of S into Data;
//  2. gather private, firstprivate, lastprivate and reduction items from the
//     clauses of S into Data;
//  3. build the CodeGen callback that privatizes those items inside the
//     outlined task function and then runs BodyGen;
//  4. outline the task function through the OpenMP runtime and pass it,
//     together with Data, to TaskGen.
// NOTE(review): the first line of the signature and several statements (for
// example the declarations of Data's parameter, LHSs/RHSs, Scope, CallArgs,
// ParamTypes, PrivatePtrs, FirstprivatePtrs, InRedVars and EKind) are not
// present in this listing - confirm details against the full source.
5191 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
5192 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
5194 // Emit outlined function for task construct.
5195 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
// I is the first parameter of the captured declaration, PartId the second and
// TaskT the fifth (std::next(I, 4)).
5196 auto I = CS->getCapturedDecl()->param_begin();
5197 auto PartId = std::next(I);
5198 auto TaskT = std::next(I, 4);
5199 // Check if the task is final
5200 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
5201 // If the condition constant folds and can be elided, try to avoid emitting
5202 // the condition and the dead arm of the if/else.
5203 const Expr *Cond = Clause->getCondition();
5204 bool CondConstant;
5205 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
5206 Data.Final.setInt(CondConstant);
5207 else
5208 Data.Final.setPointer(EvaluateExprAsBool(Cond));
5209 } else {
5210 // By default the task is not final.
5211 Data.Final.setInt(/*IntVal=*/false);
5212 }
5213 // Check if the task has 'priority' clause.
5214 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
5215 const Expr *Prio = Clause->getPriority();
5216 Data.Priority.setInt(/*IntVal=*/true);
// The priority value is converted to a signed 32-bit integer type.
5217 Data.Priority.setPointer(EmitScalarConversion(
5218 EmitScalarExpr(Prio), Prio->getType(),
5219 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
5220 Prio->getExprLoc()));
5221 }
5222 // The first function argument for tasks is a thread id, the second one is a
5223 // part id (0 for tied tasks, >=0 for untied task).
// EmittedAsPrivate deduplicates by canonical declaration, so a variable that
// appears more than once is recorded only the first time.
5224 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
5225 // Get list of private variables.
5226 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
5227 auto IRef = C->varlist_begin();
5228 for (const Expr *IInit : C->private_copies()) {
5229 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
5230 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
5231 Data.PrivateVars.push_back(*IRef);
5232 Data.PrivateCopies.push_back(IInit);
5233 }
5234 ++IRef;
5235 }
5236 }
// Reset the dedup set. It is then shared, without another reset, by the
// firstprivate and lastprivate scans below, so a variable already recorded as
// firstprivate is not added to Data.LastprivateVars.
5237 EmittedAsPrivate.clear();
5238 // Get list of firstprivate variables.
5239 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5240 auto IRef = C->varlist_begin();
5241 auto IElemInitRef = C->inits().begin();
5242 for (const Expr *IInit : C->private_copies()) {
5243 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
5244 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
5245 Data.FirstprivateVars.push_back(*IRef);
5246 Data.FirstprivateCopies.push_back(IInit);
5247 Data.FirstprivateInits.push_back(*IElemInitRef);
5248 }
5249 ++IRef;
5250 ++IElemInitRef;
5251 }
5252 }
5253 // Get list of lastprivate variables (for taskloops).
5254 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
5255 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
5256 auto IRef = C->varlist_begin();
5257 auto ID = C->destination_exprs().begin();
5258 for (const Expr *IInit : C->private_copies()) {
5259 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
5260 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
5261 Data.LastprivateVars.push_back(*IRef);
5262 Data.LastprivateCopies.push_back(IInit);
5263 }
// The destination-to-original mapping is recorded for every list item, whether
// or not the item was deduplicated above.
5264 LastprivateDstsOrigs.insert(
5265 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
5266 cast<DeclRefExpr>(*IRef)));
5267 ++IRef;
5268 ++ID;
5269 }
5270 }
// Gather the items of all 'reduction' clauses; the same variable list is
// appended to both Data.ReductionVars and Data.ReductionOrigs.
5273 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
5274 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5275 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5276 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5277 Data.ReductionOps.append(C->reduction_ops().begin(),
5278 C->reduction_ops().end());
5279 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5280 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5281 }
5282 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
5283 *this, S.getBeginLoc(), LHSs, RHSs, Data);
5284 // Build list of dependences.
5286 // Get list of local vars for untied tasks.
5287 if (!Data.Tied) {
5288 CheckVarsEscapingUntiedTaskDeclContext Checker;
5289 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
5290 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
5291 Checker.getPrivateDecls().end());
5292 }
// Body generator for the outlined task function. It captures Data, S, BodyGen
// and LastprivateDstsOrigs by reference and CS and CapturedRegion by value.
5293 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
5294 CapturedRegion](CodeGenFunction &CGF,
5295 PrePostActionTy &Action) {
5296 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
5297 std::pair<Address, Address>>
5298 UntiedLocalVars;
5299 // Set proper addresses for generated private copies.
5301 // Generate debug info for variables present in shared clause.
5302 if (auto *DI = CGF.getDebugInfo()) {
5303 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
5304 CGF.CapturedStmtInfo->getCaptureFields();
5305 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
5306 if (CaptureFields.size() && ContextValue) {
5307 unsigned CharWidth = CGF.getContext().getCharWidth();
5308 // The shared variables are packed together as members of structure.
5309 // So the address of each shared variable can be computed by adding
5310 // offset of it (within record) to the base address of record. For each
5311 // shared variable, debug intrinsic llvm.dbg.declare is generated with
5312 // appropriate expressions (DIExpression).
5313 // Ex:
5314 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
5315 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
5316 // metadata !svar1,
5317 // metadata !DIExpression(DW_OP_deref))
5318 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
5319 // metadata !svar2,
5320 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
5321 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
5322 const VarDecl *SharedVar = It->first;
5323 RecordDecl *CaptureRecord = It->second->getParent();
5324 const ASTRecordLayout &Layout =
5325 CGF.getContext().getASTRecordLayout(CaptureRecord);
// The field offset is divided by the character width before it is used as the
// DW_OP_plus_uconst operand.
5326 unsigned Offset =
5327 Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
5328 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5329 (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
5330 CGF.Builder, false);
5331 // Get the call dbg.declare instruction we just created and update
5332 // its DIExpression to add offset to base address.
5333 auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
5334 unsigned Offset) {
5336 // Add offset to the base address if non zero.
5337 if (Offset) {
5338 Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
5339 Ops.push_back(Offset);
5340 }
5341 Ops.push_back(llvm::dwarf::DW_OP_deref);
5342 Declare->setExpression(llvm::DIExpression::get(Ctx, Ops));
5343 };
5344 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
5345 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last))
5346 UpdateExpr(DDI->getContext(), DDI, Offset);
5347 // If we're emitting using the new debug info format into a block
5348 // without a terminator, the record will be "trailing".
5349 assert(!Last.isTerminator() && "unexpected terminator");
5350 if (auto *Marker =
5351 CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) {
// Only the last variable record of the trailing marker is updated: the range
// is walked in reverse and the loop breaks after the first element.
5352 for (llvm::DbgVariableRecord &DVR : llvm::reverse(
5353 llvm::filterDbgVars(Marker->getDbgRecordRange()))) {
5354 UpdateExpr(Last.getContext(), &DVR, Offset);
5355 break;
5356 }
5357 }
5358 }
5359 }
5360 }
5362 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
5363 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
// Parameters 2 and 3 of the captured declaration are loaded as the privates
// pointer and the copy function.
5364 enum { PrivatesParam = 2, CopyFnParam = 3 };
5365 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5366 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
5367 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
5368 CS->getCapturedDecl()->getParam(PrivatesParam)));
5369 // Map privates.
// The copy function is called with the privates pointer followed by one
// pointer-sized temporary per private, firstprivate, lastprivate and
// untied-local variable, in that order.
5373 CallArgs.push_back(PrivatesPtr);
5374 ParamTypes.push_back(PrivatesPtr->getType());
5375 for (const Expr *E : Data.PrivateVars) {
5376 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5377 RawAddress PrivatePtr = CGF.CreateMemTempWithoutCast(
5378 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
5379 PrivatePtrs.emplace_back(VD, PrivatePtr);
5380 CallArgs.push_back(PrivatePtr.getPointer());
5381 ParamTypes.push_back(PrivatePtr.getType());
5382 }
5383 for (const Expr *E : Data.FirstprivateVars) {
5384 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5385 RawAddress PrivatePtr = CGF.CreateMemTempWithoutCast(
5386 CGF.getContext().getPointerType(E->getType()),
5387 ".firstpriv.ptr.addr");
5388 PrivatePtrs.emplace_back(VD, PrivatePtr);
5389 FirstprivatePtrs.emplace_back(VD, PrivatePtr);
5390 CallArgs.push_back(PrivatePtr.getPointer());
5391 ParamTypes.push_back(PrivatePtr.getType());
5392 }
5393 for (const Expr *E : Data.LastprivateVars) {
5394 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5395 RawAddress PrivatePtr = CGF.CreateMemTempWithoutCast(
5396 CGF.getContext().getPointerType(E->getType()),
5397 ".lastpriv.ptr.addr");
5398 PrivatePtrs.emplace_back(VD, PrivatePtr);
5399 CallArgs.push_back(PrivatePtr.getPointer());
5400 ParamTypes.push_back(PrivatePtr.getType());
5401 }
5402 for (const VarDecl *VD : Data.PrivateLocals) {
// One extra pointer level is added for lvalue-reference variables and one for
// allocatable declarations.
5404 if (VD->getType()->isLValueReferenceType())
5405 Ty = CGF.getContext().getPointerType(Ty);
5406 if (isAllocatableDecl(VD))
5407 Ty = CGF.getContext().getPointerType(Ty);
5408 RawAddress PrivatePtr = CGF.CreateMemTempWithoutCast(
5409 CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
5410 auto Result = UntiedLocalVars.insert(
5411 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
5412 // If key exists update in place.
5413 if (Result.second == false)
5414 *Result.first = std::make_pair(
5415 VD, std::make_pair(PrivatePtr, Address::invalid()));
5416 CallArgs.push_back(PrivatePtr.getPointer());
5417 ParamTypes.push_back(PrivatePtr.getType());
5418 }
5419 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
5420 ParamTypes, /*isVarArg=*/false);
5421 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5422 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
// Map each lastprivate destination declaration to the address of its original
// variable, evaluated through a temporary DeclRefExpr.
5423 for (const auto &Pair : LastprivateDstsOrigs) {
5424 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
5425 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
5426 /*RefersToEnclosingVariableOrCapture=*/
5427 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
5428 Pair.second->getType(), VK_LValue,
5429 Pair.second->getExprLoc());
5430 Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress());
5431 }
// Each temporary now holds the address written by the copy function; load it
// and register it as the private replacement for the variable.
5432 for (const auto &Pair : PrivatePtrs) {
5433 Address Replacement = Address(
5434 CGF.Builder.CreateLoad(Pair.second),
5435 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5436 CGF.getContext().getDeclAlign(Pair.first));
5437 Scope.addPrivate(Pair.first, Replacement);
5438 if (auto *DI = CGF.getDebugInfo())
5439 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5440 (void)DI->EmitDeclareOfAutoVariable(
5441 Pair.first, Pair.second.getBasePointer(), CGF.Builder,
5442 /*UsePointerValue*/ true);
5443 }
5444 // Adjust mapping for internal locals by mapping actual memory instead of
5445 // a pointer to this memory.
5446 for (auto &Pair : UntiedLocalVars) {
5447 QualType VDType = Pair.first->getType().getNonReferenceType();
5448 if (Pair.first->getType()->isLValueReferenceType())
5449 VDType = CGF.getContext().getPointerType(VDType);
5450 if (isAllocatableDecl(Pair.first)) {
// Allocatable declarations need two loads: the first yields the slot that
// holds the pointer, the second the storage itself. Both addresses are kept.
5451 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
5452 Address Replacement(
5453 Ptr,
5454 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
5455 CGF.getPointerAlign());
5456 Pair.second.first = Replacement;
5457 Ptr = CGF.Builder.CreateLoad(Replacement);
5458 Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
5459 CGF.getContext().getDeclAlign(Pair.first));
5460 Pair.second.second = Replacement;
5461 } else {
5462 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
5463 Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
5464 CGF.getContext().getDeclAlign(Pair.first));
5465 Pair.second.first = Replacement;
5466 }
5467 }
5468 }
5469 if (Data.Reductions) {
// Firstprivates are privatized in their own scope before the reduction items
// are emitted.
5470 OMPPrivateScope FirstprivateScope(CGF);
5471 for (const auto &Pair : FirstprivatePtrs) {
5472 Address Replacement(
5473 CGF.Builder.CreateLoad(Pair.second),
5474 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5475 CGF.getContext().getDeclAlign(Pair.first));
5476 FirstprivateScope.addPrivate(Pair.first, Replacement);
5477 }
5478 (void)FirstprivateScope.Privatize();
5479 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5480 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5481 Data.ReductionCopies, Data.ReductionOps);
// Parameter index 9 of the captured declaration is loaded as the reductions
// pointer.
5482 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5483 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
5484 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5485 RedCG.emitSharedOrigLValue(CGF, Cnt);
5486 RedCG.emitAggregateType(CGF, Cnt);
5487 // FIXME: This must be removed once the runtime library is fixed.
5488 // Emit required threadprivate variables for
5489 // initializer/combiner/finalizer.
5490 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5491 RedCG, Cnt);
5492 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5493 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
// Convert the void* item returned by the runtime to a pointer to the private
// copy's type, keeping the alignment of the returned address.
5494 Replacement = Address(
5495 CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
5496 CGF.getContext().VoidPtrTy,
5497 CGF.getContext().getPointerType(
5498 Data.ReductionCopies[Cnt]->getType()),
5499 Data.ReductionCopies[Cnt]->getExprLoc()),
5500 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5501 Replacement.getAlignment());
5502 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5503 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5504 }
5505 }
5506 // Privatize all private variables except for in_reduction items.
5507 (void)Scope.Privatize();
5511 SmallVector<const Expr *, 4> TaskgroupDescriptors;
// Flatten the 'in_reduction' clauses into parallel vectors: variable, private
// copy, reduction op and taskgroup descriptor share one index.
5512 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5513 auto IPriv = C->privates().begin();
5514 auto IRed = C->reduction_ops().begin();
5515 auto ITD = C->taskgroup_descriptors().begin();
5516 for (const Expr *Ref : C->varlist()) {
5517 InRedVars.emplace_back(Ref);
5518 InRedPrivs.emplace_back(*IPriv);
5519 InRedOps.emplace_back(*IRed);
5520 TaskgroupDescriptors.emplace_back(*ITD);
5521 std::advance(IPriv, 1);
5522 std::advance(IRed, 1);
5523 std::advance(ITD, 1);
5524 }
5525 }
5526 // Privatize in_reduction items here, because taskgroup descriptors must be
5527 // privatized earlier.
5528 OMPPrivateScope InRedScope(CGF);
5529 if (!InRedVars.empty()) {
5530 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5531 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5532 RedCG.emitSharedOrigLValue(CGF, Cnt);
5533 RedCG.emitAggregateType(CGF, Cnt);
5534 // The taskgroup descriptor variable is always implicit firstprivate and
5535 // privatized already during processing of the firstprivates.
5536 // FIXME: This must be removed once the runtime library is fixed.
5537 // Emit required threadprivate variables for
5538 // initializer/combiner/finalizer.
5539 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5540 RedCG, Cnt);
// A null taskgroup descriptor expression yields a null reductions pointer.
5541 llvm::Value *ReductionsPtr;
5542 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5543 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
5544 TRExpr->getExprLoc());
5545 } else {
5546 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5547 }
5548 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5549 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5550 Replacement = Address(
5551 CGF.EmitScalarConversion(
5552 Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy,
5553 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
5554 InRedPrivs[Cnt]->getExprLoc()),
5555 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
5556 Replacement.getAlignment());
5557 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5558 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5559 }
5560 }
5561 (void)InRedScope.Privatize();
5562
5564 UntiedLocalVars);
5565 Action.Enter(CGF);
5566 BodyGen(CGF);
5567 };
// Outline the task body through the OpenMP runtime, then let the caller's
// TaskGen callback emit the task from the outlined function and Data.
5569 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5570 S, *I, *PartId, *TaskT, EKind, CodeGen, Data.Tied, Data.NumberOfParts);
5571 OMPLexicalScope Scope(*this, S, std::nullopt,
5572 !isOpenMPParallelDirective(EKind) &&
5573 !isOpenMPSimdDirective(EKind));
5574 TaskGen(*this, OutlinedFn, Data);
5575}
5576
// Creates three implicit parameter declarations in CD (OrigVD, PrivateVD and
// InitVD) with a DeclRefExpr for each, gives PrivateVD a C-style initializer
// built from InitRef, and appends OrigRef / PrivateRef / InitRef to
// Data.FirstprivateVars / FirstprivateCopies / FirstprivateInits.
// Returns OrigVD.
// NOTE(review): the line carrying the function name and leading parameters,
// plus several argument lines, are not present in this listing - confirm
// against the full source.
5577 static ImplicitParamDecl *
5579 QualType Ty, CapturedDecl *CD,
5580 SourceLocation Loc) {
5581 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
5583 auto *OrigRef = DeclRefExpr::Create(
5585 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
5586 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
5588 auto *PrivateRef = DeclRefExpr::Create(
5589 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
5590 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
// The init declaration uses the base element type of Ty, not Ty itself.
5591 QualType ElemType = C.getBaseElementType(Ty);
5592 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
5594 auto *InitRef = DeclRefExpr::Create(
5596 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
// PrivateVD is initialized from an lvalue-to-rvalue cast of InitRef.
5597 PrivateVD->setInitStyle(VarDecl::CInit);
5598 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
5599 InitRef, /*BasePath=*/nullptr,
5601 Data.FirstprivateVars.emplace_back(OrigRef);
5602 Data.FirstprivateCopies.emplace_back(PrivateRef);
5603 Data.FirstprivateInits.emplace_back(InitRef);
5604 return OrigVD;
5605}
5606
// Lowers the task that wraps a target-style directive S. Visible steps:
//  1. collect firstprivate and in_reduction items of S into Data;
//  2. when InputInfo reports target items, create implicit firstprivate arrays
//     for the base pointers, pointers, sizes and (if present) mappers;
//  3. build the CodeGen callback that privatizes the firstprivates, rebinds
//     the InputInfo arrays to the task-local copies and runs BodyGen;
//  4. outline the task function (always tied) and emit the task call.
// NOTE(review): the first signature line and several statements (for example
// the declarations of Data, LHSs/RHSs, EKind, Scope, CallArgs, ParamTypes and
// PrivatePtrs, and the left-hand sides of the BPVD/PVD/MVD assignments) are
// not present in this listing - confirm against the full source.
5608 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
5609 OMPTargetDataInfo &InputInfo) {
5610 // Emit outlined function for task construct.
5611 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5612 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5613 CanQualType SharedsTy =
// I is the first parameter of the captured declaration, PartId the second and
// TaskT the fifth (std::next(I, 4)).
5615 auto I = CS->getCapturedDecl()->param_begin();
5616 auto PartId = std::next(I);
5617 auto TaskT = std::next(I, 4);
5619 // The task is not final.
5620 Data.Final.setInt(/*IntVal=*/false);
5621 // Get list of firstprivate variables.
5622 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5623 auto IRef = C->varlist_begin();
5624 auto IElemInitRef = C->inits().begin();
5625 for (auto *IInit : C->private_copies()) {
5626 Data.FirstprivateVars.push_back(*IRef);
5627 Data.FirstprivateCopies.push_back(IInit);
5628 Data.FirstprivateInits.push_back(*IElemInitRef);
5629 ++IRef;
5630 ++IElemInitRef;
5631 }
5632 }
// Gather the items of all 'in_reduction' clauses into Data.
5635 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5636 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5637 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5638 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5639 Data.ReductionOps.append(C->reduction_ops().begin(),
5640 C->reduction_ops().end());
5641 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5642 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5643 }
5644 OMPPrivateScope TargetScope(*this);
// Implicit declarations for the base-pointer, pointer, size and mapper arrays;
// they stay null when there are no target items.
5645 VarDecl *BPVD = nullptr;
5646 VarDecl *PVD = nullptr;
5647 VarDecl *SVD = nullptr;
5648 VarDecl *MVD = nullptr;
5649 if (InputInfo.NumberOfTargetItems > 0) {
5650 auto *CD = CapturedDecl::Create(
5651 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
// Array types with NumberOfTargetItems elements: void* elements for base
// pointers / pointers / mappers, signed 64-bit integers for sizes.
5652 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
5653 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
5654 getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal,
5655 /*IndexTypeQuals=*/0);
5657 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5659 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5661 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5662 ArrSize, nullptr, ArraySizeModifier::Normal,
5663 /*IndexTypeQuals=*/0);
5664 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
5665 S.getBeginLoc());
5666 TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
5667 TargetScope.addPrivate(PVD, InputInfo.PointersArray);
5668 TargetScope.addPrivate(SVD, InputInfo.SizesArray);
5669 // If there is no user-defined mapper, the mapper array will be nullptr. In
5670 // this case, we don't need to privatize it.
5671 if (!isa_and_nonnull<llvm::ConstantPointerNull>(
5672 InputInfo.MappersArray.emitRawPointer(*this))) {
5674 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5675 TargetScope.addPrivate(MVD, InputInfo.MappersArray);
5676 }
5677 }
5678 (void)TargetScope.Privatize();
// Body generator for the outlined task function. InputInfo is captured by
// reference and is updated inside the callback.
5681 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, EKind,
5682 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5683 // Set proper addresses for generated private copies.
5685 if (!Data.FirstprivateVars.empty()) {
// Parameters 2 and 3 of the captured declaration are loaded as the privates
// pointer and the copy function.
5686 enum { PrivatesParam = 2, CopyFnParam = 3 };
5687 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5688 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
5689 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
5690 CS->getCapturedDecl()->getParam(PrivatesParam)));
5691 // Map privates.
5695 CallArgs.push_back(PrivatesPtr);
5696 ParamTypes.push_back(PrivatesPtr->getType());
5697 for (const Expr *E : Data.FirstprivateVars) {
5698 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5699 RawAddress PrivatePtr = CGF.CreateMemTempWithoutCast(
5700 CGF.getContext().getPointerType(E->getType()),
5701 ".firstpriv.ptr.addr");
5702 PrivatePtrs.emplace_back(VD, PrivatePtr);
5703 CallArgs.push_back(PrivatePtr.getPointer());
5704 ParamTypes.push_back(PrivatePtr.getType());
5705 }
5706 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
5707 ParamTypes, /*isVarArg=*/false);
5708 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5709 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
// Each temporary now holds the address written by the copy function; load it
// and register it as the private replacement for the variable.
5710 for (const auto &Pair : PrivatePtrs) {
5711 Address Replacement(
5712 CGF.Builder.CreateLoad(Pair.second),
5713 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5714 CGF.getContext().getDeclAlign(Pair.first));
5715 Scope.addPrivate(Pair.first, Replacement);
5716 }
5717 }
5718 CGF.processInReduction(S, Data, CGF, CS, Scope);
// Point InputInfo at element 0 of the task-local array copies.
5719 if (InputInfo.NumberOfTargetItems > 0) {
5720 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
5721 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
5722 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
5723 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
5724 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
5725 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
5726 // If MVD is nullptr, the mapper array is not privatized
5727 if (MVD)
5728 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
5729 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
5730 }
5731
5732 Action.Enter(CGF);
5733 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
5734 auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5735 if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
5736 needsTaskBasedThreadLimit(EKind) && TL) {
5737 // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
5738 // enclosing this target region. This will indirectly set the thread_limit
5739 // for every applicable construct within target region.
5740 CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
5741 CGF, TL->getThreadLimit().front(), S.getBeginLoc());
5742 }
5743 BodyGen(CGF);
5744 };
5745 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5746 S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true,
5747 Data.NumberOfParts);
// The 'if' condition passed to the task call is a 32-bit unsigned literal:
// 1 when S has a nowait clause, 0 otherwise.
5748 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
5749 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5750 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5751 SourceLocation());
5752 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
5753 SharedsTy, CapturedStruct, &IfCond, Data);
5754}
5755
// Privatizes reduction-related items inside an outlined task function:
//  1. when Data.Reductions is set, maps each entry of Data.ReductionVars to
//     the item returned by the runtime and registers it in Scope;
//  2. privatizes Scope;
//  3. does the same for the items of all 'in_reduction' clauses of S, using a
//     separate InRedScope and the per-item taskgroup descriptor.
// NOTE(review): the leading signature lines and several statements (for
// example the declarations of EKind, InRedVars, InRedPrivs and InRedOps, and
// the operand of the ReductionsPtr load) are not present in this listing -
// confirm against the full source.
5758 CodeGenFunction &CGF,
5759 const CapturedStmt *CS,
5762 if (Data.Reductions) {
5763 OpenMPDirectiveKind CapturedRegion = EKind;
5764 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5765 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5766 Data.ReductionCopies, Data.ReductionOps);
5767 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5769 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5770 RedCG.emitSharedOrigLValue(CGF, Cnt);
5771 RedCG.emitAggregateType(CGF, Cnt);
5772 // FIXME: This must be removed once the runtime library is fixed.
5773 // Emit required threadprivate variables for
5774 // initializer/combiner/finalizer.
5775 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5776 RedCG, Cnt);
5778 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
// Convert the void* item to a pointer to the private copy's type, keeping the
// alignment of the original address.
5779 Replacement = Address(
5780 CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
5781 CGF.getContext().VoidPtrTy,
5783 Data.ReductionCopies[Cnt]->getType()),
5784 Data.ReductionCopies[Cnt]->getExprLoc()),
5785 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5786 Replacement.getAlignment());
5787 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5788 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5789 }
5790 }
5791 (void)Scope.Privatize();
5795 SmallVector<const Expr *, 4> TaskgroupDescriptors;
// Flatten the 'in_reduction' clauses into parallel vectors: variable, private
// copy, reduction op and taskgroup descriptor share one index.
5796 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5797 auto IPriv = C->privates().begin();
5798 auto IRed = C->reduction_ops().begin();
5799 auto ITD = C->taskgroup_descriptors().begin();
5800 for (const Expr *Ref : C->varlist()) {
5801 InRedVars.emplace_back(Ref);
5802 InRedPrivs.emplace_back(*IPriv);
5803 InRedOps.emplace_back(*IRed);
5804 TaskgroupDescriptors.emplace_back(*ITD);
5805 std::advance(IPriv, 1);
5806 std::advance(IRed, 1);
5807 std::advance(ITD, 1);
5808 }
5809 }
5810 OMPPrivateScope InRedScope(CGF);
5811 if (!InRedVars.empty()) {
5812 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5813 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5814 RedCG.emitSharedOrigLValue(CGF, Cnt);
5815 RedCG.emitAggregateType(CGF, Cnt);
5816 // FIXME: This must be removed once the runtime library is fixed.
5817 // Emit required threadprivate variables for
5818 // initializer/combiner/finalizer.
5819 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5820 RedCG, Cnt);
// A null taskgroup descriptor expression yields a null reductions pointer.
5821 llvm::Value *ReductionsPtr;
5822 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5823 ReductionsPtr =
5824 CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc());
5825 } else {
5826 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5827 }
5829 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5830 Replacement = Address(
5832 Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy,
5833 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
5834 InRedPrivs[Cnt]->getExprLoc()),
5835 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
5836 Replacement.getAlignment());
5837 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5838 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5839 }
5840 }
5841 (void)InRedScope.Privatize();
5842}
5843
// Lowers an OMPTaskDirective: selects the applicable 'if' condition, decides
// tied versus untied, and delegates to EmitOMPTaskBasedDirective with
// callbacks that emit the task body and the runtime task call.
// NOTE(review): the initializer of SharedsTy, the declaration of Data and the
// initializer of LPCRegion are not present in this listing - confirm against
// the full source.
5844void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5845 // Emit outlined function for task construct.
5846 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5847 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5848 CanQualType SharedsTy =
// Use the first 'if' clause whose name modifier is either absent
// (OMPD_unknown) or 'task'; IfCond stays null when there is none.
5850 const Expr *IfCond = nullptr;
5851 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5852 if (C->getNameModifier() == OMPD_unknown ||
5853 C->getNameModifier() == OMPD_task) {
5854 IfCond = C->getCondition();
5855 break;
5856 }
5857 }
5858
5860 // Check if we should emit tied or untied task.
5861 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
// BodyGen emits the statement captured by the task region.
5862 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5863 CGF.EmitStmt(CS->getCapturedStmt());
5864 };
// TaskGen forwards the outlined function and the collected Data to the
// runtime's task call.
5865 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5866 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5867 const OMPTaskDataTy &Data) {
5868 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
5869 SharedsTy, CapturedStruct, IfCond,
5870 Data);
5871 };
5872 auto LPCRegion =
5874 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
5875}
5876
// Lowers an OMPTaskyieldDirective by asking the OpenMP runtime to emit a
// taskyield call at the directive's begin location.
// NOTE(review): the first line of the signature is not present in this
// listing - confirm against the full source.
5878 const OMPTaskyieldDirective &S) {
5879 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
5880}
5881
// Emits the runtime error call for directive S. The message expression is
// taken from the message clause (null when absent). The call is fatal when
// there is no severity clause or when its kind is OMPC_SEVERITY_fatal.
// NOTE(review): the signature of this function is not present in this
// listing - confirm against the full source.
5883 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5884 Expr *ME = MC ? MC->getMessageString() : nullptr;
5885 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5886 bool IsFatal = false;
5887 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5888 IsFatal = true;
5889 CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal);
5890}
5891
5892void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5893 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
5894}
5895
// Lowers an OMPTaskwaitDirective: records whether S carries a nowait clause in
// Data and asks the OpenMP runtime to emit the taskwait call.
// NOTE(review): the declaration of Data and the statement that builds the
// dependence list are not present in this listing - confirm against the full
// source.
5896void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5898 // Build list of dependences
5900 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5901 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
5902}
5903
5904static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5905 return T.clauses().empty();
5906}
5907
// Lowers an OMPTaskgroupDirective. Two paths are visible:
//  - when the OpenMPIRBuilder language option is on and the directive has no
//    clauses, the region is created through OpenMPIRBuilder::createTaskgroup;
//  - otherwise the region is emitted through the OpenMP runtime, first
//    initializing task reductions when the directive has a reduction
//    reference.
// NOTE(review): the first signature line and several statements (for example
// the declarations of CapStmtInfo, LHSs/RHSs and Data) are not present in
// this listing - confirm against the full source.
5909 const OMPTaskgroupDirective &S) {
5910 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5911 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
5912 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5913 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5914 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5915 AllocaInsertPt->getIterator());
5916
// Body callback: continue emission at CodeGenIP and emit the statement
// captured by the innermost region. It always reports success.
5917 auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
5918 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
5919 Builder.restoreIP(CodeGenIP);
5920 EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5921 return llvm::Error::success();
5922 };
// Install CapStmtInfo only when no captured-statement info is active yet.
5924 if (!CapturedStmtInfo)
5925 CapturedStmtInfo = &CapStmtInfo;
5926 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
5927 cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP,
5928 /*DeallocBlocks=*/{}, BodyGenCB));
5929 Builder.restoreIP(AfterIP);
5930 return;
5931 }
5932 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5933 Action.Enter(CGF);
5934 if (const Expr *E = S.getReductionRef()) {
// Gather the items of all 'task_reduction' clauses, initialize the task
// reduction through the runtime, and store the resulting descriptor into the
// variable referenced by E.
5938 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5939 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5940 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5941 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5942 Data.ReductionOps.append(C->reduction_ops().begin(),
5943 C->reduction_ops().end());
5944 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5945 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5946 }
5947 llvm::Value *ReductionDesc =
5948 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
5949 LHSs, RHSs, Data);
5950 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5951 CGF.EmitVarDecl(*VD);
5952 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
5953 /*Volatile=*/false, E->getType());
5954 }
5955 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5956 };
5957 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
5958}
5959
5960void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5961 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5962 ? llvm::AtomicOrdering::NotAtomic
5963 : llvm::AtomicOrdering::AcquireRelease;
5964 CGM.getOpenMPRuntime().emitFlush(
5965 *this,
5966 [&S]() -> ArrayRef<const Expr *> {
5967 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5968 return llvm::ArrayRef(FlushClause->varlist_begin(),
5969 FlushClause->varlist_end());
5970 return {};
5971 }(),
5972 S.getBeginLoc(), AO);
5973}
5974
5975void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5976 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5977 LValue DOLVal = EmitLValue(DO->getDepobj());
5978 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5979 // Build list and emit dependences
5982 for (auto &Dep : Data.Dependences) {
5983 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5984 *this, Dep, DC->getBeginLoc());
5985 EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal);
5986 }
5987 return;
5988 }
5989 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5990 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
5991 return;
5992 }
5993 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5994 CGM.getOpenMPRuntime().emitUpdateClause(
5995 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
5996 return;
5997 }
5998}
5999
6002 return;
6004 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
6009 SmallVector<const Expr *, 4> ReductionOps;
6011 SmallVector<const Expr *, 4> CopyArrayTemps;
6012 SmallVector<const Expr *, 4> CopyArrayElems;
6013 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
6014 if (C->getModifier() != OMPC_REDUCTION_inscan)
6015 continue;
6016 Shareds.append(C->varlist_begin(), C->varlist_end());
6017 Privates.append(C->privates().begin(), C->privates().end());
6018 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
6019 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
6020 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
6021 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
6022 CopyArrayTemps.append(C->copy_array_temps().begin(),
6023 C->copy_array_temps().end());
6024 CopyArrayElems.append(C->copy_array_elems().begin(),
6025 C->copy_array_elems().end());
6026 }
6027 if (ParentDir.getDirectiveKind() == OMPD_simd ||
6028 (getLangOpts().OpenMPSimd &&
6029 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
6030 // For simd directive and simd-based directives in simd only mode, use the
6031 // following codegen:
6032 // int x = 0;
6033 // #pragma omp simd reduction(inscan, +: x)
6034 // for (..) {
6035 // <first part>
6036 // #pragma omp scan inclusive(x)
6037 // <second part>
6038 // }
6039 // is transformed to:
6040 // int x = 0;
6041 // for (..) {
6042 // int x_priv = 0;
6043 // <first part>
6044 // x = x_priv + x;
6045 // x_priv = x;
6046 // <second part>
6047 // }
6048 // and
6049 // int x = 0;
6050 // #pragma omp simd reduction(inscan, +: x)
6051 // for (..) {
6052 // <first part>
6053 // #pragma omp scan exclusive(x)
6054 // <second part>
6055 // }
6056 // to
6057 // int x = 0;
6058 // for (..) {
6059 // int x_priv = 0;
6060 // <second part>
6061 // int temp = x;
6062 // x = x_priv + x;
6063 // x_priv = temp;
6064 // <first part>
6065 // }
6066 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
6067 EmitBranch(IsInclusive
6068 ? OMPScanReduce
6069 : BreakContinueStack.back().ContinueBlock.getBlock());
6071 {
6072 // New scope for correct construction/destruction of temp variables for
6073 // exclusive scan.
6074 LexicalScope Scope(*this, S.getSourceRange());
6076 EmitBlock(OMPScanReduce);
6077 if (!IsInclusive) {
6078 // Create temp var and copy LHS value to this temp value.
6079 // TMP = LHS;
6080 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
6081 const Expr *PrivateExpr = Privates[I];
6082 const Expr *TempExpr = CopyArrayTemps[I];
6084 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
6085 LValue DestLVal = EmitLValue(TempExpr);
6086 LValue SrcLVal = EmitLValue(LHSs[I]);
6087 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(),
6088 SrcLVal.getAddress(),
6089 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
6090 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
6091 CopyOps[I]);
6092 }
6093 }
6094 CGM.getOpenMPRuntime().emitReduction(
6095 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
6096 {/*WithNowait=*/true, /*SimpleReduction=*/true,
6097 /*IsPrivateVarReduction*/ {}, OMPD_simd});
6098 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
6099 const Expr *PrivateExpr = Privates[I];
6100 LValue DestLVal;
6101 LValue SrcLVal;
6102 if (IsInclusive) {
6103 DestLVal = EmitLValue(RHSs[I]);
6104 SrcLVal = EmitLValue(LHSs[I]);
6105 } else {
6106 const Expr *TempExpr = CopyArrayTemps[I];
6107 DestLVal = EmitLValue(RHSs[I]);
6108 SrcLVal = EmitLValue(TempExpr);
6109 }
6111 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
6112 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
6113 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
6114 }
6115 }
6117 OMPScanExitBlock = IsInclusive
6118 ? BreakContinueStack.back().ContinueBlock.getBlock()
6119 : OMPScanReduce;
6121 return;
6122 }
6123 if (!IsInclusive) {
6124 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
6126 }
6127 if (OMPFirstScanLoop) {
6128 // Emit buffer[i] = red; at the end of the input phase.
6129 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
6130 .getIterationVariable()
6131 ->IgnoreParenImpCasts();
6132 LValue IdxLVal = EmitLValue(IVExpr);
6133 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
6134 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
6135 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
6136 const Expr *PrivateExpr = Privates[I];
6137 const Expr *OrigExpr = Shareds[I];
6138 const Expr *CopyArrayElem = CopyArrayElems[I];
6139 OpaqueValueMapping IdxMapping(
6140 *this,
6142 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
6143 RValue::get(IdxVal));
6144 LValue DestLVal = EmitLValue(CopyArrayElem);
6145 LValue SrcLVal = EmitLValue(OrigExpr);
6147 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
6148 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
6149 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
6150 }
6151 }
6152 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
6153 if (IsInclusive) {
6155 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
6156 }
6158 if (!OMPFirstScanLoop) {
6159 // Emit red = buffer[i]; at the entrance to the scan phase.
6160 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
6161 .getIterationVariable()
6162 ->IgnoreParenImpCasts();
6163 LValue IdxLVal = EmitLValue(IVExpr);
6164 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
6165 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
6166 llvm::BasicBlock *ExclusiveExitBB = nullptr;
6167 if (!IsInclusive) {
6168 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
6169 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
6170 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
6171 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
6172 EmitBlock(ContBB);
6173 // Use idx - 1 iteration for exclusive scan.
6174 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
6175 }
6176 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
6177 const Expr *PrivateExpr = Privates[I];
6178 const Expr *OrigExpr = Shareds[I];
6179 const Expr *CopyArrayElem = CopyArrayElems[I];
6180 OpaqueValueMapping IdxMapping(
6181 *this,
6183 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
6184 RValue::get(IdxVal));
6185 LValue SrcLVal = EmitLValue(CopyArrayElem);
6186 LValue DestLVal = EmitLValue(OrigExpr);
6188 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
6189 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
6190 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
6191 }
6192 if (!IsInclusive) {
6193 EmitBlock(ExclusiveExitBB);
6194 }
6195 }
6199}
6200
6202 const CodeGenLoopTy &CodeGenLoop,
6203 Expr *IncExpr) {
6204 // Emit the loop iteration variable.
6205 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
6206 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
6207 EmitVarDecl(*IVDecl);
6208
6209 // Emit the iterations count variable.
6210 // If it is not a variable, Sema decided to calculate iterations count on each
6211 // iteration (e.g., it is foldable into a constant).
6212 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
6213 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
6214 // Emit calculation of the iterations count.
6215 EmitIgnoredExpr(S.getCalcLastIteration());
6216 }
6217
6218 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
6219
6220 bool HasLastprivateClause = false;
6221 // Check pre-condition.
6222 {
6223 OMPLoopScope PreInitScope(*this, S);
6224 // Skip the entire loop if we don't meet the precondition.
6225 // If the condition constant folds and can be elided, avoid emitting the
6226 // whole loop.
6227 bool CondConstant;
6228 llvm::BasicBlock *ContBlock = nullptr;
6229 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
6230 if (!CondConstant)
6231 return;
6232 } else {
6233 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
6234 ContBlock = createBasicBlock("omp.precond.end");
6235 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
6236 getProfileCount(&S));
6237 EmitBlock(ThenBlock);
6239 }
6240
6241 emitAlignedClause(*this, S);
6242 // Emit 'then' code.
6243 {
6244 // Emit helper vars inits.
6245
6247 *this, cast<DeclRefExpr>(
6248 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
6249 ? S.getCombinedLowerBoundVariable()
6250 : S.getLowerBoundVariable())));
6252 *this, cast<DeclRefExpr>(
6253 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
6254 ? S.getCombinedUpperBoundVariable()
6255 : S.getUpperBoundVariable())));
6256 LValue ST =
6257 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
6258 LValue IL =
6259 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
6260
6261 OMPPrivateScope LoopScope(*this);
6262 if (EmitOMPFirstprivateClause(S, LoopScope)) {
6263 // Emit implicit barrier to synchronize threads and avoid data races
6264 // on initialization of firstprivate variables and post-update of
6265 // lastprivate variables.
6266 CGM.getOpenMPRuntime().emitBarrierCall(
6267 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
6268 /*ForceSimpleCall=*/true);
6269 }
6270 EmitOMPPrivateClause(S, LoopScope);
6271 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
6272 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
6273 !isOpenMPTeamsDirective(S.getDirectiveKind()))
6274 EmitOMPReductionClauseInit(S, LoopScope);
6275 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
6276 EmitOMPPrivateLoopCounters(S, LoopScope);
6277 (void)LoopScope.Privatize();
6278 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6279 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
6280
6281 // Detect the distribute schedule kind and chunk.
6282 llvm::Value *Chunk = nullptr;
6284 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
6285 ScheduleKind = C->getDistScheduleKind();
6286 if (const Expr *Ch = C->getChunkSize()) {
6287 Chunk = EmitScalarExpr(Ch);
6288 Chunk = EmitScalarConversion(Chunk, Ch->getType(),
6289 S.getIterationVariable()->getType(),
6290 S.getBeginLoc());
6291 }
6292 } else {
6293 // Default behaviour for dist_schedule clause.
6294 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
6295 *this, S, ScheduleKind, Chunk);
6296 }
6297 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
6298 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
6299
6300 // GPU fused schedule: omit the outer distribute loop and let the inner
6301 // worksharing loop schedule the flattened team/thread iteration space.
6302 if (canEmitGPUFusedDistSchedule(CGM, S, S.getDirectiveKind())) {
6305 CodeGenLoop(*this, S, LoopExit);
6306 EmitBlock(LoopExit.getBlock());
6307 } else {
6308 // OpenMP [2.10.8, distribute Construct, Description]
6309 // If dist_schedule is specified, kind must be static. If specified,
6310 // iterations are divided into chunks of size chunk_size, chunks are
6311 // assigned to the teams of the league in a round-robin fashion in the
6312 // order of the team number. When no chunk_size is specified, the
6313 // iteration space is divided into chunks that are approximately equal
6314 // in size, and at most one chunk is distributed to each team of the
6315 // league. The size of the chunks is unspecified in this case.
6316 bool StaticChunked =
6317 RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
6318 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
6319 if (RT.isStaticNonchunked(ScheduleKind,
6320 /* Chunked */ Chunk != nullptr) ||
6321 StaticChunked) {
6323 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
6324 LB.getAddress(), UB.getAddress(), ST.getAddress(),
6325 StaticChunked ? Chunk : nullptr);
6326 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
6327 StaticInit);
6330 // UB = min(UB, GlobalUB);
6332 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
6333 ? S.getCombinedEnsureUpperBound()
6334 : S.getEnsureUpperBound());
6335 // IV = LB;
6337 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
6338 ? S.getCombinedInit()
6339 : S.getInit());
6340
6341 const Expr *Cond =
6342 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
6343 ? S.getCombinedCond()
6344 : S.getCond();
6345
6346 if (StaticChunked)
6347 Cond = S.getCombinedDistCond();
6348
6349 // For static unchunked schedules generate:
6350 //
6351 // 1. For distribute alone, codegen
6352 // while (idx <= UB) {
6353 // BODY;
6354 // ++idx;
6355 // }
6356 //
6357 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
6358 // while (idx <= UB) {
6359 // <CodeGen rest of pragma>(LB, UB);
6360 // idx += ST;
6361 // }
6362 //
6363 // For static chunk one schedule generate:
6364 //
6365 // while (IV <= GlobalUB) {
6366 // <CodeGen rest of pragma>(LB, UB);
6367 // LB += ST;
6368 // UB += ST;
6369 // UB = min(UB, GlobalUB);
6370 // IV = LB;
6371 // }
6372 //
6374 *this, S,
6375 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6376 if (isOpenMPSimdDirective(S.getDirectiveKind()))
6377 CGF.EmitOMPSimdInit(S);
6378 },
6379 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
6380 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
6381 CGF.EmitOMPInnerLoop(
6382 S, LoopScope.requiresCleanups(), Cond, IncExpr,
6383 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
6384 CodeGenLoop(CGF, S, LoopExit);
6385 },
6386 [&S, StaticChunked](CodeGenFunction &CGF) {
6387 if (StaticChunked) {
6388 CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
6389 CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
6390 CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
6391 CGF.EmitIgnoredExpr(S.getCombinedInit());
6392 }
6393 });
6394 });
6395 EmitBlock(LoopExit.getBlock());
6396 // Tell the runtime we are done.
6397 RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute);
6398 } else {
6399 // Emit the outer loop, which requests its work chunk [LB..UB] from
6400 // runtime and runs the inner loop to process it.
6401 const OMPLoopArguments LoopArguments = {
6402 LB.getAddress(), UB.getAddress(), ST.getAddress(),
6403 IL.getAddress(), Chunk};
6404 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
6405 CodeGenLoop);
6406 }
6407 }
6408 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
6409 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
6410 return CGF.Builder.CreateIsNotNull(
6411 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
6412 });
6413 }
6414 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
6415 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
6416 !isOpenMPTeamsDirective(S.getDirectiveKind())) {
6417 EmitOMPReductionClauseFinal(S, OMPD_simd);
6418 // Emit post-update of the reduction variables if IsLastIter != 0.
6420 *this, S, [IL, &S](CodeGenFunction &CGF) {
6421 return CGF.Builder.CreateIsNotNull(
6422 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
6423 });
6424 }
6425 // Emit final copy of the lastprivate variables if IsLastIter != 0.
6426 if (HasLastprivateClause) {
6428 S, /*NoFinals=*/false,
6429 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
6430 }
6431 }
6432
6433 // We're now done with the loop, so jump to the continuation block.
6434 if (ContBlock) {
6435 EmitBranch(ContBlock);
6436 EmitBlock(ContBlock, true);
6437 }
6438 }
6439}
6440
6441// Pass OMPLoopDirective (instead of OMPDistributeDirective) to make this
6442// function available for "loop bind(teams)", which maps to "distribute".
6444 CodeGenFunction &CGF,
6445 CodeGenModule &CGM) {
6446 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6448 };
6449 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
6450 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, CodeGen);
6451}
6452
6457
6458static llvm::Function *
6460 const OMPExecutableDirective &D) {
6461 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
6463 CGF.CapturedStmtInfo = &CapStmtInfo;
6464 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, D);
6465 Fn->setDoesNotRecurse();
6466 return Fn;
6467}
6468
6469template <typename T>
6470static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
6471 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6472 llvm::OpenMPIRBuilder &OMPBuilder) {
6473
6474 unsigned NumLoops = C->getNumLoops();
6476 /*DestWidth=*/64, /*Signed=*/1);
6478 for (unsigned I = 0; I < NumLoops; I++) {
6479 const Expr *CounterVal = C->getLoopData(I);
6480 assert(CounterVal);
6481 llvm::Value *StoreValue = CGF.EmitScalarConversion(
6482 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
6483 CounterVal->getExprLoc());
6484 StoreValues.emplace_back(StoreValue);
6485 }
6486 OMPDoacrossKind<T> ODK;
6487 bool IsDependSource = ODK.isSource(C);
6488 CGF.Builder.restoreIP(
6489 OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
6490 StoreValues, ".cnt.addr", IsDependSource));
6491}
6492
6493void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
6494 if (CGM.getLangOpts().OpenMPIRBuilder) {
6495 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
6496 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
6497
6498 if (S.hasClausesOfKind<OMPDependClause>() ||
6499 S.hasClausesOfKind<OMPDoacrossClause>()) {
6500 // The ordered directive with depend clause.
6501 assert(!S.hasAssociatedStmt() && "No associated statement must be in "
6502 "ordered depend|doacross construct.");
6503 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
6504 AllocaInsertPt->getIterator());
6505 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
6506 emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
6507 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
6508 emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
6509 } else {
6510 // The ordered directive with threads or simd clause, or without clause.
6511 // Without clause, it behaves as if the threads clause is specified.
6512 const auto *C = S.getSingleClause<OMPSIMDClause>();
6513
6514 auto FiniCB = [this](InsertPointTy IP) {
6516 return llvm::Error::success();
6517 };
6518
6519 auto BodyGenCB = [&S, C,
6520 this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
6521 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
6522 Builder.restoreIP(CodeGenIP);
6523
6524 const CapturedStmt *CS = S.getInnermostCapturedStmt();
6525 if (C) {
6526 llvm::BasicBlock *FiniBB = splitBBWithSuffix(
6527 Builder, /*CreateBranch=*/false, ".ordered.after");
6529 GenerateOpenMPCapturedVars(*CS, CapturedVars);
6530 llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS, S);
6531 assert(S.getBeginLoc().isValid() &&
6532 "Outlined function call location must be valid.");
6533 ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
6534 OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
6535 OutlinedFn, CapturedVars);
6536 } else {
6538 *this, CS->getCapturedStmt(), AllocIP, CodeGenIP, "ordered");
6539 }
6540 return llvm::Error::success();
6541 };
6542
6543 OMPLexicalScope Scope(*this, S, OMPD_unknown);
6544 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
6545 OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
6546 Builder.restoreIP(AfterIP);
6547 }
6548 return;
6549 }
6550
6551 if (S.hasClausesOfKind<OMPDependClause>()) {
6552 assert(!S.hasAssociatedStmt() &&
6553 "No associated statement must be in ordered depend construct.");
6554 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
6555 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
6556 return;
6557 }
6558 if (S.hasClausesOfKind<OMPDoacrossClause>()) {
6559 assert(!S.hasAssociatedStmt() &&
6560 "No associated statement must be in ordered doacross construct.");
6561 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
6562 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
6563 return;
6564 }
6565 const auto *C = S.getSingleClause<OMPSIMDClause>();
6566 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
6567 PrePostActionTy &Action) {
6568 const CapturedStmt *CS = S.getInnermostCapturedStmt();
6569 if (C) {
6571 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
6572 llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS, S);
6573 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
6574 OutlinedFn, CapturedVars);
6575 } else {
6576 Action.Enter(CGF);
6577 CGF.EmitStmt(CS->getCapturedStmt());
6578 }
6579 };
6580 OMPLexicalScope Scope(*this, S, OMPD_unknown);
6581 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
6582}
6583
6584static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
6585 QualType SrcType, QualType DestType,
6586 SourceLocation Loc) {
6587 assert(CGF.hasScalarEvaluationKind(DestType) &&
6588 "DestType must have scalar evaluation kind.");
6589 assert(!Val.isAggregate() && "Must be a scalar or complex.");
6590 return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
6591 DestType, Loc)
6593 Val.getComplexVal(), SrcType, DestType, Loc);
6594}
6595
6598 QualType DestType, SourceLocation Loc) {
6599 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
6600 "DestType must have complex evaluation kind.");
6602 if (Val.isScalar()) {
6603 // Convert the input element to the element type of the complex.
6604 QualType DestElementType =
6605 DestType->castAs<ComplexType>()->getElementType();
6606 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
6607 Val.getScalarVal(), SrcType, DestElementType, Loc);
6608 ComplexVal = CodeGenFunction::ComplexPairTy(
6609 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
6610 } else {
6611 assert(Val.isComplex() && "Must be a scalar or complex.");
6612 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
6613 QualType DestElementType =
6614 DestType->castAs<ComplexType>()->getElementType();
6615 ComplexVal.first = CGF.EmitScalarConversion(
6616 Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
6617 ComplexVal.second = CGF.EmitScalarConversion(
6618 Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
6619 }
6620 return ComplexVal;
6621}
6622
6623static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6624 LValue LVal, RValue RVal) {
6625 if (LVal.isGlobalReg())
6626 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
6627 else
6628 CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
6629}
6630
6632 llvm::AtomicOrdering AO, LValue LVal,
6633 SourceLocation Loc) {
6634 if (LVal.isGlobalReg())
6635 return CGF.EmitLoadOfLValue(LVal, Loc);
6636 return CGF.EmitAtomicLoad(
6637 LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
6638 LVal.isVolatile());
6639}
6640
6642 QualType RValTy, SourceLocation Loc) {
6643 switch (getEvaluationKind(LVal.getType())) {
6644 case TEK_Scalar:
6646 *this, RVal, RValTy, LVal.getType(), Loc)),
6647 LVal);
6648 break;
6649 case TEK_Complex:
6651 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
6652 /*isInit=*/false);
6653 break;
6654 case TEK_Aggregate:
6655 llvm_unreachable("Must be a scalar or complex.");
6656 }
6657}
6658
6659static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6660 const Expr *X, const Expr *V,
6661 SourceLocation Loc) {
6662 // v = x;
6663 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6664 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6665 LValue XLValue = CGF.EmitLValue(X);
6666 LValue VLValue = CGF.EmitLValue(V);
6667 RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
6668 // OpenMP, 2.17.7, atomic Construct
6669 // If the read or capture clause is specified and the acquire, acq_rel, or
6670 // seq_cst clause is specified then the strong flush on exit from the atomic
6671 // operation is also an acquire flush.
6672 switch (AO) {
6673 case llvm::AtomicOrdering::Acquire:
6674 case llvm::AtomicOrdering::AcquireRelease:
6675 case llvm::AtomicOrdering::SequentiallyConsistent:
6676 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
6677 llvm::AtomicOrdering::Acquire);
6678 break;
6679 case llvm::AtomicOrdering::Monotonic:
6680 case llvm::AtomicOrdering::Release:
6681 break;
6682 case llvm::AtomicOrdering::NotAtomic:
6683 case llvm::AtomicOrdering::Unordered:
6684 llvm_unreachable("Unexpected ordering.");
6685 }
6686 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
6688}
6689
6691 llvm::AtomicOrdering AO, const Expr *X,
6692 const Expr *E, SourceLocation Loc) {
6693 // x = expr;
6694 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6695 emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
6697 // OpenMP, 2.17.7, atomic Construct
6698 // If the write, update, or capture clause is specified and the release,
6699 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6700 // the atomic operation is also a release flush.
6701 switch (AO) {
6702 case llvm::AtomicOrdering::Release:
6703 case llvm::AtomicOrdering::AcquireRelease:
6704 case llvm::AtomicOrdering::SequentiallyConsistent:
6705 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
6706 llvm::AtomicOrdering::Release);
6707 break;
6708 case llvm::AtomicOrdering::Acquire:
6709 case llvm::AtomicOrdering::Monotonic:
6710 break;
6711 case llvm::AtomicOrdering::NotAtomic:
6712 case llvm::AtomicOrdering::Unordered:
6713 llvm_unreachable("Unexpected ordering.");
6714 }
6715}
6716
6717static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6718 RValue Update,
6720 llvm::AtomicOrdering AO,
6721 bool IsXLHSInRHSPart) {
6722 ASTContext &Context = CGF.getContext();
6723 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
6724 // expression is simple and atomic is allowed for the given type for the
6725 // target platform.
6726 if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
6727 (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
6728 (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
6729 !Context.getTargetInfo().hasBuiltinAtomic(
6730 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
6731 return std::make_pair(false, RValue::get(nullptr));
6732
6733 auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
6734 if (T->isIntegerTy())
6735 return true;
6736
6737 if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
6738 return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));
6739
6740 return false;
6741 };
6742
6743 if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
6744 !CheckAtomicSupport(X.getAddress().getElementType(), BO))
6745 return std::make_pair(false, RValue::get(nullptr));
6746
6747 bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
6748 llvm::AtomicRMWInst::BinOp RMWOp;
6749 switch (BO) {
6750 case BO_Add:
6751 RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
6752 break;
6753 case BO_Sub:
6754 if (!IsXLHSInRHSPart)
6755 return std::make_pair(false, RValue::get(nullptr));
6756 RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
6757 break;
6758 case BO_And:
6759 RMWOp = llvm::AtomicRMWInst::And;
6760 break;
6761 case BO_Or:
6762 RMWOp = llvm::AtomicRMWInst::Or;
6763 break;
6764 case BO_Xor:
6765 RMWOp = llvm::AtomicRMWInst::Xor;
6766 break;
6767 case BO_LT:
6768 if (IsInteger)
6769 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6770 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
6771 : llvm::AtomicRMWInst::Max)
6772 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
6773 : llvm::AtomicRMWInst::UMax);
6774 else
6775 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
6776 : llvm::AtomicRMWInst::FMax;
6777 break;
6778 case BO_GT:
6779 if (IsInteger)
6780 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6781 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
6782 : llvm::AtomicRMWInst::Min)
6783 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
6784 : llvm::AtomicRMWInst::UMin);
6785 else
6786 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
6787 : llvm::AtomicRMWInst::FMin;
6788 break;
6789 case BO_Assign:
6790 RMWOp = llvm::AtomicRMWInst::Xchg;
6791 break;
6792 case BO_Mul:
6793 case BO_Div:
6794 case BO_Rem:
6795 case BO_Shl:
6796 case BO_Shr:
6797 case BO_LAnd:
6798 case BO_LOr:
6799 return std::make_pair(false, RValue::get(nullptr));
6800 case BO_PtrMemD:
6801 case BO_PtrMemI:
6802 case BO_LE:
6803 case BO_GE:
6804 case BO_EQ:
6805 case BO_NE:
6806 case BO_Cmp:
6807 case BO_AddAssign:
6808 case BO_SubAssign:
6809 case BO_AndAssign:
6810 case BO_OrAssign:
6811 case BO_XorAssign:
6812 case BO_MulAssign:
6813 case BO_DivAssign:
6814 case BO_RemAssign:
6815 case BO_ShlAssign:
6816 case BO_ShrAssign:
6817 case BO_Comma:
6818 llvm_unreachable("Unsupported atomic update operation");
6819 }
6820 llvm::Value *UpdateVal = Update.getScalarVal();
6821 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
6822 if (IsInteger)
6823 UpdateVal = CGF.Builder.CreateIntCast(
6824 IC, X.getAddress().getElementType(),
6825 X.getType()->hasSignedIntegerRepresentation());
6826 else
6827 UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
6828 X.getAddress().getElementType());
6829 }
6830 llvm::AtomicRMWInst *Res =
6831 CGF.emitAtomicRMWInst(RMWOp, X.getAddress(), UpdateVal, AO);
6832 return std::make_pair(true, RValue::get(Res));
6833}
6834
6837 llvm::AtomicOrdering AO, SourceLocation Loc,
6838 const llvm::function_ref<RValue(RValue)> CommonGen) {
6839 // Update expressions are allowed to have the following forms:
6840 // x binop= expr; -> xrval + expr;
6841 // x++, ++x -> xrval + 1;
6842 // x--, --x -> xrval - 1;
6843 // x = x binop expr; -> xrval binop expr
6844 // x = expr Op x; -> expr binop xrval;
6845 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
6846 if (!Res.first) {
6847 if (X.isGlobalReg()) {
6848 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6849 // 'xrval'.
6850 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
6851 } else {
6852 // Perform compare-and-swap procedure.
6853 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
6854 }
6855 }
6856 return Res;
6857}
6858
6860 llvm::AtomicOrdering AO, const Expr *X,
6861 const Expr *E, const Expr *UE,
6862 bool IsXLHSInRHSPart, SourceLocation Loc) {
6863 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6864 "Update expr in 'atomic update' must be a binary operator.");
6865 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6866 // Update expressions are allowed to have the following forms:
6867 // x binop= expr; -> xrval + expr;
6868 // x++, ++x -> xrval + 1;
6869 // x--, --x -> xrval - 1;
6870 // x = x binop expr; -> xrval binop expr
6871 // x = expr Op x; - > expr binop xrval;
6872 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6873 LValue XLValue = CGF.EmitLValue(X);
6874 RValue ExprRValue = CGF.EmitAnyExpr(E);
6875 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6876 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6877 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6878 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6879 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6880 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6881 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6882 return CGF.EmitAnyExpr(UE);
6883 };
6885 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6887 // OpenMP, 2.17.7, atomic Construct
6888 // If the write, update, or capture clause is specified and the release,
6889 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6890 // the atomic operation is also a release flush.
6891 switch (AO) {
6892 case llvm::AtomicOrdering::Release:
6893 case llvm::AtomicOrdering::AcquireRelease:
6894 case llvm::AtomicOrdering::SequentiallyConsistent:
6895 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
6896 llvm::AtomicOrdering::Release);
6897 break;
6898 case llvm::AtomicOrdering::Acquire:
6899 case llvm::AtomicOrdering::Monotonic:
6900 break;
6901 case llvm::AtomicOrdering::NotAtomic:
6902 case llvm::AtomicOrdering::Unordered:
6903 llvm_unreachable("Unexpected ordering.");
6904 }
6905}
6906
6908 QualType SourceType, QualType ResType,
6909 SourceLocation Loc) {
// Converts Value from SourceType to ResType and returns it as an RValue.
// The conversion helper is chosen by the evaluation kind of ResType:
// scalar results go through convertToScalarValue, complex results through
// convertToComplexValue. Aggregate result types are not handled and reach
// the llvm_unreachable below.
6910 switch (CGF.getEvaluationKind(ResType)) {
6911 case TEK_Scalar:
6912 return RValue::get(
6913 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
6914 case TEK_Complex: {
// Res is a pair; its two members are repackaged as a complex RValue.
6915 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
6916 return RValue::getComplex(Res.first, Res.second);
6917 }
6918 case TEK_Aggregate:
6919 break;
6920 }
6921 llvm_unreachable("Must be a scalar or complex.");
6922}
6923
6925 llvm::AtomicOrdering AO,
6926 bool IsPostfixUpdate, const Expr *V,
6927 const Expr *X, const Expr *E,
6928 const Expr *UE, bool IsXLHSInRHSPart,
6929 SourceLocation Loc) {
6930 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6931 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6932 RValue NewVVal;
6933 LValue VLValue = CGF.EmitLValue(V);
6934 LValue XLValue = CGF.EmitLValue(X);
6935 RValue ExprRValue = CGF.EmitAnyExpr(E);
6936 QualType NewVValType;
6937 if (UE) {
6938 // 'x' is updated with some additional value.
6939 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6940 "Update expr in 'atomic capture' must be a binary operator.");
6941 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6942 // Update expressions are allowed to have the following forms:
6943 // x binop= expr; -> xrval + expr;
6944 // x++, ++x -> xrval + 1;
6945 // x--, --x -> xrval - 1;
6946 // x = x binop expr; -> xrval binop expr
6947 // x = expr Op x; - > expr binop xrval;
6948 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6949 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6950 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6951 NewVValType = XRValExpr->getType();
6952 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6953 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6954 IsPostfixUpdate](RValue XRValue) {
6955 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6956 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6957 RValue Res = CGF.EmitAnyExpr(UE);
6958 NewVVal = IsPostfixUpdate ? XRValue : Res;
6959 return Res;
6960 };
6961 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6962 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6964 if (Res.first) {
6965 // 'atomicrmw' instruction was generated.
6966 if (IsPostfixUpdate) {
6967 // Use old value from 'atomicrmw'.
6968 NewVVal = Res.second;
6969 } else {
6970 // 'atomicrmw' does not provide new value, so evaluate it using old
6971 // value of 'x'.
6972 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6973 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6974 NewVVal = CGF.EmitAnyExpr(UE);
6975 }
6976 }
6977 } else {
6978 // 'x' is simply rewritten with some 'expr'.
6979 NewVValType = X->getType().getNonReferenceType();
6980 ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
6981 X->getType().getNonReferenceType(), Loc);
6982 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6983 NewVVal = XRValue;
6984 return ExprRValue;
6985 };
6986 // Try to perform atomicrmw xchg, otherwise simple exchange.
6987 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6988 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6989 Loc, Gen);
6991 if (Res.first) {
6992 // 'atomicrmw' instruction was generated.
6993 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6994 }
6995 }
6996 // Emit post-update store to 'v' of old/new 'x' value.
6997 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
6999 // OpenMP 5.1 removes the required flush for capture clause.
7000 if (CGF.CGM.getLangOpts().OpenMP < 51) {
7001 // OpenMP, 2.17.7, atomic Construct
7002 // If the write, update, or capture clause is specified and the release,
7003 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
7004 // the atomic operation is also a release flush.
7005 // If the read or capture clause is specified and the acquire, acq_rel, or
7006 // seq_cst clause is specified then the strong flush on exit from the atomic
7007 // operation is also an acquire flush.
7008 switch (AO) {
7009 case llvm::AtomicOrdering::Release:
7010 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
7011 llvm::AtomicOrdering::Release);
7012 break;
7013 case llvm::AtomicOrdering::Acquire:
7014 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
7015 llvm::AtomicOrdering::Acquire);
7016 break;
7017 case llvm::AtomicOrdering::AcquireRelease:
7018 case llvm::AtomicOrdering::SequentiallyConsistent:
7020 CGF, {}, Loc, llvm::AtomicOrdering::AcquireRelease);
7021 break;
7022 case llvm::AtomicOrdering::Monotonic:
7023 break;
7024 case llvm::AtomicOrdering::NotAtomic:
7025 case llvm::AtomicOrdering::Unordered:
7026 llvm_unreachable("Unexpected ordering.");
7027 }
7028 }
7029}
7030
7032 CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
7033 const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
7034 const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
7035 SourceLocation Loc) {
7036 llvm::OpenMPIRBuilder &OMPBuilder =
7038
7039 OMPAtomicCompareOp Op;
7040 assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
7041 switch (cast<BinaryOperator>(CE)->getOpcode()) {
7042 case BO_EQ:
7043 Op = OMPAtomicCompareOp::EQ;
7044 break;
7045 case BO_LT:
7046 Op = OMPAtomicCompareOp::MIN;
7047 break;
7048 case BO_GT:
7049 Op = OMPAtomicCompareOp::MAX;
7050 break;
7051 default:
7052 llvm_unreachable("unsupported atomic compare binary operator");
7053 }
7054
7055 LValue XLVal = CGF.EmitLValue(X);
7056 Address XAddr = XLVal.getAddress();
7057
7058 auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
7059 if (X->getType() == E->getType())
7060 return CGF.EmitScalarExpr(E);
7061 const Expr *NewE = E->IgnoreImplicitAsWritten();
7062 llvm::Value *V = CGF.EmitScalarExpr(NewE);
7063 if (NewE->getType() == X->getType())
7064 return V;
7065 return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
7066 };
7067
7068 llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
7069 llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
7070 if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
7071 EVal = CGF.Builder.CreateIntCast(
7072 CI, XLVal.getAddress().getElementType(),
7074 if (DVal)
7075 if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
7076 DVal = CGF.Builder.CreateIntCast(
7077 CI, XLVal.getAddress().getElementType(),
7079
7080 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
7081 XAddr.emitRawPointer(CGF), XAddr.getElementType(),
7082 X->getType()->hasSignedIntegerRepresentation(),
7083 X->getType().isVolatileQualified()};
7084 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
7085 if (V) {
7086 LValue LV = CGF.EmitLValue(V);
7087 Address Addr = LV.getAddress();
7088 VOpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
7089 V->getType()->hasSignedIntegerRepresentation(),
7090 V->getType().isVolatileQualified()};
7091 }
7092 if (R) {
7093 LValue LV = CGF.EmitLValue(R);
7094 Address Addr = LV.getAddress();
7095 ROpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
7096 R->getType()->hasSignedIntegerRepresentation(),
7097 R->getType().isVolatileQualified()};
7098 }
7099
7100 if (FailAO == llvm::AtomicOrdering::NotAtomic) {
7101 // fail clause was not mentioned on the
7102 // "#pragma omp atomic compare" construct.
7103 CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
7104 CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
7106 } else
7107 CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
7108 CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
7109 IsPostfixUpdate, IsFailOnly, FailAO));
7110}
7111
7113 llvm::AtomicOrdering AO,
7114 llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
7115 const Expr *X, const Expr *V, const Expr *R,
7116 const Expr *E, const Expr *UE, const Expr *D,
7117 const Expr *CE, bool IsXLHSInRHSPart,
7118 bool IsFailOnly, SourceLocation Loc) {
7119 switch (Kind) {
7120 case OMPC_read:
7121 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
7122 break;
7123 case OMPC_write:
7124 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
7125 break;
7126 case OMPC_unknown:
7127 case OMPC_update:
7128 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
7129 break;
7130 case OMPC_capture:
7131 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
7132 IsXLHSInRHSPart, Loc);
7133 break;
7134 case OMPC_compare: {
7135 emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
7137 break;
7138 }
7139 default:
7140 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
7141 }
7142}
7143
7144void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
7145 llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
7146 // Fail Memory Clause Ordering.
7147 llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
7148 bool MemOrderingSpecified = false;
7149 if (S.getSingleClause<OMPSeqCstClause>()) {
7150 AO = llvm::AtomicOrdering::SequentiallyConsistent;
7151 MemOrderingSpecified = true;
7152 } else if (S.getSingleClause<OMPAcqRelClause>()) {
7153 AO = llvm::AtomicOrdering::AcquireRelease;
7154 MemOrderingSpecified = true;
7155 } else if (S.getSingleClause<OMPAcquireClause>()) {
7156 AO = llvm::AtomicOrdering::Acquire;
7157 MemOrderingSpecified = true;
7158 } else if (S.getSingleClause<OMPReleaseClause>()) {
7159 AO = llvm::AtomicOrdering::Release;
7160 MemOrderingSpecified = true;
7161 } else if (S.getSingleClause<OMPRelaxedClause>()) {
7162 AO = llvm::AtomicOrdering::Monotonic;
7163 MemOrderingSpecified = true;
7164 }
7165 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
7166 OpenMPClauseKind Kind = OMPC_unknown;
7167 for (const OMPClause *C : S.clauses()) {
7168 // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
7169 // if it is first).
7170 OpenMPClauseKind K = C->getClauseKind();
7171 // TBD
7172 if (K == OMPC_weak)
7173 return;
7174 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
7175 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
7176 continue;
7177 Kind = K;
7178 KindsEncountered.insert(K);
7179 }
7180 // We just need to correct Kind here. No need to set a bool saying it is
7181 // actually compare capture because we can tell from whether V and R are
7182 // nullptr.
7183 if (KindsEncountered.contains(OMPC_compare) &&
7184 KindsEncountered.contains(OMPC_capture))
7185 Kind = OMPC_compare;
7186 if (!MemOrderingSpecified) {
7187 llvm::AtomicOrdering DefaultOrder =
7188 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
7189 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
7190 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
7191 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
7192 Kind == OMPC_capture)) {
7193 AO = DefaultOrder;
7194 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
7195 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
7196 AO = llvm::AtomicOrdering::Release;
7197 } else if (Kind == OMPC_read) {
7198 assert(Kind == OMPC_read && "Unexpected atomic kind.");
7199 AO = llvm::AtomicOrdering::Acquire;
7200 }
7201 }
7202 }
7203
7204 if (KindsEncountered.contains(OMPC_compare) &&
7205 KindsEncountered.contains(OMPC_fail)) {
7206 Kind = OMPC_compare;
7207 const auto *FailClause = S.getSingleClause<OMPFailClause>();
7208 if (FailClause) {
7209 OpenMPClauseKind FailParameter = FailClause->getFailParameter();
7210 if (FailParameter == llvm::omp::OMPC_relaxed)
7211 FailAO = llvm::AtomicOrdering::Monotonic;
7212 else if (FailParameter == llvm::omp::OMPC_acquire)
7213 FailAO = llvm::AtomicOrdering::Acquire;
7214 else if (FailParameter == llvm::omp::OMPC_seq_cst)
7215 FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
7216 }
7217 }
7218
7219 LexicalScope Scope(*this, S.getSourceRange());
7220 EmitStopPoint(S.getAssociatedStmt());
7221 emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(),
7222 S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(),
7223 S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(),
7224 S.isFailOnly(), S.getBeginLoc());
7225}
7226
7228 const OMPExecutableDirective &S,
7229 const RegionCodeGenTy &CodeGen) {
7230 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
7231 CodeGenModule &CGM = CGF.CGM;
7232
7233 // On device emit this construct as inlined code.
7234 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
7235 OMPLexicalScope Scope(CGF, S, OMPD_target);
7237 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7238 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
7239 });
7240 return;
7241 }
7242
7244 llvm::Function *Fn = nullptr;
7245 llvm::Constant *FnID = nullptr;
7246
7247 const Expr *IfCond = nullptr;
7248 // Check for the at most one if clause associated with the target region.
7249 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7250 if (C->getNameModifier() == OMPD_unknown ||
7251 C->getNameModifier() == OMPD_target) {
7252 IfCond = C->getCondition();
7253 break;
7254 }
7255 }
7256
7257 // Check if we have any device clause associated with the directive.
7258 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
7259 nullptr, OMPC_DEVICE_unknown);
7260 if (auto *C = S.getSingleClause<OMPDeviceClause>())
7261 Device.setPointerAndInt(C->getDevice(), C->getModifier());
7262
7263 // Check if we have an if clause whose conditional always evaluates to false
7264 // or if we do not have any targets specified. If so the target region is not
7265 // an offload entry point.
7266 bool IsOffloadEntry = true;
7267 if (IfCond) {
7268 bool Val;
7269 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
7270 IsOffloadEntry = false;
7271 }
7272 if (CGM.getLangOpts().OMPTargetTriples.empty())
7273 IsOffloadEntry = false;
7274
7275 if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
7276 CGM.getDiags().Report(diag::err_missing_mandatory_offloading);
7277 }
7278
7279 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
7280 StringRef ParentName;
7281 // In case we have Ctors/Dtors we use the complete type variant to produce
7282 // the mangling of the device outlined kernel.
7283 if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
7284 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
7285 else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
7286 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
7287 else
7288 ParentName =
7290
7291 // Emit target region as a standalone region.
7292 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
7293 IsOffloadEntry, CodeGen);
7294 OMPLexicalScope Scope(CGF, S, OMPD_task);
7295 auto &&SizeEmitter =
7296 [IsOffloadEntry](CodeGenFunction &CGF,
7297 const OMPLoopDirective &D) -> llvm::Value * {
7298 if (IsOffloadEntry) {
7299 OMPLoopScope(CGF, D);
7300 // Emit calculation of the iterations count.
7301 llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
7302 NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
7303 /*isSigned=*/false);
7304 return NumIterations;
7305 }
7306 return nullptr;
7307 };
7308 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
7309 SizeEmitter);
7310}
7311
7313 PrePostActionTy &Action) {
7314 Action.Enter(CGF);
7315 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7316 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
7317 CGF.EmitOMPPrivateClause(S, PrivateScope);
7318 (void)PrivateScope.Privatize();
7319 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
7321
7322 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
7323 CGF.EnsureInsertPoint();
7324}
7325
7327 StringRef ParentName,
7328 const OMPTargetDirective &S) {
// Emits the outlined function for the target directive S, using
// emitTargetRegion as the body generator and always registering it as an
// offload entry (IsOffloadEntry is hard-coded to true).
7329 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7330 emitTargetRegion(CGF, S, Action);
7331 };
// Fn and Addr are declared uninitialized and are only read by the assert
// below, so emitTargetOutlinedFunction is expected to set both -- TODO
// confirm against its declaration.
7332 llvm::Function *Fn;
7333 llvm::Constant *Addr;
7334 // Emit target region as a standalone region.
7335 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7336 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7337 assert(Fn && Addr && "Target device function emission failed.");
7338}
7339
7341 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7342 emitTargetRegion(CGF, S, Action);
7343 };
7345}
7346
7348 const OMPExecutableDirective &S,
7349 OpenMPDirectiveKind InnermostKind,
7350 const RegionCodeGenTy &CodeGen) {
7351 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
7352 llvm::Function *OutlinedFn =
7354 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
7355 CodeGen);
7356
7357 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
7358 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
7359 if (NT || TL) {
7360 const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr;
7361 const Expr *ThreadLimit = TL ? TL->getThreadLimit().front() : nullptr;
7362
7363 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
7364 S.getBeginLoc());
7365 }
7366
7367 OMPTeamsScope Scope(CGF, S);
7369 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
7370 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
7371 CapturedVars);
7372}
7373
7375 // Emit teams region as a standalone region.
7376 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7377 Action.Enter(CGF);
7378 OMPPrivateScope PrivateScope(CGF);
7379 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
7380 CGF.EmitOMPPrivateClause(S, PrivateScope);
7381 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7382 (void)PrivateScope.Privatize();
7383 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
7384 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7385 };
7386 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
7388 [](CodeGenFunction &) { return nullptr; });
7389}
7390
7392 const OMPTargetTeamsDirective &S) {
7393 auto *CS = S.getCapturedStmt(OMPD_teams);
7394 Action.Enter(CGF);
7395 // Emit teams region as a standalone region.
7396 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7397 Action.Enter(CGF);
7398 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7399 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
7400 CGF.EmitOMPPrivateClause(S, PrivateScope);
7401 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7402 (void)PrivateScope.Privatize();
7403 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
7405 CGF.EmitStmt(CS->getCapturedStmt());
7406 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7407 };
7408 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
7410 [](CodeGenFunction &) { return nullptr; });
7411}
7412
7414 CodeGenModule &CGM, StringRef ParentName,
7415 const OMPTargetTeamsDirective &S) {
// Emits the outlined function for the 'target teams' directive S, using
// emitTargetTeamsRegion as the body generator and always registering it as an
// offload entry (IsOffloadEntry is hard-coded to true).
7416 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7417 emitTargetTeamsRegion(CGF, Action, S);
7418 };
// Fn and Addr are declared uninitialized and are only read by the assert
// below, so emitTargetOutlinedFunction is expected to set both -- TODO
// confirm against its declaration.
7419 llvm::Function *Fn;
7420 llvm::Constant *Addr;
7421 // Emit target region as a standalone region.
7422 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7423 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7424 assert(Fn && Addr && "Target device function emission failed.");
7425}
7426
7428 const OMPTargetTeamsDirective &S) {
7429 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7430 emitTargetTeamsRegion(CGF, Action, S);
7431 };
7433}
7434
7435static void
7438 Action.Enter(CGF);
7439 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7441 };
7442
7443 // Emit teams region as a standalone region.
7444 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7445 PrePostActionTy &Action) {
7446 Action.Enter(CGF);
7447 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7448 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7449 (void)PrivateScope.Privatize();
7450 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7451 CodeGenDistribute);
7452 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7453 };
7454 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
7456 [](CodeGenFunction &) { return nullptr; });
7457}
7458
7460 CodeGenModule &CGM, StringRef ParentName,
7462 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7463 emitTargetTeamsDistributeRegion(CGF, Action, S);
7464 };
7465 llvm::Function *Fn;
7466 llvm::Constant *Addr;
7467 // Emit target region as a standalone region.
7468 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7469 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7470 assert(Fn && Addr && "Target device function emission failed.");
7471}
7472
7475 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7476 emitTargetTeamsDistributeRegion(CGF, Action, S);
7477 };
7479}
7480
7482 CodeGenFunction &CGF, PrePostActionTy &Action,
7484 Action.Enter(CGF);
7485 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7487 };
7488
7489 // Emit teams region as a standalone region.
7490 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7491 PrePostActionTy &Action) {
7492 Action.Enter(CGF);
7493 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7494 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7495 (void)PrivateScope.Privatize();
7496 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7497 CodeGenDistribute);
7498 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7499 };
7500 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
7502 [](CodeGenFunction &) { return nullptr; });
7503}
7504
7506 CodeGenModule &CGM, StringRef ParentName,
7508 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7510 };
7511 llvm::Function *Fn;
7512 llvm::Constant *Addr;
7513 // Emit target region as a standalone region.
7514 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7515 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7516 assert(Fn && Addr && "Target device function emission failed.");
7517}
7518
7521 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7523 };
7525}
7526
7528 const OMPTeamsDistributeDirective &S) {
7529
7530 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7532 };
7533
7534 // Emit teams region as a standalone region.
7535 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7536 PrePostActionTy &Action) {
7537 Action.Enter(CGF);
7538 OMPPrivateScope PrivateScope(CGF);
7539 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7540 (void)PrivateScope.Privatize();
7541 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7542 CodeGenDistribute);
7543 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7544 };
7545 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
7547 [](CodeGenFunction &) { return nullptr; });
7548}
7549
7552 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7554 };
7555
7556 // Emit teams region as a standalone region.
7557 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7558 PrePostActionTy &Action) {
7559 Action.Enter(CGF);
7560 OMPPrivateScope PrivateScope(CGF);
7561 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7562 (void)PrivateScope.Privatize();
7563 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
7564 CodeGenDistribute);
7565 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7566 };
7567 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
7569 [](CodeGenFunction &) { return nullptr; });
7570}
7571
7574 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7576 S.getDistInc());
7577 };
7578
7579 // Emit teams region as a standalone region.
7580 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7581 PrePostActionTy &Action) {
7582 Action.Enter(CGF);
7583 OMPPrivateScope PrivateScope(CGF);
7584 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7585 (void)PrivateScope.Privatize();
7586 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7587 CodeGenDistribute);
7588 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7589 };
7590 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
7592 [](CodeGenFunction &) { return nullptr; });
7593}
7594
7597 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7599 S.getDistInc());
7600 };
7601
7602 // Emit teams region as a standalone region.
7603 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7604 PrePostActionTy &Action) {
7605 Action.Enter(CGF);
7606 OMPPrivateScope PrivateScope(CGF);
7607 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7608 (void)PrivateScope.Privatize();
7610 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7611 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7612 };
7613 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
7614 CodeGen);
7616 [](CodeGenFunction &) { return nullptr; });
7617}
7618
7620 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7621 llvm::Value *Device = nullptr;
7622 llvm::Value *NumDependences = nullptr;
7623 llvm::Value *DependenceList = nullptr;
7624
7625 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7626 Device = EmitScalarExpr(C->getDevice());
7627
7628 // Build list and emit dependences
7631 if (!Data.Dependences.empty()) {
7632 Address DependenciesArray = Address::invalid();
7633 std::tie(NumDependences, DependenciesArray) =
7634 CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
7635 S.getBeginLoc());
7636 DependenceList = DependenciesArray.emitRawPointer(*this);
7637 }
7638 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
7639
7640 assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
7641 S.getSingleClause<OMPDestroyClause>() ||
7642 S.getSingleClause<OMPUseClause>())) &&
7643 "OMPNowaitClause clause is used separately in OMPInteropDirective.");
7644
7645 auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
7646 if (!ItOMPInitClause.empty()) {
7647 // Look at the multiple init clauses
7648 for (const OMPInitClause *C : ItOMPInitClause) {
7649 llvm::Value *InteropvarPtr =
7650 EmitLValue(C->getInteropVar()).getPointer(*this);
7651 llvm::omp::OMPInteropType InteropType =
7652 llvm::omp::OMPInteropType::Unknown;
7653 if (C->getIsTarget()) {
7654 InteropType = llvm::omp::OMPInteropType::Target;
7655 } else {
7656 assert(C->getIsTargetSync() &&
7657 "Expected interop-type target/targetsync");
7658 InteropType = llvm::omp::OMPInteropType::TargetSync;
7659 }
7660 OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType,
7661 Device, NumDependences, DependenceList,
7662 Data.HasNowaitClause);
7663 }
7664 }
7665 auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
7666 if (!ItOMPDestroyClause.empty()) {
7667 // Look at the multiple destroy clauses
7668 for (const OMPDestroyClause *C : ItOMPDestroyClause) {
7669 llvm::Value *InteropvarPtr =
7670 EmitLValue(C->getInteropVar()).getPointer(*this);
7671 OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
7672 NumDependences, DependenceList,
7673 Data.HasNowaitClause);
7674 }
7675 }
7676 auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
7677 if (!ItOMPUseClause.empty()) {
7678 // Look at the multiple use clauses
7679 for (const OMPUseClause *C : ItOMPUseClause) {
7680 llvm::Value *InteropvarPtr =
7681 EmitLValue(C->getInteropVar()).getPointer(*this);
7682 OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
7683 NumDependences, DependenceList,
7684 Data.HasNowaitClause);
7685 }
7686 }
7687}
7688
7691 PrePostActionTy &Action) {
7692 Action.Enter(CGF);
7693 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7695 S.getDistInc());
7696 };
7697
7698 // Emit teams region as a standalone region.
7699 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7700 PrePostActionTy &Action) {
7701 Action.Enter(CGF);
7702 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7703 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7704 (void)PrivateScope.Privatize();
7706 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7707 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7708 };
7709
7710 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
7711 CodeGenTeams);
7713 [](CodeGenFunction &) { return nullptr; });
7714}
7715
7717 CodeGenModule &CGM, StringRef ParentName,
7719 // Emit SPMD target teams distribute parallel for region as a standalone
7720 // region.
7721 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7723 };
7724 llvm::Function *Fn;
7725 llvm::Constant *Addr;
7726 // Emit target region as a standalone region.
7727 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7728 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7729 assert(Fn && Addr && "Target device function emission failed.");
7730}
7731
7739
7741 CodeGenFunction &CGF,
7743 PrePostActionTy &Action) {
7744 Action.Enter(CGF);
7745 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7747 S.getDistInc());
7748 };
7749
7750 // Emit teams region as a standalone region.
7751 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7752 PrePostActionTy &Action) {
7753 Action.Enter(CGF);
7754 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7755 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7756 (void)PrivateScope.Privatize();
7758 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7759 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7760 };
7761
7762 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
7763 CodeGenTeams);
7765 [](CodeGenFunction &) { return nullptr; });
7766}
7767
7769 CodeGenModule &CGM, StringRef ParentName,
7771 // Emit SPMD target teams distribute parallel for simd region as a standalone
7772 // region.
7773 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7775 };
7776 llvm::Function *Fn;
7777 llvm::Constant *Addr;
7778 // Emit target region as a standalone region.
7779 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7780 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7781 assert(Fn && Addr && "Target device function emission failed.");
7782}
7783
7791
7794 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
7795 S.getCancelRegion());
7796}
7797
7799 const Expr *IfCond = nullptr;
7800 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7801 if (C->getNameModifier() == OMPD_unknown ||
7802 C->getNameModifier() == OMPD_cancel) {
7803 IfCond = C->getCondition();
7804 break;
7805 }
7806 }
7807 if (CGM.getLangOpts().OpenMPIRBuilder) {
7808 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7809 // TODO: This check is necessary as only cancellation of `parallel`,
7810 // `sections` and `section` regions goes through the OpenMPIRBuilder for now.
7811 if (S.getCancelRegion() == OMPD_parallel ||
7812 S.getCancelRegion() == OMPD_sections ||
7813 S.getCancelRegion() == OMPD_section) {
7814 llvm::Value *IfCondition = nullptr;
7815 if (IfCond)
7816 IfCondition = EmitScalarExpr(IfCond,
7817 /*IgnoreResultAssign=*/true);
7818 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
7819 OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
7820 return Builder.restoreIP(AfterIP);
7821 }
7822 }
7823
7824 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
7825 S.getCancelRegion());
7826}
7827
7830 if (Kind == OMPD_parallel || Kind == OMPD_task ||
7831 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7832 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7833 return ReturnBlock;
7834 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7835 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7836 Kind == OMPD_distribute_parallel_for ||
7837 Kind == OMPD_target_parallel_for ||
7838 Kind == OMPD_teams_distribute_parallel_for ||
7839 Kind == OMPD_target_teams_distribute_parallel_for);
7840 return OMPCancelStack.getExitBlock();
7841}
7842
7844 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7845 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7846 CaptureDeviceAddrMap) {
7847 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7848 for (const Expr *OrigVarIt : C.varlist()) {
7849 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
7850 if (!Processed.insert(OrigVD).second)
7851 continue;
7852
7853 // In order to identify the right initializer we need to match the
7854 // declaration used by the mapping logic. In some cases we may get
7855 // OMPCapturedExprDecl that refers to the original declaration.
7856 const ValueDecl *MatchingVD = OrigVD;
7857 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7858 // OMPCapturedExprDecls are used to privatize fields of the current
7859 // structure.
7860 const auto *ME = cast<MemberExpr>(OED->getInit());
7861 assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7862 "Base should be the current struct!");
7863 MatchingVD = ME->getMemberDecl();
7864 }
7865
7866 // If we don't have information about the current list item, move on to
7867 // the next one.
7868 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7869 if (InitAddrIt == CaptureDeviceAddrMap.end())
7870 continue;
7871
7872 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7873
7874 // Use the address from CaptureDeviceAddrMap as OrigVD's private address.
7875 bool IsRegistered = PrivateScope.addPrivate(
7876 OrigVD,
7877 Address(InitAddrIt->second, Ty,
7878 getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
7879 assert(IsRegistered && "firstprivate var already registered as private");
7880 // Silence the warning about unused variable.
7881 (void)IsRegistered;
7882 }
7883}
7884
7885static const VarDecl *getBaseDecl(const Expr *Ref) {
7886 const Expr *Base = Ref->IgnoreParenImpCasts();
7887 while (const auto *OASE = dyn_cast<ArraySectionExpr>(Base))
7888 Base = OASE->getBase()->IgnoreParenImpCasts();
7889 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
7890 Base = ASE->getBase()->IgnoreParenImpCasts();
7891 return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
7892}
7893
7895 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7896 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7897 CaptureDeviceAddrMap) {
7898 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7899 for (const Expr *Ref : C.varlist()) {
7900 const VarDecl *OrigVD = getBaseDecl(Ref);
7901 if (!Processed.insert(OrigVD).second)
7902 continue;
7903 // In order to identify the right initializer we need to match the
7904 // declaration used by the mapping logic. In some cases we may get
7905 // OMPCapturedExprDecl that refers to the original declaration.
7906 const ValueDecl *MatchingVD = OrigVD;
7907 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7908 // OMPCapturedExprDecls are used to privatize fields of the current
7909 // structure.
7910 const auto *ME = cast<MemberExpr>(OED->getInit());
7911 assert(isa<CXXThisExpr>(ME->getBase()) &&
7912 "Base should be the current struct!");
7913 MatchingVD = ME->getMemberDecl();
7914 }
7915
7916 // If we don't have information about the current list item, move on to
7917 // the next one.
7918 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7919 if (InitAddrIt == CaptureDeviceAddrMap.end())
7920 continue;
7921
7922 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7923
7924 Address PrivAddr =
7925 Address(InitAddrIt->second, Ty,
7926 getContext().getTypeAlignInChars(getContext().VoidPtrTy));
7927 // For declrefs and variable length arrays we need to load the pointer for
7928 // correct mapping, since the pointer to the data was passed to the runtime.
7929 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
7930 MatchingVD->getType()->isArrayType()) {
7932 OrigVD->getType().getNonReferenceType());
7933 PrivAddr =
7935 PtrTy->castAs<PointerType>());
7936 }
7937
7938 (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
7939 }
7940}
7941
7942// Generate the instructions for '#pragma omp target data' directive.
7944 const OMPTargetDataDirective &S) {
7945 // Emit vtable only from host for target data directive.
7946 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
7947 CGM.getOpenMPRuntime().registerVTable(S);
7948
7949 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7950 /*SeparateBeginEndCalls=*/true);
7951
7952 // Create a pre/post action to signal the privatization of the device pointer.
7953 // This action can be replaced by the OpenMP runtime code generation to
7954 // deactivate privatization.
7955 bool PrivatizeDevicePointers = false;
7956 class DevicePointerPrivActionTy : public PrePostActionTy {
7957 bool &PrivatizeDevicePointers;
7958
7959 public:
7960 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7961 : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7962 void Enter(CodeGenFunction &CGF) override {
7963 PrivatizeDevicePointers = true;
7964 }
7965 };
7966 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7967
7968 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7969 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7970 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
7971 };
7972
7973 // Codegen that selects whether to generate the privatization code or not.
7974 auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7975 RegionCodeGenTy RCG(InnermostCodeGen);
7976 PrivatizeDevicePointers = false;
7977
7978 // Call the pre-action to change the status of PrivatizeDevicePointers if
7979 // needed.
7980 Action.Enter(CGF);
7981
7982 if (PrivatizeDevicePointers) {
7983 OMPPrivateScope PrivateScope(CGF);
7984 // Emit all instances of the use_device_ptr clause.
7985 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7986 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
7988 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7989 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
7991 (void)PrivateScope.Privatize();
7992 RCG(CGF);
7993 } else {
7994 // If we don't have target devices, don't bother emitting the data
7995 // mapping code.
7996 std::optional<OpenMPDirectiveKind> CaptureRegion;
7997 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7998 // Emit helper decls of the use_device_ptr/use_device_addr clauses.
7999 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
8000 for (const Expr *E : C->varlist()) {
8001 const Decl *D = cast<DeclRefExpr>(E)->getDecl();
8002 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
8003 CGF.EmitVarDecl(*OED);
8004 }
8005 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
8006 for (const Expr *E : C->varlist()) {
8007 const Decl *D = getBaseDecl(E);
8008 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
8009 CGF.EmitVarDecl(*OED);
8010 }
8011 } else {
8012 CaptureRegion = OMPD_unknown;
8013 }
8014
8015 OMPLexicalScope Scope(CGF, S, CaptureRegion);
8016 RCG(CGF);
8017 }
8018 };
8019
8020 // Forward the provided action to the privatization codegen.
8021 RegionCodeGenTy PrivRCG(PrivCodeGen);
8022 PrivRCG.setAction(Action);
8023
8024 // Although the body of the region is emitted as an inlined directive,
8025 // we don't use an inline scope, as changes to the references inside the
8026 // region are expected to be visible outside, so we do not privatize them.
8027 OMPLexicalScope Scope(CGF, S);
8028 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
8029 PrivRCG);
8030 };
8031
8033
8034 // If we don't have target devices, don't bother emitting the data mapping
8035 // code.
8036 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
8037 RCG(*this);
8038 return;
8039 }
8040
8041 // Check if we have any if clause associated with the directive.
8042 const Expr *IfCond = nullptr;
8043 if (const auto *C = S.getSingleClause<OMPIfClause>())
8044 IfCond = C->getCondition();
8045
8046 // Check if we have any device clause associated with the directive.
8047 const Expr *Device = nullptr;
8048 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8049 Device = C->getDevice();
8050
8051 // Set the action to signal privatization of device pointers.
8052 RCG.setAction(PrivAction);
8053
8054 // Emit region code.
8055 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
8056 Info);
8057}
8058
8060 const OMPTargetEnterDataDirective &S) {
8061 // If we don't have target devices, don't bother emitting the data mapping
8062 // code.
8063 if (CGM.getLangOpts().OMPTargetTriples.empty())
8064 return;
8065
8066 // Check if we have any if clause associated with the directive.
8067 const Expr *IfCond = nullptr;
8068 if (const auto *C = S.getSingleClause<OMPIfClause>())
8069 IfCond = C->getCondition();
8070
8071 // Check if we have any device clause associated with the directive.
8072 const Expr *Device = nullptr;
8073 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8074 Device = C->getDevice();
8075
8076 OMPLexicalScope Scope(*this, S, OMPD_task);
8077 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
8078}
8079
8081 const OMPTargetExitDataDirective &S) {
8082 // If we don't have target devices, don't bother emitting the data mapping
8083 // code.
8084 if (CGM.getLangOpts().OMPTargetTriples.empty())
8085 return;
8086
8087 // Check if we have any if clause associated with the directive.
8088 const Expr *IfCond = nullptr;
8089 if (const auto *C = S.getSingleClause<OMPIfClause>())
8090 IfCond = C->getCondition();
8091
8092 // Check if we have any device clause associated with the directive.
8093 const Expr *Device = nullptr;
8094 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8095 Device = C->getDevice();
8096
8097 OMPLexicalScope Scope(*this, S, OMPD_task);
8098 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
8099}
8100
8103 PrePostActionTy &Action) {
8104 // Get the captured statement associated with the 'parallel' region.
8105 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
8106 Action.Enter(CGF);
8107 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
8108 Action.Enter(CGF);
8109 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8110 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
8111 CGF.EmitOMPPrivateClause(S, PrivateScope);
8112 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
8113 (void)PrivateScope.Privatize();
8114 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
8116 // TODO: Add support for clauses.
8117 CGF.EmitStmt(CS->getCapturedStmt());
8118 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
8119 };
8120 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
8123 [](CodeGenFunction &) { return nullptr; });
8124}
8125
8127 CodeGenModule &CGM, StringRef ParentName,
8128 const OMPTargetParallelDirective &S) {
8129 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8130 emitTargetParallelRegion(CGF, S, Action);
8131 };
8132 llvm::Function *Fn;
8133 llvm::Constant *Addr;
8134 // Emit target region as a standalone region.
8135 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8136 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
8137 assert(Fn && Addr && "Target device function emission failed.");
8138}
8139
8141 const OMPTargetParallelDirective &S) {
8142 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8143 emitTargetParallelRegion(CGF, S, Action);
8144 };
8146}
8147
8150 PrePostActionTy &Action) {
8151 Action.Enter(CGF);
8152 // Emit directive as a combined directive that consists of two implicit
8153 // directives: 'parallel' with 'for' directive.
8154 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8155 Action.Enter(CGF);
8157 CGF, OMPD_target_parallel_for, S.hasCancel());
8158 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
8160 };
8161 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
8163}
8164
8166 CodeGenModule &CGM, StringRef ParentName,
8168 // Emit SPMD target parallel for region as a standalone region.
8169 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8170 emitTargetParallelForRegion(CGF, S, Action);
8171 };
8172 llvm::Function *Fn;
8173 llvm::Constant *Addr;
8174 // Emit target region as a standalone region.
8175 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8176 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
8177 assert(Fn && Addr && "Target device function emission failed.");
8178}
8179
8182 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8183 emitTargetParallelForRegion(CGF, S, Action);
8184 };
8186}
8187
8188static void
8191 PrePostActionTy &Action) {
8192 Action.Enter(CGF);
8193 // Emit directive as a combined directive that consists of two implicit
8194 // directives: 'parallel' with 'for' directive.
8195 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8196 Action.Enter(CGF);
8197 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
8199 };
8200 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
8202}
8203
8205 CodeGenModule &CGM, StringRef ParentName,
8207 // Emit SPMD target parallel for simd region as a standalone region.
8208 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8209 emitTargetParallelForSimdRegion(CGF, S, Action);
8210 };
8211 llvm::Function *Fn;
8212 llvm::Constant *Addr;
8213 // Emit target region as a standalone region.
8214 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8215 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
8216 assert(Fn && Addr && "Target device function emission failed.");
8217}
8218
8221 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8222 emitTargetParallelForSimdRegion(CGF, S, Action);
8223 };
8225}
8226
8227/// Register the variable named by \p Helper as private, at \p PVD's address.
8228static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
8229 const ImplicitParamDecl *PVD,
8231 const auto *VDecl = cast<VarDecl>(Helper->getDecl());
8232 Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
8233}
8234
8236 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
8237 // Emit outlined function for task construct.
8238 const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
8239 Address CapturedStruct = Address::invalid();
8240 {
8241 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
8242 CapturedStruct = GenerateCapturedStmtArgument(*CS);
8243 }
8244 CanQualType SharedsTy =
8246 const Expr *IfCond = nullptr;
8247 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
8248 if (C->getNameModifier() == OMPD_unknown ||
8249 C->getNameModifier() == OMPD_taskloop) {
8250 IfCond = C->getCondition();
8251 break;
8252 }
8253 }
8254
8256 // Check if taskloop must be emitted without taskgroup.
8257 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
8258 // TODO: Check if we should emit tied or untied task.
8259 Data.Tied = true;
8260 // Set scheduling for taskloop
8261 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
8262 // grainsize clause
8263 Data.Schedule.setInt(/*IntVal=*/false);
8264 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
8265 Data.HasModifier =
8266 (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
8267 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
8268 // num_tasks clause
8269 Data.Schedule.setInt(/*IntVal=*/true);
8270 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
8271 Data.HasModifier =
8272 (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
8273 }
8274
8275 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
8276 // if (PreCond) {
8277 // for (IV in 0..LastIteration) BODY;
8278 // <Final counter/linear vars updates>;
8279 // }
8280 //
8281
8282 // Emit: if (PreCond) - begin.
8283 // If the condition constant folds and can be elided, avoid emitting the
8284 // whole loop.
8285 bool CondConstant;
8286 llvm::BasicBlock *ContBlock = nullptr;
8287 OMPLoopScope PreInitScope(CGF, S);
8288 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
8289 if (!CondConstant)
8290 return;
8291 } else {
8292 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
8293 ContBlock = CGF.createBasicBlock("taskloop.if.end");
8294 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
8295 CGF.getProfileCount(&S));
8296 CGF.EmitBlock(ThenBlock);
8297 CGF.incrementProfileCounter(&S);
8298 }
8299
8300 (void)CGF.EmitOMPLinearClauseInit(S);
8301
8302 OMPPrivateScope LoopScope(CGF);
8303 // Emit helper vars inits.
8304 enum { LowerBound = 5, UpperBound, Stride, LastIter };
8305 auto *I = CS->getCapturedDecl()->param_begin();
8306 auto *LBP = std::next(I, LowerBound);
8307 auto *UBP = std::next(I, UpperBound);
8308 auto *STP = std::next(I, Stride);
8309 auto *LIP = std::next(I, LastIter);
8310 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
8311 LoopScope);
8312 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
8313 LoopScope);
8314 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
8315 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
8316 LoopScope);
8317 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8318 CGF.EmitOMPLinearClause(S, LoopScope);
8319 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
8320 (void)LoopScope.Privatize();
8321 // Emit the loop iteration variable.
8322 const Expr *IVExpr = S.getIterationVariable();
8323 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
8324 CGF.EmitVarDecl(*IVDecl);
8325 CGF.EmitIgnoredExpr(S.getInit());
8326
8327 // Emit the iterations count variable.
8328 // If it is not a variable, Sema decided to calculate iterations count on
8329 // each iteration (e.g., it is foldable into a constant).
8330 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
8331 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
8332 // Emit calculation of the iterations count.
8333 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
8334 }
8335
8336 {
8337 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
8339 CGF, S,
8340 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8341 if (isOpenMPSimdDirective(S.getDirectiveKind()))
8342 CGF.EmitOMPSimdInit(S);
8343 },
8344 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
8345 CGF.EmitOMPInnerLoop(
8346 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
8347 [&S](CodeGenFunction &CGF) {
8348 emitOMPLoopBodyWithStopPoint(CGF, S,
8349 CodeGenFunction::JumpDest());
8350 },
8351 [](CodeGenFunction &) {});
8352 });
8353 }
8354 // Emit: if (PreCond) - end.
8355 if (ContBlock) {
8356 CGF.EmitBranch(ContBlock);
8357 CGF.EmitBlock(ContBlock, true);
8358 }
8359 // Emit final copy of the lastprivate variables if IsLastIter != 0.
8360 if (HasLastprivateClause) {
8361 CGF.EmitOMPLastprivateClauseFinal(
8362 S, isOpenMPSimdDirective(S.getDirectiveKind()),
8363 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
8364 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
8365 (*LIP)->getType(), S.getBeginLoc())));
8366 }
8367 LoopScope.restoreMap();
8368 CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
8369 return CGF.Builder.CreateIsNotNull(
8370 CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
8371 (*LIP)->getType(), S.getBeginLoc()));
8372 });
8373 };
8374 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
8375 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
8376 const OMPTaskDataTy &Data) {
8377 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
8378 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
8379 OMPLoopScope PreInitScope(CGF, S);
8380 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
8381 OutlinedFn, SharedsTy,
8382 CapturedStruct, IfCond, Data);
8383 };
8384 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
8385 CodeGen);
8386 };
8387 if (Data.Nogroup) {
8388 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
8389 } else {
8390 CGM.getOpenMPRuntime().emitTaskgroupRegion(
8391 *this,
8392 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
8393 PrePostActionTy &Action) {
8394 Action.Enter(CGF);
8395 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
8396 Data);
8397 },
8398 S.getBeginLoc());
8399 }
8400}
8401
8407
8409 const OMPTaskLoopSimdDirective &S) {
8410 auto LPCRegion =
8412 OMPLexicalScope Scope(*this, S);
8414}
8415
8417 const OMPMasterTaskLoopDirective &S) {
8418 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8419 Action.Enter(CGF);
8421 };
8422 auto LPCRegion =
8424 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8425 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
8426}
8427
8429 const OMPMaskedTaskLoopDirective &S) {
8430 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8431 Action.Enter(CGF);
8433 };
8434 auto LPCRegion =
8436 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8437 CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
8438}
8439
8442 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8443 Action.Enter(CGF);
8445 };
8446 auto LPCRegion =
8448 OMPLexicalScope Scope(*this, S);
8449 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
8450}
8451
8454 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8455 Action.Enter(CGF);
8457 };
8458 auto LPCRegion =
8460 OMPLexicalScope Scope(*this, S);
8461 CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
8462}
8463
8466 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8467 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8468 PrePostActionTy &Action) {
8469 Action.Enter(CGF);
8471 };
8472 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8473 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
8474 S.getBeginLoc());
8475 };
8476 auto LPCRegion =
8478 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
8480}
8481
8484 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8485 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8486 PrePostActionTy &Action) {
8487 Action.Enter(CGF);
8489 };
8490 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8491 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
8492 S.getBeginLoc());
8493 };
8494 auto LPCRegion =
8496 emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop, CodeGen,
8498}
8499
8502 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8503 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8504 PrePostActionTy &Action) {
8505 Action.Enter(CGF);
8507 };
8508 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8509 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
8510 S.getBeginLoc());
8511 };
8512 auto LPCRegion =
8514 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
8516}
8517
8520 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8521 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8522 PrePostActionTy &Action) {
8523 Action.Enter(CGF);
8525 };
8526 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8527 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
8528 S.getBeginLoc());
8529 };
8530 auto LPCRegion =
8532 emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop_simd, CodeGen,
8534}
8535
8536// Generate the instructions for '#pragma omp target update' directive.
8538 const OMPTargetUpdateDirective &S) {
8539 // If we don't have target devices, don't bother emitting the data mapping
8540 // code.
8541 if (CGM.getLangOpts().OMPTargetTriples.empty())
8542 return;
8543
8544 // Check if we have any if clause associated with the directive.
8545 const Expr *IfCond = nullptr;
8546 if (const auto *C = S.getSingleClause<OMPIfClause>())
8547 IfCond = C->getCondition();
8548
8549 // Check if we have any device clause associated with the directive.
8550 const Expr *Device = nullptr;
8551 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8552 Device = C->getDevice();
8553
8554 OMPLexicalScope Scope(*this, S, OMPD_task);
8555 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
8556}
8557
8559 const OMPGenericLoopDirective &S) {
8560 // Always expect a bind clause on the loop directive. If it wasn't
8561 // in the source, it should have been added in Sema.
8562
8564 if (const auto *C = S.getSingleClause<OMPBindClause>())
8565 BindKind = C->getBindKind();
8566
8567 switch (BindKind) {
8568 case OMPC_BIND_parallel: // for
8569 return emitOMPForDirective(S, *this, CGM, /*HasCancel=*/false);
8570 case OMPC_BIND_teams: // distribute
8571 return emitOMPDistributeDirective(S, *this, CGM);
8572 case OMPC_BIND_thread: // simd
8573 return emitOMPSimdDirective(S, *this, CGM);
8574 case OMPC_BIND_unknown:
8575 break;
8576 }
8577
8578 // Unimplemented, just inline the underlying statement for now.
8579 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8580 // Emit the loop iteration variable.
8581 const Stmt *CS =
8582 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
8583 const auto *ForS = dyn_cast<ForStmt>(CS);
8584 if (ForS && !isa<DeclStmt>(ForS->getInit())) {
8585 OMPPrivateScope LoopScope(CGF);
8586 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8587 (void)LoopScope.Privatize();
8588 CGF.EmitStmt(CS);
8589 LoopScope.restoreMap();
8590 } else {
8591 CGF.EmitStmt(CS);
8592 }
8593 };
8594 OMPLexicalScope Scope(*this, S, OMPD_unknown);
8595 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
8596}
8597
8599 const OMPLoopDirective &S) {
8600 // Emit combined directive as if its constituent constructs are 'parallel'
8601 // and 'for'.
8602 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8603 Action.Enter(CGF);
8604 emitOMPCopyinClause(CGF, S);
8605 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
8606 };
8607 {
8608 auto LPCRegion =
8610 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
8612 }
8613 // Check for outer lastprivate conditional update.
8615}
8616
8619 // To be consistent with current behavior of 'target teams loop', emit
8620 // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
8621 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8623 };
8624
8625 // Emit teams region as a standalone region.
8626 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8627 PrePostActionTy &Action) {
8628 Action.Enter(CGF);
8629 OMPPrivateScope PrivateScope(CGF);
8630 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
8631 (void)PrivateScope.Privatize();
8632 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
8633 CodeGenDistribute);
8634 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
8635 };
8636 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
8638 [](CodeGenFunction &) { return nullptr; });
8639}
8640
8641#ifndef NDEBUG
8643 std::string StatusMsg,
8644 const OMPExecutableDirective &D) {
8645 bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
8646 if (IsDevice)
8647 StatusMsg += ": DEVICE";
8648 else
8649 StatusMsg += ": HOST";
8650 SourceLocation L = D.getBeginLoc();
8651 auto &SM = CGF.getContext().getSourceManager();
8652 PresumedLoc PLoc = SM.getPresumedLoc(L);
8653 const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
8654 unsigned LineNo =
8655 PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
8656 llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
8657}
8658#endif
8659
8661 CodeGenFunction &CGF, PrePostActionTy &Action,
8663 Action.Enter(CGF);
8664 // Emit 'teams loop' as if its constituent constructs are 'distribute',
8665 // 'parallel', and 'for'.
8666 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8668 S.getDistInc());
8669 };
8670
8671 // Emit teams region as a standalone region.
8672 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8673 PrePostActionTy &Action) {
8674 Action.Enter(CGF);
8675 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8676 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
8677 (void)PrivateScope.Privatize();
8679 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
8680 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
8681 };
8682 DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8684 CGF, TTL_CODEGEN_TYPE " as parallel for", S));
8685 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
8686 CodeGenTeams);
8688 [](CodeGenFunction &) { return nullptr; });
8689}
8690
/// Emits the region of a 'target teams loop' directive using the plain
/// distribute form: the teams region is emitted with OMPD_distribute as the
/// innermost kind.
///
/// NOTE(review): several listing lines are missing from this extract (8691,
/// 8693, 8697, 8707, 8712, 8715), including the first and last signature
/// lines. The cross-reference index further down this page gives the
/// signature as
///   static void emitTargetTeamsGenericLoopRegionAsDistribute(
///       CodeGenFunction &CGF, PrePostActionTy &Action,
///       const OMPTargetTeamsGenericLoopDirective &S)
8692 CodeGenFunction &CGF, PrePostActionTy &Action,
8694 Action.Enter(CGF);
8695 // Emit 'teams loop' as if its constituent construct is 'distribute'.
8696 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
// (The lambda body is on missing line 8697.)
8698 };
8699
8700 // Emit teams region as a standalone region.
8701 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8702 PrePostActionTy &Action) {
8703 Action.Enter(CGF);
// Reduction variables are initialized into a private scope, the distribute
// code is emitted, and then the reduction is finalized with OMPD_teams.
8704 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8705 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
8706 (void)PrivateScope.Privatize();
// (The callee receiving these arguments is on missing line 8707.)
8708 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
8709 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
8710 };
// Debug-only trace, selected by the TTL_CODEGEN_TYPE debug type, recording
// that the "as distribute" path was chosen.
8711 DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8713 CGF, TTL_CODEGEN_TYPE " as distribute", S));
8714 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
// (The call taking this always-null callback is on missing line 8715.)
8716 [](CodeGenFunction &) { return nullptr; });
8717}
8718
// NOTE(review): the signature (listing lines 8719-8720), both branch bodies
// (8723, 8725) and the final statement (8727) are missing from this extract.
// What is visible: a CodeGen callback is built that picks one of two
// emission paths depending on S.canBeParallelFor().
// Presumably this is the entry point for 'target teams loop' -- confirm
// against the original file.
8721 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8722 if (S.canBeParallelFor())
8724 else
8726 };
8728}
8729
// Emits the outlined target function for a 'target teams loop' directive
// (directive name taken from the assert message below) as an offload entry.
// NOTE(review): the first and last signature lines (8730, 8732) and both
// branch bodies (8736, 8738) are missing from this extract; the visible
// parameters are `CodeGenModule &CGM` and `StringRef ParentName`, and the
// body also uses a directive `S`.
8731 CodeGenModule &CGM, StringRef ParentName,
8733 // Emit SPMD target parallel loop region as a standalone region.
8734 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
// Pick one of two emission paths depending on S.canBeParallelFor().
8735 if (S.canBeParallelFor())
8737 else
8739 };
// Fn and Addr are declared without initializers and handed to
// emitTargetOutlinedFunction; the assert afterwards requires both to be
// non-null (presumably they are out-parameters -- confirm).
8740 llvm::Function *Fn;
8741 llvm::Constant *Addr;
8742 // Emit target region as a standalone region.
8743 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8744 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
8745 assert(Fn && Addr &&
8746 "Target device function emission failed for 'target teams loop'.");
8747}
8748
/// Emits the region of a 'target parallel loop' directive as a 'parallel for':
/// the worksharing loop is emitted inside a parallel directive whose
/// innermost kind is OMPD_for.
///
/// NOTE(review): several listing lines are missing from this extract (8749,
/// 8750, 8756, 8759, 8762), including the start of the signature. The
/// cross-reference index further down this page gives the signature as
///   static void emitTargetParallelGenericLoopRegion(CodeGenFunction &CGF,
///       const OMPTargetParallelGenericLoopDirective &S,
///       PrePostActionTy &Action)
8751 PrePostActionTy &Action) {
8752 Action.Enter(CGF);
8753 // Emit as 'parallel for'.
8754 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8755 Action.Enter(CGF);
// (The declaration receiving these constructor/call arguments is on missing
// line 8756; cancellation is passed as false.)
8757 CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
// (The remaining arguments of this call are on missing line 8759.)
8758 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
8760 };
// (The last argument of this call is on missing line 8762.)
8761 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
8763}
8764
// Emits the outlined target function for a target parallel loop region (per
// the comment below) as an offload entry.
// NOTE(review): the first and last signature lines (8765, 8767) and the
// lambda body (8770) are missing from this extract; the visible parameters
// are `CodeGenModule &CGM` and `StringRef ParentName`, and the body also
// uses a directive `S`.
8766 CodeGenModule &CGM, StringRef ParentName,
8768 // Emit target parallel loop region as a standalone region.
8769 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8771 };
// Fn and Addr are declared without initializers and handed to
// emitTargetOutlinedFunction; the assert afterwards requires both to be
// non-null (presumably they are out-parameters -- confirm).
8772 llvm::Function *Fn;
8773 llvm::Constant *Addr;
8774 // Emit target region as a standalone region.
8775 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8776 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
8777 assert(Fn && Addr && "Target device function emission failed.");
8778}
8779
8780/// Emit combined directive 'target parallel loop' as if its constituent
8781/// constructs are 'target', 'parallel', and 'for'.
// NOTE(review): the signature (listing lines 8782-8783), the lambda body
// (8785) and the final statement (8787) are missing from this extract; only
// the construction of a CodeGen callback capturing `S` is visible.
8784 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8786 };
8788}
8789
// Emits an executable directive `D` in a simplified, inlined form.
// NOTE(review): the first signature line (listing line 8790) and lines 8793,
// 8820, 8864 and 8873 are missing from this extract. The body calls
// EmitStmt and uses CGM / *this unqualified, so this is presumably a
// CodeGenFunction member -- confirm against the original file.
//
// Visible behavior:
//  * scan directives take an early-return path (its statement is on missing
//    line 8793);
//  * directives without an associated statement emit nothing;
//  * atomic/critical/section/master/masked/unroll/assume directives emit
//    their associated statement directly;
//  * everything else is emitted through emitInlinedDirective using the
//    CodeGen callback below.
8791 const OMPExecutableDirective &D) {
8792 if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
8794 return;
8795 }
8796 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
8797 return;
8798 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
// GlobalsScope collects addresses for variables without local storage that
// are not yet in CGF.LocalDeclMap; they are registered via Privatize() below.
8799 OMPPrivateScope GlobalsScope(CGF);
8800 if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
8801 // Capture global firstprivates to avoid crash.
8802 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8803 for (const Expr *Ref : C->varlist()) {
// NOTE(review): llvm::cast<> is documented to assert on a type mismatch
// rather than return null, so the `!DRE` check below looks unreachable.
// Confirm whether dyn_cast<> was intended here.
8804 const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
8805 if (!DRE)
8806 continue;
8807 const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
8808 if (!VD || VD->hasLocalStorage())
8809 continue;
8810 if (!CGF.LocalDeclMap.count(VD)) {
8811 LValue GlobLVal = CGF.EmitLValue(Ref);
8812 GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
8813 }
8814 }
8815 }
8816 }
8817 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
8818 (void)GlobalsScope.Privatize();
8819 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
// (The statement that emits the simd region is on missing line 8820.)
8821 } else {
8822 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
// Loop counters: non-local counters not yet mapped get their address
// registered; captured-expression counters not yet mapped are emitted as
// variable declarations.
8823 for (const Expr *E : LD->counters()) {
8824 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
8825 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
8826 LValue GlobLVal = CGF.EmitLValue(E);
8827 GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
8828 }
8829 if (isa<OMPCapturedExprDecl>(VD)) {
8830 // Emit only those that were not explicitly referenced in clauses.
8831 if (!CGF.LocalDeclMap.count(VD))
8832 CGF.EmitVarDecl(*VD);
8833 }
8834 }
8835 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8836 if (!C->getNumForLoops())
8837 continue;
// Only indices from LD->getLoopsNumber() up to the number of loop iteration
// entries on the ordered clause are visited, i.e. the entries beyond the
// directive's own loop count.
8838 for (unsigned I = LD->getLoopsNumber(),
8839 E = C->getLoopNumIterations().size();
8840 I < E; ++I) {
8841 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8842 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
8843 // Emit only those that were not explicitly referenced in clauses.
8844 if (!CGF.LocalDeclMap.count(VD))
8845 CGF.EmitVarDecl(*VD);
8846 }
8847 }
8848 }
8849 }
8850 (void)GlobalsScope.Privatize();
8851 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
8852 }
8853 };
// These directive kinds bypass the inlined-directive machinery and simply
// emit their associated statement.
8854 if (D.getDirectiveKind() == OMPD_atomic ||
8855 D.getDirectiveKind() == OMPD_critical ||
8856 D.getDirectiveKind() == OMPD_section ||
8857 D.getDirectiveKind() == OMPD_master ||
8858 D.getDirectiveKind() == OMPD_masked ||
8859 D.getDirectiveKind() == OMPD_unroll ||
8860 D.getDirectiveKind() == OMPD_assume) {
8861 EmitStmt(D.getAssociatedStmt());
8862 } else {
// (The initializer of LPCRegion is on missing line 8864.)
8863 auto LPCRegion =
8865 OMPSimdLexicalScope Scope(*this, D);
// Every simd directive kind is emitted as OMPD_simd; any other directive
// keeps its own kind.
8866 CGM.getOpenMPRuntime().emitInlinedDirective(
8867 *this,
8868 isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
8869 : D.getDirectiveKind(),
8870 CodeGen);
8871 }
8872 // Check for outer lastprivate conditional update.
// (The statement performing that check is on missing line 8873.)
8874}
8875
// Emits only the statement associated with directive `S`; no additional
// code is generated for the directive itself in the visible body.
// NOTE(review): the signature (listing line 8876) is missing from this
// extract, so the function name and the type of `S` are not visible here.
8877 EmitStmt(S.getAssociatedStmt());
8878}
Defines the clang::ASTContext interface.
#define V(N, I)
static bool isAllocatableDecl(const VarDecl *VD)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, PrePostActionTy &Action)
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, PrePostActionTy &Action)
static const VarDecl * getBaseDecl(const Expr *Ref)
static void emitTargetTeamsGenericLoopRegionAsParallel(CodeGenFunction &CGF, PrePostActionTy &Action, const OMPTargetTeamsGenericLoopDirective &S)
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, const Expr *X, const Expr *V, SourceLocation Loc)
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, bool IsPostfixUpdate, const Expr *V, const Expr *X, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, SourceLocation Loc)
static void emitScanBasedDirective(CodeGenFunction &CGF, const OMPLoopDirective &S, llvm::function_ref< llvm::Value *(CodeGenFunction &)> NumIteratorsGen, llvm::function_ref< void(CodeGenFunction &)> FirstGen, llvm::function_ref< void(CodeGenFunction &)> SecondGen)
Emits the code for the directive with inscan reductions.
static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO, LValue LVal, RValue RVal)
static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T)
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, QualType DstType, StringRef Name, LValue AddrLV)
static bool canEmitGPUFusedDistSchedule(const CodeGenModule &CGM, const OMPLoopDirective &S, OpenMPDirectiveKind DKind)
Whether a combined distribute parallel for may use the fused distr_static_chunk + static_chunkone sch...
static void emitDistributeParallelForDistributeInnerBoundParams(CodeGenFunction &CGF, const OMPExecutableDirective &S, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars)
static void emitScanBasedDirectiveFinals(CodeGenFunction &CGF, const OMPLoopDirective &S, llvm::function_ref< llvm::Value *(CodeGenFunction &)> NumIteratorsGen)
Copies final inscan reductions values to the original variables.
static void checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, const OMPExecutableDirective &S)
static std::pair< LValue, LValue > emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S)
The following two functions generate expressions for the loop lower and upper bounds in case of stati...
static void emitTargetParallelForRegion(CodeGenFunction &CGF, const OMPTargetParallelForDirective &S, PrePostActionTy &Action)
static llvm::Function * emitOutlinedFunctionPrologueAggregate(CodeGenFunction &CGF, FunctionArgList &Args, llvm::MapVector< const Decl *, std::pair< const VarDecl *, Address > > &LocalAddrs, llvm::DenseMap< const Decl *, std::pair< const Expr *, llvm::Value * > > &VLASizes, llvm::Value *&CXXThisValue, llvm::Value *&ContextV, const CapturedStmt &CS, SourceLocation Loc, StringRef FunctionName)
static LValue EmitOMPHelperVar(CodeGenFunction &CGF, const DeclRefExpr *Helper)
Emit a helper variable and return corresponding lvalue.
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, const Expr *X, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, SourceLocation Loc)
static llvm::Value * convertToScalarValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, QualType DestType, SourceLocation Loc)
static llvm::Function * emitOutlinedOrderedFunction(CodeGenModule &CGM, const CapturedStmt *S, const OMPExecutableDirective &D)
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount)
static std::pair< bool, RValue > emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update, BinaryOperatorKind BO, llvm::AtomicOrdering AO, bool IsXLHSInRHSPart)
static std::pair< LValue, LValue > emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S)
static void emitTargetTeamsGenericLoopRegionAsDistribute(CodeGenFunction &CGF, PrePostActionTy &Action, const OMPTargetTeamsGenericLoopDirective &S)
static void emitTargetParallelRegion(CodeGenFunction &CGF, const OMPTargetParallelDirective &S, PrePostActionTy &Action)
static std::pair< llvm::Value *, llvm::Value * > emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, Address LB, Address UB)
When dealing with dispatch schedules (e.g.
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S)
static void emitRestoreIP(CodeGenFunction &CGF, const T *C, llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, llvm::OpenMPIRBuilder &OMPBuilder)
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &S, const RegionCodeGenTy &CodeGen)
static void emitSimdlenSafelenClause(CodeGenFunction &CGF, const OMPExecutableDirective &D)
static void emitAlignedClause(CodeGenFunction &CGF, const OMPExecutableDirective &D)
static bool isSimdSupportedByOpenMPIRBuilder(const OMPLoopDirective &S)
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, const CodeGenBoundParametersTy &CodeGenBoundParameters)
static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt, LoopInfoStack &LoopStack)
static bool emitWorksharingDirective(CodeGenFunction &CGF, const OMPLoopDirective &S, bool HasCancel)
static void emitPostUpdateForReductionClause(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::function_ref< llvm::Value *(CodeGenFunction &)> CondGen)
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, const unsigned IVSize, const bool IVSigned)
static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF, std::string StatusMsg, const OMPExecutableDirective &D)
static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S, bool HasCancel)
static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, llvm::AtomicOrdering AO, LValue LVal, SourceLocation Loc)
static std::pair< llvm::Value *, llvm::Value * > emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, Address LB, Address UB)
if the 'for' loop has a dispatch schedule (e.g.
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO, bool IsPostfixUpdate, const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *UE, const Expr *D, const Expr *CE, bool IsXLHSInRHSPart, bool IsFailOnly, SourceLocation Loc)
#define TTL_CODEGEN_TYPE
static CodeGenFunction::ComplexPairTy convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, QualType DestType, SourceLocation Loc)
static ImplicitParamDecl * createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, QualType Ty, CapturedDecl *CD, SourceLocation Loc)
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, const CapturedStmt *S)
Emit a captured statement and return the function as well as its captured closure context.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, const OMPLoopDirective &S, CodeGenFunction::JumpDest LoopExit)
static void emitOMPDistributeDirective(const OMPLoopDirective &S, CodeGenFunction &CGF, CodeGenModule &CGM)
static void emitOMPCopyinClause(CodeGenFunction &CGF, const OMPExecutableDirective &S)
static void emitTargetTeamsDistributeParallelForRegion(CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, PrePostActionTy &Action)
static bool hasOrderedDirective(const Stmt *S)
static llvm::CallInst * emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, llvm::ArrayRef< llvm::Value * > Args)
Emit a call to a previously captured closure.
static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S)
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, int MaxLevel, int Level=0)
static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF, CodeGenModule &CGM, bool HasCancel)
static void emitEmptyBoundParameters(CodeGenFunction &, const OMPExecutableDirective &, llvm::SmallVectorImpl< llvm::Value * > &)
static void emitTargetParallelForSimdRegion(CodeGenFunction &CGF, const OMPTargetParallelForSimdDirective &S, PrePostActionTy &Action)
static void emitOMPSimdDirective(const OMPLoopDirective &S, CodeGenFunction &CGF, CodeGenModule &CGM)
static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO, const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D, const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly, SourceLocation Loc)
std::pair< llvm::Function *, llvm::Value * > EmittedClosureTy
static OpenMPDirectiveKind getEffectiveDirectiveKind(const OMPExecutableDirective &S)
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, const OMPTargetTeamsDirective &S)
static void buildDependences(const OMPExecutableDirective &S, OMPTaskDataTy &Data)
static RValue convertToType(CodeGenFunction &CGF, RValue Value, QualType SourceType, QualType ResType, SourceLocation Loc)
static void emitScanBasedDirectiveDecls(CodeGenFunction &CGF, const OMPLoopDirective &S, llvm::function_ref< llvm::Value *(CodeGenFunction &)> NumIteratorsGen)
Emits internal temp array declarations for the directive with inscan reductions.
static void emitTargetTeamsDistributeParallelForSimdRegion(CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForSimdDirective &S, PrePostActionTy &Action)
static void emitTargetTeamsDistributeSimdRegion(CodeGenFunction &CGF, PrePostActionTy &Action, const OMPTargetTeamsDistributeSimdDirective &S)
static llvm::MapVector< llvm::Value *, llvm::Value * > GetAlignedMapping(const OMPLoopDirective &S, CodeGenFunction &CGF)
static llvm::omp::ScheduleKind convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind)
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, const ImplicitParamDecl *PVD, CodeGenFunction::OMPPrivateScope &Privates)
Emit a helper variable and return corresponding lvalue.
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
static void emitTargetParallelGenericLoopRegion(CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, PrePostActionTy &Action)
static QualType getCanonicalParamType(ASTContext &C, QualType T)
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, const RegionCodeGenTy &SimdInitGen, const RegionCodeGenTy &BodyCodeGen)
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, const Twine &Name, llvm::Value *Init=nullptr)
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, const Expr *X, const Expr *E, SourceLocation Loc)
static llvm::Function * emitOutlinedFunctionPrologue(CodeGenFunction &CGF, FunctionArgList &Args, llvm::MapVector< const Decl *, std::pair< const VarDecl *, Address > > &LocalAddrs, llvm::DenseMap< const Decl *, std::pair< const Expr *, llvm::Value * > > &VLASizes, llvm::Value *&CXXThisValue, const FunctionOptions &FO)
static void emitInnerParallelForWhenCombined(CodeGenFunction &CGF, const OMPLoopDirective &S, CodeGenFunction::JumpDest LoopExit)
static void emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, const OMPTargetTeamsDistributeDirective &S)
This file defines OpenMP nodes for declarative directives.
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
static const Decl * getCanonicalDecl(const Decl *D)
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the PrettyStackTraceEntry class, which is used to make crashes give more contextual informati...
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
This represents clause 'aligned' in the 'pragma omp ...' directives.
This represents 'bind' clause in the 'pragma omp ...' directives.
This represents 'pragma omp cancel' directive.
OpenMPDirectiveKind getCancelRegion() const
Get cancellation region for the current cancellation point.
This represents 'pragma omp cancellation point' directive.
OpenMPDirectiveKind getCancelRegion() const
Get cancellation region for the current cancellation point.
The base class for all transformation directives of canonical loop sequences (currently only 'fuse')
This represents clause 'copyin' in the 'pragma omp ...' directives.
This represents clause 'copyprivate' in the 'pragma omp ...' directives.
This represents implicit clause 'depend' for the 'pragma omp task' directive.
This represents implicit clause 'depobj' for the 'pragma omp depobj' directive. This clause does not ...
This represents 'destroy' clause in the 'pragma omp depobj' directive or the 'pragma omp interop' dir...
This represents 'device' clause in the 'pragma omp ...' directive.
This represents 'dist_schedule' clause in the 'pragma omp ...' directive.
This represents 'pragma omp distribute' directive.
This represents 'pragma omp distribute parallel for' composite directive.
This represents 'pragma omp distribute parallel for simd' composite directive.
This represents 'pragma omp distribute simd' composite directive.
This represents the 'doacross' clause for the 'pragma omp ordered' directive.
This represents 'pragma omp error' directive.
This represents 'filter' clause in the 'pragma omp ...' directive.
This represents implicit clause 'flush' for the 'pragma omp flush' directive. This clause does not ex...
Represents the 'pragma omp fuse' loop transformation directive.
Stmt * getTransformedStmt() const
Gets the associated loops after the transformation.
This represents 'pragma omp loop' directive.
This represents 'grainsize' clause in the 'pragma omp ...' directive.
This represents 'hint' clause in the 'pragma omp ...' directive.
This represents clause 'inclusive' in the 'pragma omp scan' directive.
This represents the 'init' clause in 'pragma omp ...' directives.
Represents the 'pragma omp interchange' loop transformation directive.
Stmt * getTransformedStmt() const
Gets the associated loops after the transformation.
This represents 'pragma omp interop' directive.
This represents 'pragma omp masked' directive.
This represents 'pragma omp masked taskloop' directive.
This represents 'pragma omp masked taskloop simd' directive.
This represents 'pragma omp master taskloop' directive.
This represents 'pragma omp master taskloop simd' directive.
This represents 'pragma omp metadirective' directive.
Stmt * getIfStmt() const
This represents 'nogroup' clause in the 'pragma omp ...' directive.
This represents 'num_tasks' clause in the 'pragma omp ...' directive.
This represents 'num_teams' clause in the 'pragma omp ...' directive.
This represents 'order' clause in the 'pragma omp ...' directive.
This represents 'pragma omp parallel masked taskloop' directive.
This represents 'pragma omp parallel masked taskloop simd' directive.
This represents 'pragma omp parallel master taskloop' directive.
This represents 'pragma omp parallel master taskloop simd' directive.
This represents 'priority' clause in the 'pragma omp ...' directive.
Represents the 'pragma omp reverse' loop transformation directive.
Stmt * getTransformedStmt() const
Gets/sets the associated loops after the transformation, i.e.
This represents 'simd' clause in the 'pragma omp ...' directive.
This represents 'pragma omp scan' directive.
Represents the 'pragma omp split' loop transformation directive.
Stmt * getTransformedStmt() const
Gets/sets the associated loops after the transformation, i.e.
This represents the 'pragma omp stripe' loop transformation directive.
Stmt * getTransformedStmt() const
Gets/sets the associated loops after striping.
This represents 'pragma omp target data' directive.
This represents 'pragma omp target' directive.
This represents 'pragma omp target enter data' directive.
This represents 'pragma omp target exit data' directive.
This represents 'pragma omp target parallel' directive.
This represents 'pragma omp target parallel for' directive.
bool hasCancel() const
Return true if current directive has inner cancel directive.
This represents 'pragma omp target parallel for simd' directive.
This represents 'pragma omp target parallel loop' directive.
This represents 'pragma omp target simd' directive.
This represents 'pragma omp target teams' directive.
This represents 'pragma omp target teams distribute' combined directive.
This represents 'pragma omp target teams distribute parallel for' combined directive.
This represents 'pragma omp target teams distribute parallel for simd' combined directive.
This represents 'pragma omp target teams distribute simd' combined directive.
This represents 'pragma omp target teams loop' directive.
bool canBeParallelFor() const
Return true if current loop directive's associated loop can be a parallel for.
This represents 'pragma omp target update' directive.
This represents 'pragma omp taskloop' directive.
This represents 'pragma omp taskloop simd' directive.
This represents 'pragma omp teams' directive.
This represents 'pragma omp teams distribute' directive.
This represents 'pragma omp teams distribute parallel for' composite directive.
This represents 'pragma omp teams distribute parallel for simd' composite directive.
This represents 'pragma omp teams distribute simd' combined directive.
This represents 'pragma omp teams loop' directive.
This represents 'thread_limit' clause in the 'pragma omp ...' directive.
This represents the 'pragma omp tile' loop transformation directive.
Stmt * getTransformedStmt() const
Gets/sets the associated loops after tiling.
This represents the 'pragma omp unroll' loop transformation directive.
This represents the 'use' clause in 'pragma omp ...' directives.
This represents clause 'use_device_addr' in the 'pragma omp ...' directives.
This represents clause 'use_device_ptr' in the 'pragma omp ...' directives.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:223
SourceManager & getSourceManager()
Definition ASTContext.h:863
TranslationUnitDecl * getTranslationUnitDecl() const
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
IdentifierTable & Idents
Definition ASTContext.h:802
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
TypeSourceInfo * getTrivialTypeSourceInfo(QualType T, SourceLocation Loc=SourceLocation()) const
Allocate a TypeSourceInfo where all locations have been initialized to a given location,...
unsigned getOpenMPDefaultSimdAlign(QualType T) const
Get default simd alignment of the specified complete type in bits.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
QualType getFunctionType(QualType ResultTy, ArrayRef< QualType > Args, const FunctionProtoType::ExtProtoInfo &EPI) const
Return a normal function type with a typed argument list.
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
CanQualType getCanonicalTagType(const TagDecl *TD) const
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3784
Represents an attribute applied to a statement.
Definition Stmt.h:2213
ArrayRef< const Attr * > getAttrs() const
Definition Stmt.h:2245
static BinaryOperator * Create(const ASTContext &C, Expr *lhs, Expr *rhs, Opcode opc, QualType ResTy, ExprValueKind VK, ExprObjectKind OK, SourceLocation opLoc, FPOptionsOverride FPFeatures)
Definition Expr.cpp:5103
Represents the body of a CapturedStmt, and serves as its DeclContext.
Definition Decl.h:4966
unsigned getNumParams() const
Definition Decl.h:5004
ImplicitParamDecl * getContextParam() const
Retrieve the parameter containing captured variables.
Definition Decl.h:5024
unsigned getContextParamPosition() const
Definition Decl.h:5033
bool isNothrow() const
Definition Decl.cpp:5700
static CapturedDecl * Create(ASTContext &C, DeclContext *DC, unsigned NumParams)
Definition Decl.cpp:5685
param_iterator param_end() const
Retrieve an iterator one past the last parameter decl.
Definition Decl.h:5041
param_iterator param_begin() const
Retrieve an iterator pointing to the first parameter decl.
Definition Decl.h:5039
Stmt * getBody() const override
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition Decl.cpp:5697
ImplicitParamDecl * getParam(unsigned i) const
Definition Decl.h:5006
This captures a statement into a function.
Definition Stmt.h:3947
SourceLocation getEndLoc() const LLVM_READONLY
Definition Stmt.h:4146
CapturedDecl * getCapturedDecl()
Retrieve the outlined function declaration.
Definition Stmt.cpp:1493
child_range children()
Definition Stmt.cpp:1484
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4068
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:4051
capture_init_iterator capture_init_begin()
Retrieve the first initialization argument.
Definition Stmt.h:4124
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.h:4142
capture_init_iterator capture_init_end()
Retrieve the iterator pointing one past the last initialization argument.
Definition Stmt.h:4134
capture_range captures()
Definition Stmt.h:4085
Expr *const * const_capture_init_iterator
Const iterator that walks over the capture initialization arguments.
Definition Stmt.h:4111
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
std::string SampleProfileFile
Name of the profile file to use with -fprofile-sample-use.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition Address.h:269
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static AggValueSlot ignored()
ignored - Returns an aggregate value slot indicating that the aggregate value is being ignored.
Definition CGValue.h:619
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:213
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:118
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition CGBuilder.h:138
CGFunctionInfo - Class to encapsulate the information about a function definition.
Manages list of lastprivate conditional decls for the specified directive.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
Manages list of nontemporal decls for the specified directive.
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between a declaration of a capture and the corresponding new llvm address where the runtime r...
Manages list of nontemporal decls for the specified directive.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const
Translates the native parameter of outlined function if this is required for target.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument based on the address of the target-specific parameter.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit code for initialization of the task reduction clause.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32global_tid, int proc_bind) to generate...
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getAllocatedAddress() const
Returns the raw, allocated address, which is not necessarily the address of the object itself.
API for captured statement code generation.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
LValue getReferenceLValue(CodeGenFunction &CGF, const Expr *RefExpr) const
void ForceCleanup()
Force the emission of cleanups now, instead of waiting until this object is destroyed.
RAII for preserving necessary info during inlined region body codegen.
RAII for preserving necessary info during Outlined region body codegen.
Controls insertion of cancellation exit blocks in worksharing constructs.
Save/restore original map of previously emitted local vars in case when we need to duplicate emission...
The class used to assign some variables some temporary addresses.
bool apply(CodeGenFunction &CGF)
Applies new addresses to the list of the variables.
void restore(CodeGenFunction &CGF)
Restores original addresses of the variables.
bool setVarAddr(CodeGenFunction &CGF, const VarDecl *LocalVD, Address TempAddr)
Sets the address of the variable LocalVD to be TempAddr in function CGF.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
void restoreMap()
Restore all mapped variables w/o clean up.
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
void ForceCleanup(std::initializer_list< llvm::Value ** > ValuesToReload={})
Force the emission of cleanups now, instead of waiting until this object is destroyed.
bool requiresCleanups() const
Determine whether this scope requires any cleanups.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void EmitOMPParallelMaskedTaskLoopDirective(const OMPParallelMaskedTaskLoopDirective &S)
void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S)
void EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &S)
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void EmitOMPLastprivateClauseFinal(const OMPExecutableDirective &D, bool NoFinals, llvm::Value *IsLastIterCond=nullptr)
Emit final copying of lastprivate values to original variables at the end of the worksharing or simd ...
void processInReduction(const OMPExecutableDirective &S, OMPTaskDataTy &Data, CodeGenFunction &CGF, const CapturedStmt *CS, OMPPrivateScope &Scope)
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S)
void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy, SourceLocation Loc)
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitOMPCanonicalLoop(const OMPCanonicalLoop *S)
Emit an OMPCanonicalLoop using the OpenMPIRBuilder.
void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S)
void EmitOMPScanDirective(const OMPScanDirective &S)
static bool hasScalarEvaluationKind(QualType T)
llvm::function_ref< std::pair< llvm::Value *, llvm::Value * >(CodeGenFunction &, const OMPExecutableDirective &S, Address LB, Address UB)> CodeGenDispatchBoundsTy
LValue InitCapturedStruct(const CapturedStmt &S)
Definition CGStmt.cpp:3378
CGCapturedStmtInfo * CapturedStmtInfo
void EmitOMPDistributeDirective(const OMPDistributeDirective &S)
void EmitOMPParallelForDirective(const OMPParallelForDirective &S)
void EmitOMPMasterDirective(const OMPMasterDirective &S)
void EmitOMPParallelMasterTaskLoopSimdDirective(const OMPParallelMasterTaskLoopSimdDirective &S)
void EmitOMPSimdInit(const OMPLoopDirective &D)
Helpers for the OpenMP loop directives.
const OMPExecutableDirective * OMPParentLoopDirectiveForScan
Parent loop-based directive for scan directive.
void EmitOMPFlushDirective(const OMPFlushDirective &S)
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
bool EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope)
void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S)
void EmitOMPTargetTeamsDistributeParallelForSimdDirective(const OMPTargetTeamsDistributeParallelForSimdDirective &S)
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
void EmitOMPReductionClauseInit(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope, bool ForInscan=false)
Emit initial code for reduction variables.
void EmitOMPDistributeSimdDirective(const OMPDistributeSimdDirective &S)
void EmitAutoVarDecl(const VarDecl &D)
EmitAutoVarDecl - Emit an auto variable declaration.
Definition CGDecl.cpp:1349
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::Function * GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S, const OMPExecutableDirective &D)
void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
void EmitOMPTargetParallelForDirective(const OMPTargetParallelForDirective &S)
const LangOptions & getLangOpts() const
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1483
void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO, const llvm::function_ref< RValue(RValue)> &UpdateOp, bool IsVolatile)
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3422
void EmitOMPSplitDirective(const OMPSplitDirective &S)
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind)
Emit final update of reduction values to original variables at the end of the directive.
void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit)
Helper for the OpenMP loop directives.
void EmitOMPScopeDirective(const OMPScopeDirective &S)
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitOMPTargetTeamsDistributeSimdDirective(const OMPTargetTeamsDistributeSimdDirective &S)
const llvm::function_ref< void(CodeGenFunction &, llvm::Function *, const OMPTaskDataTy &)> TaskGenTy
llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location)
Converts Location to a DebugLoc, if debug information is enabled.
bool EmitOMPCopyinClause(const OMPExecutableDirective &D)
Emit code for copyin clause in D directive.
void EmitOMPLinearClause(const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope)
Emit initial code for linear clauses.
llvm::BasicBlock * OMPBeforeScanBlock
void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S)
void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S, OMPPrivateScope &LoopScope)
Emit initial code for loop counters of loop-based directives.
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitOMPDepobjDirective(const OMPDepobjDirective &S)
void EmitOMPMetaDirective(const OMPMetaDirective &S)
void EmitOMPCriticalDirective(const OMPCriticalDirective &S)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:258
void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S)
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2519
void EmitOMPCancelDirective(const OMPCancelDirective &S)
void EmitOMPBarrierDirective(const OMPBarrierDirective &S)
llvm::Value * EmitComplexToScalarConversion(ComplexPairTy Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified complex type to the specified destination type,...
void EmitOMPOrderedDirective(const OMPOrderedDirective &S)
bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB, const CodeGenLoopBoundsTy &CodeGenLoopBounds, const CodeGenDispatchBoundsTy &CGDispatchBounds)
Emit code for the worksharing loop-based directive.
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
llvm::CanonicalLoopInfo * EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth)
Emit the Stmt S and return its topmost canonical loop, if any.
void EmitOMPSectionsDirective(const OMPSectionsDirective &S)
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPInteropDirective(const OMPInteropDirective &S)
void EmitOMPParallelSectionsDirective(const OMPParallelSectionsDirective &S)
void EmitOMPTargetParallelDirective(const OMPTargetParallelDirective &S)
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:239
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
void EmitOMPTargetParallelForSimdDirective(const OMPTargetParallelForSimdDirective &S)
void EmitOMPTargetParallelGenericLoopDirective(const OMPTargetParallelGenericLoopDirective &S)
Emit combined directive 'target parallel loop' as if its constituent constructs are 'target',...
void EmitOMPUseDeviceAddrClause(const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, const llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap)
void EmitOMPTeamsDistributeParallelForSimdDirective(const OMPTeamsDistributeParallelForSimdDirective &S)
void EmitOMPMaskedDirective(const OMPMaskedDirective &S)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
void EmitOMPTeamsDistributeSimdDirective(const OMPTeamsDistributeSimdDirective &S)
RValue EmitAtomicLoad(LValue LV, SourceLocation SL, AggValueSlot Slot=AggValueSlot::ignored())
void EmitOMPDistributeLoop(const OMPLoopDirective &S, const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr)
Emit code for the distribute loop-based directive.
void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S)
void EmitOMPReverseDirective(const OMPReverseDirective &S)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
void EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S)
void EmitOMPTargetTeamsDistributeParallelForDirective(const OMPTargetTeamsDistributeParallelForDirective &S)
void EmitOMPMaskedTaskLoopDirective(const OMPMaskedTaskLoopDirective &S)
llvm::function_ref< std::pair< LValue, LValue >(CodeGenFunction &, const OMPExecutableDirective &S)> CodeGenLoopBoundsTy
void EmitOMPTargetExitDataDirective(const OMPTargetExitDataDirective &S)
RawAddress CreateMemTempWithoutCast(QualType T, const Twine &Name="tmp")
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment without...
Definition CGExpr.cpp:231
void EmitOMPTargetEnterDataDirective(const OMPTargetEnterDataDirective &S)
void EmitOMPMaskedTaskLoopSimdDirective(const OMPMaskedTaskLoopSimdDirective &S)
std::pair< bool, RValue > EmitOMPAtomicSimpleUpdateExpr(LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, llvm::AtomicOrdering AO, SourceLocation Loc, const llvm::function_ref< RValue(RValue)> CommonGen)
Emit atomic update code for constructs: X = X BO E or X = E BO X.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
void EmitOMPParallelDirective(const OMPParallelDirective &S)
void EmitOMPTaskDirective(const OMPTaskDirective &S)
void EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S)
void EmitOMPDistributeParallelForDirective(const OMPDistributeParallelForDirective &S)
void EmitOMPAssumeDirective(const OMPAssumeDirective &S)
int ExpectedOMPLoopDepth
Number of nested loops to be consumed by the last surrounding loop-associated directive.
void EmitOMPPrivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope)
void EmitOMPTeamsDistributeDirective(const OMPTeamsDistributeDirective &S)
void EmitStopPoint(const Stmt *S)
EmitStopPoint - Emit a debug stoppoint if we are emitting debug info.
Definition CGStmt.cpp:48
void EmitOMPTargetUpdateDirective(const OMPTargetUpdateDirective &S)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitOMPTargetTeamsGenericLoopDirective(const OMPTargetTeamsGenericLoopDirective &S)
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2218
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2770
SmallVector< llvm::CanonicalLoopInfo *, 4 > OMPLoopNestStack
List of recently emitted OMPCanonicalLoops.
void EmitOMPTeamsDistributeParallelForDirective(const OMPTeamsDistributeParallelForDirective &S)
llvm::AtomicRMWInst * emitAtomicRMWInst(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Order=llvm::AtomicOrdering::SequentiallyConsistent, llvm::SyncScope::ID SSID=llvm::SyncScope::System, const AtomicExpr *AE=nullptr)
Emit an atomicrmw instruction, and apply relevant metadata when applicable.
void EmitOMPFuseDirective(const OMPFuseDirective &S)
void EmitOMPTargetTeamsDistributeDirective(const OMPTargetTeamsDistributeDirective &S)
void EmitOMPUseDevicePtrClause(const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, const llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap)
RValue EmitAnyExpr(const Expr *E, AggValueSlot aggSlot=AggValueSlot::ignored(), bool ignoreResult=false)
EmitAnyExpr - Emit code to compute the specified expression which can have any type.
Definition CGExpr.cpp:280
void EmitStmt(const Stmt *S, ArrayRef< const Attr * > Attrs={})
EmitStmt - Emit the code for the statement.
Definition CGStmt.cpp:58
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
void EmitOMPParallelForSimdDirective(const OMPParallelForSimdDirective &S)
llvm::Type * ConvertTypeForMem(QualType T)
void EmitOMPInnerLoop(const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, const Expr *IncExpr, const llvm::function_ref< void(CodeGenFunction &)> BodyGen, const llvm::function_ref< void(CodeGenFunction &)> PostIncGen)
Emit inner loop of the worksharing/simd construct.
void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
void EmitOMPTargetDirective(const OMPTargetDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTeamsDirective(const OMPTeamsDirective &S)
void EmitSimpleOMPExecutableDirective(const OMPExecutableDirective &D)
Emit simple code for OpenMP directives in Simd-only mode.
void EmitOMPErrorDirective(const OMPErrorDirective &S)
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
void EmitOMPParallelMaskedTaskLoopSimdDirective(const OMPParallelMaskedTaskLoopSimdDirective &S)
void EmitOMPTargetTeamsDirective(const OMPTargetTeamsDirective &S)
void EmitOMPTargetDataDirective(const OMPTargetDataDirective &S)
Address GenerateCapturedStmtArgument(const CapturedStmt &S)
Definition CGStmt.cpp:3419
bool EmitOMPLastprivateClauseInit(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope)
Emit initial code for lastprivate variables.
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:663
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
void EmitOMPSimdDirective(const OMPSimdDirective &S)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:195
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
Definition CGExpr.cpp:3380
void EmitOMPParallelGenericLoopDirective(const OMPLoopDirective &S)
void EmitOMPTargetSimdDirective(const OMPTargetSimdDirective &S)
void EmitOMPTeamsGenericLoopDirective(const OMPTeamsGenericLoopDirective &S)
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:204
bool EmitOMPLinearClauseInit(const OMPLoopDirective &D)
Emit initial code for linear variables.
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
void EmitOMPUnrollDirective(const OMPUnrollDirective &S)
void EmitOMPStripeDirective(const OMPStripeDirective &S)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void EmitOMPSingleDirective(const OMPSingleDirective &S)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::function_ref< void(CodeGenFunction &, SourceLocation, const unsigned, const bool)> CodeGenOrderedTy
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
Definition CGExpr.cpp:2278
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
llvm::Function * GenerateCapturedStmtFunction(const CapturedStmt &S)
Creates the outlined function for a CapturedStmt.
Definition CGStmt.cpp:3426
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
uint64_t getProfileCount(const Stmt *S)
Get the profiler's count for the given statement.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
void EmitOMPTileDirective(const OMPTileDirective &S)
void EmitDecl(const Decl &D, bool EvaluateConditionDecl=false)
EmitDecl - Emit a declaration.
Definition CGDecl.cpp:52
void EmitOMPAtomicDirective(const OMPAtomicDirective &S)
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
ConstantEmission tryEmitAsConstant(const DeclRefExpr *RefExpr)
Try to emit a reference to the given value without producing it as an l-value.
Definition CGExpr.cpp:1941
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1714
void EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst)
Stores of global named registers are always calls to intrinsics.
Definition CGExpr.cpp:3220
void EmitOMPParallelMasterTaskLoopDirective(const OMPParallelMasterTaskLoopDirective &S)
void EmitOMPDistributeParallelForSimdDirective(const OMPDistributeParallelForSimdDirective &S)
void EmitOMPSectionDirective(const OMPSectionDirective &S)
void EnsureInsertPoint()
EnsureInsertPoint - Ensure that an insertion point is defined so that emitted IR has a place to go.
void EmitOMPForSimdDirective(const OMPForSimdDirective &S)
llvm::LLVMContext & getLLVMContext()
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::function_ref< void(CodeGenFunction &, const OMPLoopDirective &, JumpDest)> CodeGenLoopTy
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1823
void EmitOMPParallelMasterDirective(const OMPParallelMasterDirective &S)
void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, OMPTaskDataTy &Data)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitOMPForDirective(const OMPForDirective &S)
void EmitOMPLinearClauseFinal(const OMPLoopDirective &D, const llvm::function_ref< llvm::Value *(CodeGenFunction &)> CondGen)
Emit final code for linear clauses.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:643
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2108
void EmitOMPSimdFinal(const OMPLoopDirective &D, const llvm::function_ref< llvm::Value *(CodeGenFunction &)> CondGen)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
const llvm::DataLayout & getDataLayout() const
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
const llvm::Triple & getTriple() const
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given CGFunctionInfo.
Definition CGCall.cpp:1873
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:747
const CGFunctionInfo & arrangeDeviceKernelCallerDeclaration(QualType resultType, const FunctionArgList &args)
A device kernel caller function is an offload device entry point function with a target device depend...
Definition CGCall.cpp:763
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:376
LValue - This represents an lvalue reference.
Definition CGValue.h:183
llvm::Value * getPointer(CodeGenFunction &CGF) const
Address getAddress() const
Definition CGValue.h:373
QualType getType() const
Definition CGValue.h:303
void setAddress(Address address)
Definition CGValue.h:375
A stack of loop information corresponding to loop nesting levels.
Definition CGLoopInfo.h:210
void setVectorizeWidth(unsigned W)
Set the vectorize width for the next loop pushed.
Definition CGLoopInfo.h:280
void setParallel(bool Enable=true)
Set the next pushed loop as parallel.
Definition CGLoopInfo.h:245
void push(llvm::BasicBlock *Header, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc)
Begin a new structured loop.
void setVectorizeEnable(bool Enable=true)
Set the next pushed loop 'vectorize.enable'.
Definition CGLoopInfo.h:248
A basic class for pre-/post-actions in an advanced codegen sequence for an OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
bool isScalar() const
Definition CGValue.h:64
static RValue get(llvm::Value *V)
Definition CGValue.h:99
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:109
bool isAggregate() const
Definition CGValue.h:66
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
bool isComplex() const
Definition CGValue.h:65
std::pair< llvm::Value *, llvm::Value * > getComplexVal() const
getComplexVal - Return the real/imag components of this complex value.
Definition CGValue.h:79
An abstract representation of an aligned address.
Definition Address.h:42
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:72
llvm::Value * getPointer() const
Definition Address.h:66
Class intended to support codegen of all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr so that it can be used instead of the original variable address in normal operations.
Class provides a way to call a simple version of codegen for an OpenMP region, or an advanced one with possibl...
void setAction(PrePostActionTy &Action) const
Complex values, per C99 6.2.5p11.
Definition TypeBase.h:3337
CompoundStmt - This represents a group of statements like { stmt stmt }.
Definition Stmt.h:1750
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext * getParent()
getParent - Returns the containing DeclContext.
Definition DeclBase.h:2122
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1273
static DeclRefExpr * Create(const ASTContext &Context, NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc, ValueDecl *D, bool RefersToEnclosingVariableOrCapture, SourceLocation NameLoc, QualType T, ExprValueKind VK, NamedDecl *FoundD=nullptr, const TemplateArgumentListInfo *TemplateArgs=nullptr, NonOdrUseReason NOUR=NOUR_None)
Definition Expr.cpp:494
ValueDecl * getDecl()
Definition Expr.h:1341
DeclStmt - Adaptor class for mixing declarations with statements and expressions.
Definition Stmt.h:1641
decl_range decls()
Definition Stmt.h:1689
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:581
SourceLocation getBodyRBrace() const
getBodyRBrace - Gets the right brace of the body, if a body exists.
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
Definition DeclBase.h:1106
SourceLocation getLocation() const
Definition DeclBase.h:447
bool hasAttr() const
Definition DeclBase.h:585
The name of a declaration.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
This represents one expression.
Definition Expr.h:112
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3098
Expr * IgnoreImplicitAsWritten() LLVM_READONLY
Skip past any implicit AST nodes which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3090
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3078
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:283
QualType getType() const
Definition Expr.h:144
Represents difference between two FPOptions values.
Represents a member of a struct/union/class.
Definition Decl.h:3182
Represents a function declaration or definition.
Definition Decl.h:2018
static FunctionDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation NLoc, DeclarationName N, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool UsesFPIntrin=false, bool isInlineSpecified=false, bool hasWrittenPrototype=true, ConstexprSpecKind ConstexprKind=ConstexprSpecKind::Unspecified, const AssociatedConstraint &TrailingRequiresClause={})
Definition Decl.h:2207
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
One of these records is kept for each identifier that is lexed.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitCastExpr * Create(const ASTContext &Context, QualType T, CastKind Kind, Expr *Operand, const CXXCastPath *BasePath, ExprValueKind Cat, FPOptionsOverride FPO)
Definition Expr.cpp:2079
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5596
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Represents a point when we exit a loop.
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Definition Decl.h:295
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
A C++ nested-name-specifier augmented with source location information.
This is a basic class for representing single OpenMP clause.
This represents 'final' clause in the 'pragma omp ...' directive.
Representation of the 'full' clause of the 'pragma omp unroll' directive.
This represents 'if' clause in the 'pragma omp ...' directive.
This represents 'num_threads' clause in the 'pragma omp ...' directive.
Representation of the 'partial' clause of the 'pragma omp unroll' directive.
This represents 'safelen' clause in the 'pragma omp ...' directive.
This represents 'simdlen' clause in the 'pragma omp ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1181
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
Definition Decl.cpp:2931
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3390
Represents an unpacked "presumed" location which can be presented to the user.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
If a crash happens while one of these objects is live, the message is printed out along with the spe...
A (possibly-)qualified type.
Definition TypeBase.h:937
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8630
Represents a struct/union/class.
Definition Decl.h:4347
unsigned getNumFields() const
Returns the number of fields (non-static data members) in this record.
Definition Decl.h:4563
field_range fields() const
Definition Decl.h:4550
field_iterator field_begin() const
Definition Decl.cpp:5269
Base for LValueReferenceType and RValueReferenceType.
Definition TypeBase.h:3635
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition Stmt.h:86
child_range children()
Definition Stmt.cpp:304
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:343
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:210
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:355
bool isArrayType() const
Definition TypeBase.h:8781
bool isPointerType() const
Definition TypeBase.h:8682
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9342
bool isReferenceType() const
Definition TypeBase.h:8706
bool isLValueReferenceType() const
Definition TypeBase.h:8710
bool isAnyComplexType() const
Definition TypeBase.h:8817
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition Type.cpp:2311
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2862
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9328
static UnaryOperator * Create(const ASTContext &C, Expr *input, Opcode opc, QualType type, ExprValueKind VK, ExprObjectKind OK, SourceLocation l, bool CanOverflow, FPOptionsOverride FPFeatures)
Definition Expr.cpp:5160
Represents the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:924
TLSKind getTLSKind() const
Definition Decl.cpp:2147
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2236
@ CInit
C-style initialization with assignment.
Definition Decl.h:929
bool hasGlobalStorage() const
Returns true for all variables that do not have local storage.
Definition Decl.h:1239
bool isStaticLocal() const
Returns true if a variable with function scope is a static local variable.
Definition Decl.h:1206
const Expr * getInit() const
Definition Decl.h:1381
bool hasLocalStorage() const
Returns true if a variable with function scope is a non-static local variable.
Definition Decl.h:1182
@ TLS_None
Not a TLS variable.
Definition Decl.h:944
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:4028
Expr * getSizeExpr() const
Definition TypeBase.h:4042
Definition SPIR.cpp:35
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:146
@ Address
A pointer to a ValueDecl.
Definition Primitives.h:28
bool Inc(InterpState &S, CodePtr OpPC, bool CanOverflow)
1) Pops a pointer from the stack 2) Load the value from the pointer 3) Writes the value increased by ...
Definition Interp.h:962
CharSourceRange getSourceRange(const SourceRange &Range)
Returns the token CharSourceRange corresponding to Range.
Definition FixIt.h:32
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
Privates[]
This class represents the 'transparent' clause in the 'pragma omp task' directive.
bool isa(CodeGen::Address addr)
Definition Address.h:330
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
@ OK_Ordinary
An ordinary object is located at an address in memory.
Definition Specifiers.h:152
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ Tile
'tile' clause, allowed on 'loop' and Combined constructs.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
@ SC_Static
Definition Specifiers.h:253
@ SC_None
Definition Specifiers.h:251
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
@ OMPC_DIST_SCHEDULE_unknown
Expr * Cond
};
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
bool isOpenMPGenericLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive constitutes a 'loop' directive in the outermost nest.
OpenMPBindClauseKind
OpenMP bindings for the 'bind' clause.
@ OMPC_BIND_unknown
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
bool IsXLHSInRHSPart
True if UE has the first form and false if the second.
bool IsPostfixUpdate
True if original value of 'x' must be stored in 'v', not an updated one.
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the composite or combined directives that need loop ...
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:136
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:140
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
OpenMPNumThreadsClauseModifier
@ OMPC_NUMTHREADS_unknown
bool IsFailOnly
True if 'v' is updated only when the condition is false (compare capture only).
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
llvm::omp::Clause OpenMPClauseKind
OpenMP clauses.
Definition OpenMPKinds.h:28
@ ThreadPrivateVar
Parameter for Thread private variable.
Definition Decl.h:1760
@ Other
Other implicit parameter.
Definition Decl.h:1763
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
#define true
Definition stdbool.h:25
Struct with the values to be passed to the static runtime function.
QualType getType() const
Definition CGCall.h:249
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
static Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
llvm::OpenMPIRBuilder::InsertPointTy InsertPointTy
static void EmitOMPOutlinedRegionBody(CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Twine RegionName)
Emit the body of an OMP region that will be outlined in OpenMPIRBuilder::finalize().
static Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable VD.
static void EmitCaptureStmt(CodeGenFunction &CGF, InsertPointTy CodeGenIP, llvm::BasicBlock &FiniBB, llvm::Function *Fn, ArrayRef< llvm::Value * > Args)
static std::string getNameWithSeparators(ArrayRef< StringRef > Parts, StringRef FirstSeparator=".", StringRef Separator=".")
Get the platform-specific name separator.
static void FinalizeOMPRegion(CodeGenFunction &CGF, InsertPointTy IP)
Emit the Finalization for an OMP region.
static void EmitOMPInlinedRegionBody(CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Twine RegionName)
Emit the body of an OMP region.
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:648
Extra information about a function prototype.
Definition TypeBase.h:5454
Scheduling data for loop-based OpenMP directives.
bool UseFusedDistChunkSchedule
Request the fused distr_static_chunk + static_chunkone runtime schedule in for_static_init.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule