22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/Frontend/OpenMP/OMPGridValues.h"
24#include "llvm/Support/MathExtras.h"
27using namespace CodeGen;
28using namespace llvm::omp;
33 llvm::FunctionCallee EnterCallee =
nullptr;
35 llvm::FunctionCallee ExitCallee =
nullptr;
38 llvm::BasicBlock *ContBlock =
nullptr;
41 NVPTXActionTy(llvm::FunctionCallee EnterCallee,
43 llvm::FunctionCallee ExitCallee,
45 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
50 llvm::Value *CallBool = CGF.
Builder.CreateIsNotNull(EnterRes);
54 CGF.
Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
72class ExecutionRuntimeModesRAII {
81 : ExecMode(ExecMode) {
82 SavedExecMode = ExecMode;
85 ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }
90 if (
const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
91 const Expr *
Base = ASE->getBase()->IgnoreParenImpCasts();
92 while (
const auto *TempASE = dyn_cast<ArraySubscriptExpr>(
Base))
93 Base = TempASE->getBase()->IgnoreParenImpCasts();
95 }
else if (
auto *OASE = dyn_cast<ArraySectionExpr>(RefExpr)) {
96 const Expr *
Base = OASE->getBase()->IgnoreParenImpCasts();
97 while (
const auto *TempOASE = dyn_cast<ArraySectionExpr>(
Base))
98 Base = TempOASE->getBase()->IgnoreParenImpCasts();
99 while (
const auto *TempASE = dyn_cast<ArraySubscriptExpr>(
Base))
100 Base = TempASE->getBase()->IgnoreParenImpCasts();
104 if (
const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
105 return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
106 const auto *ME = cast<MemberExpr>(RefExpr);
107 return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
110static RecordDecl *buildRecordForGlobalizedVars(
113 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
117 if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
121 GlobalizedVars.emplace_back(
C.getDeclAlign(
D),
D);
122 for (
const ValueDecl *
D : EscapedDeclsForTeams)
123 GlobalizedVars.emplace_back(
C.getDeclAlign(
D),
D);
129 RecordDecl *GlobalizedRD =
C.buildImplicitRecord(
"_globalized_locals_ty");
132 EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
133 for (
const auto &Pair : GlobalizedVars) {
137 Type =
C.getPointerType(
Type.getNonReferenceType());
142 if (SingleEscaped.count(VD)) {
157 llvm::APInt ArraySize(32, BufSize);
158 Type =
C.getConstantArrayType(
Type, ArraySize,
nullptr,
159 ArraySizeModifier::Normal, 0);
167 llvm::APInt Align(32, Pair.first.getQuantity());
168 Field->addAttr(AlignedAttr::CreateImplicit(
171 C.getIntTypeForBitwidth(32, 0),
173 {}, AlignedAttr::GNU_aligned));
176 MappedDeclsFields.try_emplace(VD, Field);
183class CheckVarsEscapingDeclContext final
186 llvm::SetVector<const ValueDecl *> EscapedDecls;
187 llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
188 llvm::SetVector<const ValueDecl *> DelayedVariableLengthDecls;
191 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
192 bool AllEscaped =
false;
193 bool IsForCombinedParallelRegion =
false;
195 void markAsEscaped(
const ValueDecl *VD) {
197 if (!isa<VarDecl>(VD) ||
198 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
205 bool IsCaptured =
false;
206 if (
auto *CSI = CGF.CapturedStmtInfo) {
207 if (
const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
211 if (!IsForCombinedParallelRegion) {
214 const auto *
Attr = FD->getAttr<OMPCaptureKindAttr>();
217 if (((
Attr->getCaptureKind() != OMPC_map) &&
219 ((
Attr->getCaptureKind() == OMPC_map) &&
220 !FD->getType()->isAnyPointerType()))
223 if (!FD->getType()->isReferenceType()) {
225 "Parameter captured by value with variably modified type");
226 EscapedParameters.insert(VD);
227 }
else if (!IsForCombinedParallelRegion) {
232 if ((!CGF.CapturedStmtInfo ||
233 (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
241 EscapedVariableLengthDecls.insert(VD);
243 DelayedVariableLengthDecls.insert(VD);
245 EscapedDecls.insert(VD);
248 void VisitValueDecl(
const ValueDecl *VD) {
251 if (
const auto *VarD = dyn_cast<VarDecl>(VD)) {
252 if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {
253 const bool SavedAllEscaped = AllEscaped;
255 Visit(VarD->getInit());
256 AllEscaped = SavedAllEscaped;
262 bool IsCombinedParallelRegion) {
266 if (
C.capturesVariable() && !
C.capturesVariableByCopy()) {
268 bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
269 if (IsCombinedParallelRegion) {
273 IsForCombinedParallelRegion =
false;
276 C->getClauseKind() == OMPC_reduction ||
277 C->getClauseKind() == OMPC_linear ||
278 C->getClauseKind() == OMPC_private)
281 if (
const auto *PC = dyn_cast<OMPFirstprivateClause>(
C))
282 Vars = PC->getVarRefs();
283 else if (
const auto *PC = dyn_cast<OMPLastprivateClause>(
C))
284 Vars = PC->getVarRefs();
286 llvm_unreachable(
"Unexpected clause.");
287 for (
const auto *
E : Vars) {
291 IsForCombinedParallelRegion =
true;
295 if (IsForCombinedParallelRegion)
300 if (isa<OMPCapturedExprDecl>(VD))
302 IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;
307 void buildRecordForGlobalizedVars(
bool IsInTTDRegion) {
308 assert(!GlobalizedRD &&
309 "Record for globalized variables is built already.");
311 unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
313 EscapedDeclsForTeams = EscapedDecls.getArrayRef();
315 EscapedDeclsForParallel = EscapedDecls.getArrayRef();
316 GlobalizedRD = ::buildRecordForGlobalizedVars(
317 CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
318 MappedDeclsFields, WarpSize);
324 : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
326 virtual ~CheckVarsEscapingDeclContext() =
default;
327 void VisitDeclStmt(
const DeclStmt *S) {
330 for (
const Decl *
D : S->decls())
331 if (
const auto *VD = dyn_cast_or_null<ValueDecl>(
D))
337 if (!
D->hasAssociatedStmt())
340 dyn_cast_or_null<CapturedStmt>(
D->getAssociatedStmt())) {
345 if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {
346 VisitStmt(S->getCapturedStmt());
349 VisitOpenMPCapturedStmt(
351 CaptureRegions.back() == OMPD_parallel &&
359 if (
C.capturesVariable() && !
C.capturesVariableByCopy()) {
362 if (isa<OMPCapturedExprDecl>(VD))
371 if (
C.capturesVariable()) {
375 if (
E->isInitCapture(&
C) || isa<OMPCapturedExprDecl>(VD))
386 const VarDecl *VD =
C.getVariable();
396 for (
const Expr *Arg :
E->arguments()) {
399 if (Arg->isLValue()) {
400 const bool SavedAllEscaped = AllEscaped;
403 AllEscaped = SavedAllEscaped;
416 if (isa<OMPCapturedExprDecl>(VD))
424 if (
E->getOpcode() == UO_AddrOf) {
425 const bool SavedAllEscaped = AllEscaped;
428 AllEscaped = SavedAllEscaped;
436 if (
E->getCastKind() == CK_ArrayToPointerDecay) {
437 const bool SavedAllEscaped = AllEscaped;
440 AllEscaped = SavedAllEscaped;
445 void VisitExpr(
const Expr *
E) {
448 bool SavedAllEscaped = AllEscaped;
454 AllEscaped = SavedAllEscaped;
456 void VisitStmt(
const Stmt *S) {
459 for (
const Stmt *Child : S->children())
466 const RecordDecl *getGlobalizedRecord(
bool IsInTTDRegion) {
468 buildRecordForGlobalizedVars(IsInTTDRegion);
474 assert(GlobalizedRD &&
475 "Record for globalized variables must be generated already.");
476 return MappedDeclsFields.lookup(VD);
481 return EscapedDecls.getArrayRef();
486 const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters()
const {
487 return EscapedParameters;
493 return EscapedVariableLengthDecls.getArrayRef();
499 return DelayedVariableLengthDecls.getArrayRef();
505CGOpenMPRuntimeGPU::getExecutionMode()
const {
506 return CurrentExecutionMode;
510CGOpenMPRuntimeGPU::getDataSharingMode()
const {
511 return CurrentDataSharingMode;
517 const auto *CS =
D.getInnermostCapturedStmt();
519 CS->getCapturedStmt()->IgnoreContainers(
true);
522 if (
const auto *NestedDir =
523 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
525 switch (
D.getDirectiveKind()) {
529 if (DKind == OMPD_teams) {
530 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
535 if (
const auto *NND =
536 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
537 DKind = NND->getDirectiveKind();
543 case OMPD_target_teams:
545 case OMPD_target_simd:
546 case OMPD_target_parallel:
547 case OMPD_target_parallel_for:
548 case OMPD_target_parallel_for_simd:
549 case OMPD_target_teams_distribute:
550 case OMPD_target_teams_distribute_simd:
551 case OMPD_target_teams_distribute_parallel_for:
552 case OMPD_target_teams_distribute_parallel_for_simd:
555 case OMPD_parallel_for:
556 case OMPD_parallel_master:
557 case OMPD_parallel_sections:
559 case OMPD_parallel_for_simd:
561 case OMPD_cancellation_point:
563 case OMPD_threadprivate:
581 case OMPD_target_data:
582 case OMPD_target_exit_data:
583 case OMPD_target_enter_data:
584 case OMPD_distribute:
585 case OMPD_distribute_simd:
586 case OMPD_distribute_parallel_for:
587 case OMPD_distribute_parallel_for_simd:
588 case OMPD_teams_distribute:
589 case OMPD_teams_distribute_simd:
590 case OMPD_teams_distribute_parallel_for:
591 case OMPD_teams_distribute_parallel_for_simd:
592 case OMPD_target_update:
593 case OMPD_declare_simd:
594 case OMPD_declare_variant:
595 case OMPD_begin_declare_variant:
596 case OMPD_end_declare_variant:
597 case OMPD_declare_target:
598 case OMPD_end_declare_target:
599 case OMPD_declare_reduction:
600 case OMPD_declare_mapper:
602 case OMPD_taskloop_simd:
603 case OMPD_master_taskloop:
604 case OMPD_master_taskloop_simd:
605 case OMPD_parallel_master_taskloop:
606 case OMPD_parallel_master_taskloop_simd:
610 llvm_unreachable(
"Unexpected directive.");
620 switch (DirectiveKind) {
622 case OMPD_target_teams:
624 case OMPD_target_parallel_loop:
625 case OMPD_target_parallel:
626 case OMPD_target_parallel_for:
627 case OMPD_target_parallel_for_simd:
628 case OMPD_target_teams_distribute_parallel_for:
629 case OMPD_target_teams_distribute_parallel_for_simd:
630 case OMPD_target_simd:
631 case OMPD_target_teams_distribute_simd:
633 case OMPD_target_teams_distribute:
635 case OMPD_target_teams_loop:
638 if (
auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&
D))
639 return TTLD->canBeParallelFor();
643 case OMPD_parallel_for:
644 case OMPD_parallel_master:
645 case OMPD_parallel_sections:
647 case OMPD_parallel_for_simd:
649 case OMPD_cancellation_point:
651 case OMPD_threadprivate:
669 case OMPD_target_data:
670 case OMPD_target_exit_data:
671 case OMPD_target_enter_data:
672 case OMPD_distribute:
673 case OMPD_distribute_simd:
674 case OMPD_distribute_parallel_for:
675 case OMPD_distribute_parallel_for_simd:
676 case OMPD_teams_distribute:
677 case OMPD_teams_distribute_simd:
678 case OMPD_teams_distribute_parallel_for:
679 case OMPD_teams_distribute_parallel_for_simd:
680 case OMPD_target_update:
681 case OMPD_declare_simd:
682 case OMPD_declare_variant:
683 case OMPD_begin_declare_variant:
684 case OMPD_end_declare_variant:
685 case OMPD_declare_target:
686 case OMPD_end_declare_target:
687 case OMPD_declare_reduction:
688 case OMPD_declare_mapper:
690 case OMPD_taskloop_simd:
691 case OMPD_master_taskloop:
692 case OMPD_master_taskloop_simd:
693 case OMPD_parallel_master_taskloop:
694 case OMPD_parallel_master_taskloop_simd:
701 "Unknown programming model for OpenMP directive on NVPTX target.");
705 StringRef ParentName,
706 llvm::Function *&OutlinedFn,
707 llvm::Constant *&OutlinedFnID,
710 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode,
EM_NonSPMD);
711 EntryFunctionState EST;
712 WrapperFunctionsMap.clear();
714 [[maybe_unused]]
bool IsBareKernel =
D.getSingleClause<
OMPXBareClause>();
715 assert(!IsBareKernel &&
"bare kernel should not be at generic mode");
719 CGOpenMPRuntimeGPU::EntryFunctionState &EST;
723 NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST,
728 RT.emitKernelInit(
D, CGF, EST,
false);
730 RT.setLocThreadIdInsertPt(CGF,
true);
735 RT.emitKernelDeinit(CGF, EST,
false);
739 IsInTTDRegion =
true;
741 IsOffloadEntry, CodeGen);
742 IsInTTDRegion =
false;
747 EntryFunctionState &EST,
bool IsSPMD) {
748 int32_t MinThreadsVal = 1, MaxThreadsVal = -1, MinTeamsVal = 1,
751 MinTeamsVal, MaxTeamsVal);
755 Bld, IsSPMD, MinThreadsVal, MaxThreadsVal, MinTeamsVal, MaxTeamsVal));
757 emitGenericVarsProlog(CGF, EST.Loc);
761 EntryFunctionState &EST,
764 emitGenericVarsEpilog(CGF);
769 "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::Union);
771 for (
const RecordDecl *TeamReductionRec : TeamsReductions) {
772 QualType RecTy =
C.getRecordType(TeamReductionRec);
782 QualType StaticTy =
C.getRecordType(StaticRD);
783 llvm::Type *LLVMReductionsBufferTy =
787 TeamsReductions.empty()
789 : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
791 OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,
792 C.getLangOpts().OpenMPCUDAReductionBufNum);
793 TeamsReductions.clear();
797 StringRef ParentName,
798 llvm::Function *&OutlinedFn,
799 llvm::Constant *&OutlinedFnID,
802 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode,
EM_SPMD);
803 EntryFunctionState EST;
810 CGOpenMPRuntimeGPU::EntryFunctionState &EST;
812 DataSharingMode Mode;
817 CGOpenMPRuntimeGPU::EntryFunctionState &EST,
819 : RT(RT), EST(EST), IsBareKernel(IsBareKernel),
820 Mode(RT.CurrentDataSharingMode),
D(
D) {}
823 RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;
826 RT.emitKernelInit(
D, CGF, EST,
true);
828 RT.setLocThreadIdInsertPt(CGF,
true);
832 RT.CurrentDataSharingMode = Mode;
835 RT.clearLocThreadIdInsertPt(CGF);
836 RT.emitKernelDeinit(CGF, EST,
true);
838 } Action(*
this, EST, IsBareKernel,
D);
840 IsInTTDRegion =
true;
842 IsOffloadEntry, CodeGen);
843 IsInTTDRegion =
false;
846void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
848 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
853 assert(!ParentName.empty() &&
"Invalid target region parent name!");
857 if (Mode || IsBareKernel)
858 emitSPMDKernel(
D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
861 emitNonSPMDKernel(
D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
867 llvm::OpenMPIRBuilderConfig Config(
875 llvm_unreachable(
"OpenMP can only handle device code.");
885 "__omp_rtl_debug_kind");
887 "__omp_rtl_assume_teams_oversubscription");
889 "__omp_rtl_assume_threads_oversubscription");
891 "__omp_rtl_assume_no_thread_state");
893 "__omp_rtl_assume_no_nested_parallelism");
897 ProcBindKind ProcBind,
903 llvm::Value *NumThreads,
909 const Expr *NumTeams,
910 const Expr *ThreadLimit,
918 bool PrevIsInTTDRegion = IsInTTDRegion;
919 IsInTTDRegion =
false;
922 CGF,
D, ThreadIDVar, InnermostKind, CodeGen));
923 IsInTTDRegion = PrevIsInTTDRegion;
925 llvm::Function *WrapperFun =
926 createParallelDataSharingWrapper(OutlinedFun,
D);
927 WrapperFunctionsMap[OutlinedFun] = WrapperFun;
939 "expected teams directive.");
944 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
946 Dir = dyn_cast_or_null<OMPExecutableDirective>(S);
954 for (
const Expr *
E :
C->getVarRefs())
964 "expected teams directive.");
966 for (
const Expr *
E :
C->privates())
979 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
986 if (!LastPrivatesReductions.empty()) {
987 GlobalizedRD = ::buildRecordForGlobalizedVars(
989 MappedDeclsFields, WarpSize);
991 }
else if (!LastPrivatesReductions.empty()) {
992 assert(!TeamAndReductions.first &&
993 "Previous team declaration is not expected.");
994 TeamAndReductions.first =
D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
995 std::swap(TeamAndReductions.second, LastPrivatesReductions);
1002 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
1006 NVPTXPrePostActionTy(
1008 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
1010 :
Loc(
Loc), GlobalizedRD(GlobalizedRD),
1011 MappedDeclsFields(MappedDeclsFields) {}
1016 auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
1017 I->getSecond().MappedParams =
1018 std::make_unique<CodeGenFunction::OMPMapVars>();
1019 DeclToAddrMapTy &
Data = I->getSecond().LocalVarData;
1020 for (
const auto &Pair : MappedDeclsFields) {
1021 assert(Pair.getFirst()->isCanonicalDecl() &&
1022 "Expected canonical declaration");
1023 Data.insert(std::make_pair(Pair.getFirst(), MappedVarData()));
1026 Rt.emitGenericVarsProlog(CGF,
Loc);
1030 .emitGenericVarsEpilog(CGF);
1032 } Action(
Loc, GlobalizedRD, MappedDeclsFields);
1035 CGF,
D, ThreadIDVar, InnermostKind, CodeGen);
1047 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
1048 if (I == FunctionGlobalizedDecls.end())
1051 for (
auto &Rec : I->getSecond().LocalVarData) {
1052 const auto *VD = cast<VarDecl>(Rec.first);
1053 bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
1057 llvm::Value *ParValue;
1066 llvm::CallBase *VoidPtr =
1071 VoidPtr->addRetAttr(llvm::Attribute::get(
1078 VoidPtr, VarPtrTy, VD->
getName() +
"_on_stack");
1081 Rec.second.PrivateAddr = VarAddr.
getAddress();
1082 Rec.second.GlobalizedVal = VoidPtr;
1087 I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.
getAddress());
1090 VoidPtr->setDebugLoc(DI->SourceLocToDebugLoc(VD->
getLocation()));
1093 for (
const auto *ValueD : I->getSecond().EscapedVariableLengthDecls) {
1094 const auto *VD = cast<VarDecl>(ValueD);
1095 std::pair<llvm::Value *, llvm::Value *> AddrSizePair =
1097 I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(AddrSizePair);
1101 I->getSecond().MappedParams->setVarAddr(CGF, VD,
Base.getAddress());
1103 I->getSecond().MappedParams->apply(CGF);
1108 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
1109 if (I == FunctionGlobalizedDecls.end())
1113 return llvm::is_contained(I->getSecond().DelayedVariableLengthDecls, VD);
1116std::pair<llvm::Value *, llvm::Value *>
1124 Size = Bld.CreateNUWAdd(
1126 llvm::Value *AlignVal =
1128 Size = Bld.CreateUDiv(Size, AlignVal);
1129 Size = Bld.CreateNUWMul(Size, AlignVal);
1132 llvm::Value *AllocArgs[] = {Size};
1133 llvm::CallBase *VoidPtr =
1137 VoidPtr->addRetAttr(llvm::Attribute::get(
1140 return std::make_pair(VoidPtr, Size);
1145 const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) {
1149 {AddrSizePair.first, AddrSizePair.second});
1156 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
1157 if (I != FunctionGlobalizedDecls.end()) {
1160 for (
const auto &AddrSizePair :
1161 llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
1164 {AddrSizePair.first, AddrSizePair.second});
1167 for (
auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) {
1168 const auto *VD = cast<VarDecl>(Rec.first);
1169 I->getSecond().MappedParams->restore(CGF);
1171 llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal,
1183 llvm::Function *OutlinedFn,
1197 OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(
CGM.
VoidPtrTy));
1200 OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1201 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1207 llvm::Function *OutlinedFn,
1210 llvm::Value *NumThreads) {
1214 auto &&ParallelGen = [
this,
Loc, OutlinedFn, CapturedVars, IfCond,
1218 llvm::Value *NumThreadsVal = NumThreads;
1219 llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
1220 llvm::Value *ID = llvm::ConstantPointerNull::get(
CGM.
Int8PtrTy);
1223 llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn,
CGM.
Int8PtrTy);
1231 llvm::ArrayType::get(
CGM.
VoidPtrTy, CapturedVars.size()),
1232 "captured_vars_addrs");
1234 if (!CapturedVars.empty()) {
1238 for (llvm::Value *
V : CapturedVars) {
1241 if (
V->getType()->isIntegerTy())
1251 llvm::Value *IfCondVal =
nullptr;
1256 IfCondVal = llvm::ConstantInt::get(CGF.
Int32Ty, 1);
1259 NumThreadsVal = llvm::ConstantInt::get(CGF.
Int32Ty, -1);
1261 NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.
Int32Ty),
1263 assert(IfCondVal &&
"Expected a value");
1265 llvm::Value *Args[] = {
1270 llvm::ConstantInt::get(CGF.
Int32Ty, -1),
1273 Bld.CreateBitOrPointerCast(CapturedVarsAddrs.
emitRawPointer(CGF),
1275 llvm::ConstantInt::get(
CGM.
SizeTy, CapturedVars.size())};
1291 llvm::Value *Args[] = {
1292 llvm::ConstantPointerNull::get(
1294 llvm::ConstantInt::get(CGF.
Int32Ty, 0,
true)};
1331 CGM.
getModule(), OMPRTL___kmpc_warp_active_thread_mask));
1333 llvm::Value *ThreadID = RT.getGPUThreadID(CGF);
1336 llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF);
1349 llvm::Value *CmpLoopBound = CGF.
Builder.CreateICmpSLT(CounterVal, TeamWidth);
1350 CGF.
Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
1356 llvm::Value *CmpThreadToCounter =
1357 CGF.
Builder.CreateICmpEQ(ThreadID, CounterVal);
1358 CGF.
Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
1377 llvm::Value *IncCounterVal =
1391 "Cast type must sized.");
1393 "Val type must sized.");
1395 if (ValTy == CastTy)
1399 return CGF.
Builder.CreateBitCast(Val, LLVMCastTy);
1401 return CGF.
Builder.CreateIntCast(Val, LLVMCastTy,
1668 if (Options.SimpleReduction) {
1669 assert(!TeamsReduction && !ParallelReduction &&
1670 "Invalid reduction selection in emitReduction.");
1671 (void)ParallelReduction;
1673 ReductionOps, Options);
1677 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;
1680 for (
const Expr *DRE : Privates) {
1681 PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();
1684 const RecordDecl *ReductionRec = ::buildRecordForGlobalizedVars(
1685 CGM.
getContext(), PrivatesReductions, std::nullopt, VarFieldMap, 1);
1688 TeamsReductions.push_back(ReductionRec);
1693 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1696 InsertPointTy CodeGenIP(CGF.
Builder.GetInsertBlock(),
1697 CGF.
Builder.GetInsertPoint());
1698 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(
1705 llvm::Type *ElementType;
1707 llvm::Value *PrivateVariable;
1708 llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy AtomicReductionGen =
nullptr;
1710 const auto *RHSVar =
1711 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[Idx])->getDecl());
1713 const auto *LHSVar =
1714 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[Idx])->getDecl());
1716 llvm::OpenMPIRBuilder::EvalKind EvalKind;
1719 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Scalar;
1722 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Complex;
1725 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Aggregate;
1728 auto ReductionGen = [&](InsertPointTy CodeGenIP,
unsigned I,
1729 llvm::Value **LHSPtr, llvm::Value **RHSPtr,
1730 llvm::Function *NewFunc) {
1731 CGF.
Builder.restoreIP(CodeGenIP);
1732 auto *CurFn = CGF.
CurFn;
1733 CGF.
CurFn = NewFunc;
1736 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()))
1737 .emitRawPointer(CGF);
1739 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()))
1740 .emitRawPointer(CGF);
1743 cast<DeclRefExpr>(LHSExprs[I]),
1744 cast<DeclRefExpr>(RHSExprs[I]));
1748 return InsertPointTy(CGF.
Builder.GetInsertBlock(),
1749 CGF.
Builder.GetInsertPoint());
1751 ReductionInfos.emplace_back(llvm::OpenMPIRBuilder::ReductionInfo(
1752 ElementType,
Variable, PrivateVariable, EvalKind,
1753 nullptr, ReductionGen, AtomicReductionGen));
1758 OmpLoc, AllocaIP, CodeGenIP, ReductionInfos,
false, TeamsReduction,
1759 DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,
1767 const VarDecl *NativeParam)
const {
1772 const Type *NonQualTy = QC.
strip(ArgType);
1774 if (
const auto *
Attr = FD->
getAttr<OMPCaptureKindAttr>()) {
1775 if (
Attr->getCaptureKind() == OMPC_map) {
1782 enum { NVPTX_local_addr = 5 };
1785 if (isa<ImplicitParamDecl>(NativeParam))
1800 const VarDecl *TargetParam)
const {
1801 assert(NativeParam != TargetParam &&
1803 "Native arg must not be the same as target arg.");
1807 const Type *NonQualTy = QC.
strip(NativeParamType);
1809 unsigned NativePointeeAddrSpace =
1817 llvm::PointerType::get(CGF.
getLLVMContext(), NativePointeeAddrSpace));
1821 return NativeParamAddr;
1828 TargetArgs.reserve(Args.size());
1829 auto *FnType = OutlinedFn.getFunctionType();
1830 for (
unsigned I = 0,
E = Args.size(); I <
E; ++I) {
1831 if (FnType->isVarArg() && FnType->getNumParams() <= I) {
1832 TargetArgs.append(std::next(Args.begin(), I), Args.end());
1835 llvm::Type *TargetType = FnType->getParamType(I);
1836 llvm::Value *NativeArg = Args[I];
1837 if (!TargetType->isPointerTy()) {
1838 TargetArgs.emplace_back(NativeArg);
1841 TargetArgs.emplace_back(
1851llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
1854 const auto &CS = *
D.getCapturedStmt(OMPD_parallel);
1868 WrapperArgs.emplace_back(&ParallelLevelArg);
1869 WrapperArgs.emplace_back(&WrapperArg);
1874 auto *Fn = llvm::Function::Create(
1876 Twine(OutlinedParallelFn->getName(),
"_wrapper"), &
CGM.
getModule());
1884 Fn->addFnAttr(llvm::Attribute::NoInline);
1887 Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
1888 Fn->setDoesNotRecurse();
1894 const auto *RD = CS.getCapturedRecordDecl();
1895 auto CurField = RD->field_begin();
1907 auto CI = CS.capture_begin();
1913 llvm::Value *GlobalArgsPtr = GlobalArgs.
getPointer();
1914 llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
1922 if (CS.capture_size() > 0 ||
1938 cast<OMPLoopDirective>(
D).getLowerBoundVariable()->getExprLoc());
1939 Args.emplace_back(LB);
1948 cast<OMPLoopDirective>(
D).getUpperBoundVariable()->getExprLoc());
1949 Args.emplace_back(UB);
1952 if (CS.capture_size() > 0) {
1954 for (
unsigned I = 0,
E = CS.capture_size(); I <
E; ++I, ++CI, ++CurField) {
1955 QualType ElemTy = CurField->getType();
1964 if (CI->capturesVariableByCopy() &&
1965 !CI->getCapturedVar()->getType()->isAnyPointerType()) {
1969 Args.emplace_back(Arg);
1983 assert(
D &&
"Expected function or captured|block decl.");
1984 assert(FunctionGlobalizedDecls.count(CGF.
CurFn) == 0 &&
1985 "Function is registered already.");
1986 assert((!TeamAndReductions.first || TeamAndReductions.first ==
D) &&
1987 "Team is set but not processed.");
1988 const Stmt *Body =
nullptr;
1989 bool NeedToDelayGlobalization =
false;
1990 if (
const auto *FD = dyn_cast<FunctionDecl>(
D)) {
1991 Body = FD->getBody();
1992 }
else if (
const auto *BD = dyn_cast<BlockDecl>(
D)) {
1993 Body = BD->getBody();
1994 }
else if (
const auto *CD = dyn_cast<CapturedDecl>(
D)) {
1995 Body = CD->getBody();
1997 if (NeedToDelayGlobalization &&
2003 CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
2004 VarChecker.Visit(Body);
2006 VarChecker.getGlobalizedRecord(IsInTTDRegion);
2007 TeamAndReductions.first =
nullptr;
2008 TeamAndReductions.second.clear();
2010 VarChecker.getEscapedVariableLengthDecls();
2012 VarChecker.getDelayedVariableLengthDecls();
2013 if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty() &&
2014 DelayedVariableLengthDecls.empty())
2016 auto I = FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
2017 I->getSecond().MappedParams =
2018 std::make_unique<CodeGenFunction::OMPMapVars>();
2019 I->getSecond().EscapedParameters.insert(
2020 VarChecker.getEscapedParameters().begin(),
2021 VarChecker.getEscapedParameters().end());
2022 I->getSecond().EscapedVariableLengthDecls.append(
2023 EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
2024 I->getSecond().DelayedVariableLengthDecls.append(
2025 DelayedVariableLengthDecls.begin(), DelayedVariableLengthDecls.end());
2026 DeclToAddrMapTy &
Data = I->getSecond().LocalVarData;
2027 for (
const ValueDecl *VD : VarChecker.getEscapedDecls()) {
2029 Data.insert(std::make_pair(VD, MappedVarData()));
2031 if (!NeedToDelayGlobalization) {
2034 GlobalizationScope() =
default;
2038 .emitGenericVarsEpilog(CGF);
2047 if (VD && VD->
hasAttr<OMPAllocateDeclAttr>()) {
2048 const auto *A = VD->
getAttr<OMPAllocateDeclAttr>();
2050 switch (A->getAllocatorType()) {
2051 case OMPAllocateDeclAttr::OMPNullMemAlloc:
2052 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
2053 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
2054 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
2056 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
2058 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
2061 case OMPAllocateDeclAttr::OMPConstMemAlloc:
2064 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
2067 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
2068 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
2072 auto *GV =
new llvm::GlobalVariable(
2074 llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(VarTy),
2076 nullptr, llvm::GlobalValue::NotThreadLocal,
2091 auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
2092 if (I == FunctionGlobalizedDecls.end())
2094 auto VDI = I->getSecond().LocalVarData.find(VD);
2095 if (VDI != I->getSecond().LocalVarData.end())
2096 return VDI->second.PrivateAddr;
2101 auto VDI = I->getSecond().LocalVarData.find(
2102 cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
2103 ->getCanonicalDecl());
2104 if (VDI != I->getSecond().LocalVarData.end())
2105 return VDI->second.PrivateAddr;
2113 FunctionGlobalizedDecls.erase(CGF.
CurFn);
2120 llvm::Value *&Chunk)
const {
2123 ScheduleKind = OMPC_DIST_SCHEDULE_static;
2125 RT.getGPUNumThreads(CGF),
2127 S.getIterationVariable()->getType(), S.getBeginLoc());
2131 CGF, S, ScheduleKind, Chunk);
2137 const Expr *&ChunkExpr)
const {
2138 ScheduleKind = OMPC_SCHEDULE_static;
2140 llvm::APInt ChunkSize(32, 1);
2149 " Expected target-based directive.");
2154 if (!
C.capturesVariable())
2156 const VarDecl *VD =
C.getCapturedVar();
2157 const auto *RD = VD->
getType()
2161 if (!RD || !RD->isLambda())
2170 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
2172 RD->getCaptureFields(Captures, ThisCapture);
2182 const ValueDecl *VD = LC.getCapturedVar();
2187 auto It = Captures.find(VD);
2188 assert(It != Captures.end() &&
"Found lambda capture without field.");
2202 if (!VD || !VD->
hasAttr<OMPAllocateDeclAttr>())
2204 const auto *A = VD->
getAttr<OMPAllocateDeclAttr>();
2205 switch(A->getAllocatorType()) {
2206 case OMPAllocateDeclAttr::OMPNullMemAlloc:
2207 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
2209 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
2210 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
2211 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
2212 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
2213 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
2216 case OMPAllocateDeclAttr::OMPConstMemAlloc:
2219 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
2222 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
2223 llvm_unreachable(
"Expected predefined allocator for the variables with the "
2234 if (Feature.getValue()) {
2246 for (
const OMPClause *Clause :
D->clauselists()) {
2247 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
2260 llvm::raw_svector_ostream Out(Buffer);
2262 <<
" does not support unified addressing";
2263 CGM.
Error(Clause->getBeginLoc(), Out.str());
2334 llvm_unreachable(
"Unexpected GPU arch.");
2344 const char *LocSize =
"__kmpc_get_hardware_num_threads_in_block";
2345 llvm::Function *F = M->getFunction(LocSize);
2347 F = llvm::Function::Create(
2348 llvm::FunctionType::get(CGF.
Int32Ty, std::nullopt,
false),
2349 llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.
CGM.
getModule());
2351 return Bld.CreateCall(F, std::nullopt,
"nvptx_num_threads");
2358 CGM.
getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block),
static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)
Get list of reduction variables from the teams ... directives.
static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)
Cast value to the specified type.
static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)
Get list of lastprivate variables from the teams distribute ... or teams {distribute ... parallel for} directives.
static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) SPMD construct, if any.
static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)
static OffloadArch getOffloadArch(CodeGenModule &CGM)
This file defines OpenMP nodes for declarative directives.
This file defines OpenMP AST classes for clauses.
static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false, StringRef DiagType="")
This file defines OpenMP AST classes for executable directives and clauses.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
const TargetInfo & getTargetInfo() const
QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const
Return the uniqued reference to the type for an address space qualified type with the specified type ...
unsigned getTargetAddressSpace(LangAS AS) const
Attr - This represents one attribute.
A class which contains all the information about a particular captured value.
BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Describes the capture of either a variable, or 'this', or variable-length array type.
This captures a statement into a function.
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
CharUnits - This is an opaque type for sizes expressed in character units.
bool isZero() const
isZero - Test whether the quantity equals zero.
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
CGFunctionInfo - Class to encapsulate the information about a function definition.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP teams.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
DataSharingMode
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...
@ DS_CUDA
CUDA data sharing mode.
@ DS_Generic
Generic data-sharing mode.
void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override
Choose a default value for the dist_schedule clause.
Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override
Gets the OpenMP-specific address of the local variable.
void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override
Emits OpenMP-specific function prolog.
void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override
Choose a default value for the schedule clause.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
This function ought to emit, in the general case, a call to.
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
void getKmpcFreeShared(CodeGenFunction &CGF, const std::pair< llvm::Value *, llvm::Value * > &AddrSizePair) override
Get call to __kmpc_free_shared.
CGOpenMPRuntimeGPU(CodeGenModule &CGM)
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP parallel.
void functionFinished(CodeGenFunction &CGF) override
Cleans up references to the objects in finished function.
llvm::Value * getGPUThreadID(CodeGenFunction &CGF)
Get the id of the current thread on the GPU.
void processRequiresDirective(const OMPRequiresDecl *D) override
Perform check on requires decl to ensure that target architecture supports unified addressing.
void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const override
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
bool isDelayedVariableLengthDecl(CodeGenFunction &CGF, const VarDecl *VD) const override
Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURun...
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
ExecutionMode
Defines the execution mode.
@ EM_NonSPMD
Non-SPMD execution mode (1 master thread, others are workers).
@ EM_Unknown
Unknown execution mode (orphaned directive).
@ EM_SPMD
SPMD execution mode (all threads are worker threads).
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
llvm::Value * getGPUNumThreads(CodeGenFunction &CGF)
Get the maximum number of threads in a block of the GPU.
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
std::pair< llvm::Value *, llvm::Value * > getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) override
Get call to __kmpc_alloc_shared.
bool isGPU() const override
Returns true if the current target is a GPU.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Helper to determine the min/max number of threads/teams for D.
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barier is going to ...
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const
Choose default schedule type and chunk value for the dist_schedule clause.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
CapturedRegionKind getKind() const
bool isCXXThisExprCaptured() const
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
CGCapturedStmtInfo * CapturedStmtInfo
LValue MakeNaturalAlignPointeeRawAddrLValue(llvm::Value *V, QualType T)
Same as MakeNaturalAlignPointeeAddrLValue except that the pointer is known to be unsigned.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
llvm::Type * ConvertTypeForMem(QualType T)
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
const TargetInfo & getTarget() const
llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location)
Converts Location to a DebugLoc, if debug information is enabled.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
CGDebugInfo * getDebugInfo()
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
ASTContext & getContext() const
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
CodeGenTypes & getTypes() const
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * LoadCXXThis()
LoadCXXThis - Load the value of 'this'.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
llvm::LLVMContext & getLLVMContext()
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const LangOptions & getLangOpts() const
CodeGenTypes & getTypes()
const TargetInfo & getTarget() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
ASTContext & getContext() const
llvm::LLVMContext & getLLVMContext()
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
unsigned getTargetAddressSpace(QualType T) const
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
Information for lazily generating a cleanup.
FunctionArgList - Type for representing both the decl and type of parameters to a function.
LValue - This represents an lvalue references.
Address getAddress() const
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
An abstract representation of an aligned address.
llvm::Value * getPointer() const
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
DeclStmt - Adaptor class for mixing declarations with statements and expressions.
Decl - This represents one declaration (or definition), e.g.
attr_iterator attr_end() const
bool isCanonicalDecl() const
Whether this particular Decl is a canonical one.
attr_iterator attr_begin() const
SourceLocation getLocation() const
DeclContext * getDeclContext()
SourceLocation getBeginLoc() const LLVM_READONLY
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
SourceLocation getBeginLoc() const LLVM_READONLY
This represents one expression.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language.
Represents a member of a struct/union/class.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
GlobalDecl - represents a global declaration.
ImplicitCastExpr - Allows us to explicitly represent implicit type conversions, which have no direct ...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Describes the capture of a variable or of this, or of a C++1y init-capture.
A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
This is a basic class for representing single OpenMP clause.
This is a basic class for representing single OpenMP executable directive.
OpenMPDirectiveKind getDirectiveKind() const
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
LangAS getAddressSpace() const
Return the address space of this type.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
QualType getCanonicalType() const
A qualifier set is used to build a set of qualifiers.
const Type * strip(QualType type)
Collect any qualifiers on the given type and return an unqualified type.
QualType apply(const ASTContext &Context, QualType QT) const
Apply the collected qualifiers to the given type.
void addAddressSpace(LangAS space)
Represents a struct/union/class.
virtual void completeDefinition()
Note that the definition of this type is now complete.
Scope - A scope is a transient data structure that is used while parsing the program.
Encodes a location in the source.
RetTy Visit(PTR(Stmt) S, ParamTys... P)
Stmt - This represents one statement.
void startDefinition()
Starts the definition of this tag declaration.
unsigned getNewAlign() const
Return the largest alignment for which a suitably-sized allocation with '::operator new(size_t)' is g...
TargetOptions & getTargetOpts() const
Retrieve the target options.
virtual const llvm::omp::GV & getGridValue() const
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
llvm::StringMap< bool > FeatureMap
The map of which features have been enabled disabled based on the command line.
The base class of the type hierarchy.
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
bool isReferenceType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
bool isLValueReferenceType() const
bool hasSignedIntegerRepresentation() const
Determine whether this type has an signed integer representation of some sort, e.g....
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
Represents a variable declaration or definition.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
The JSON file list parser is used to communicate input to InstallAPI.
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
@ ICIS_NoInit
No in-class initializer.
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
bool isOpenMPPrivate(OpenMPClauseKind Kind)
Checks if the specified clause is one of private clauses like 'private', 'firstprivate',...
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OffloadArch StringToOffloadArch(llvm::StringRef S)
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the composite or combined directives that need loop ...
LangAS
Defines the address space values used by the address space qualifier of QualType.
const char * OffloadArchToString(OffloadArch A)
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
LangAS getLangASFromTargetAS(unsigned TargetAS)
@ CXXThis
Parameter for C++ 'this' argument.
@ Other
Other implicit parameter.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
llvm::PointerType * VoidPtrTy
llvm::IntegerType * SizeTy
llvm::PointerType * VoidPtrPtrTy
llvm::IntegerType * Int32Ty
llvm::PointerType * Int8PtrTy