22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/Frontend/OpenMP/OMPGridValues.h"
26using namespace CodeGen;
27using namespace llvm::omp;
32 llvm::FunctionCallee EnterCallee =
nullptr;
34 llvm::FunctionCallee ExitCallee =
nullptr;
37 llvm::BasicBlock *ContBlock =
nullptr;
40 NVPTXActionTy(llvm::FunctionCallee EnterCallee,
42 llvm::FunctionCallee ExitCallee,
44 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
49 llvm::Value *CallBool = CGF.
Builder.CreateIsNotNull(EnterRes);
53 CGF.
Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
71class ExecutionRuntimeModesRAII {
80 : ExecMode(ExecMode) {
81 SavedExecMode = ExecMode;
84 ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }
89 if (
const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
90 const Expr *
Base = ASE->getBase()->IgnoreParenImpCasts();
91 while (
const auto *TempASE = dyn_cast<ArraySubscriptExpr>(
Base))
92 Base = TempASE->getBase()->IgnoreParenImpCasts();
94 }
else if (
auto *OASE = dyn_cast<ArraySectionExpr>(RefExpr)) {
95 const Expr *
Base = OASE->getBase()->IgnoreParenImpCasts();
96 while (
const auto *TempOASE = dyn_cast<ArraySectionExpr>(
Base))
97 Base = TempOASE->getBase()->IgnoreParenImpCasts();
98 while (
const auto *TempASE = dyn_cast<ArraySubscriptExpr>(
Base))
99 Base = TempASE->getBase()->IgnoreParenImpCasts();
103 if (
const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
104 return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
105 const auto *ME = cast<MemberExpr>(RefExpr);
106 return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
109static RecordDecl *buildRecordForGlobalizedVars(
112 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
116 if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
120 GlobalizedVars.emplace_back(
C.getDeclAlign(
D),
D);
121 for (
const ValueDecl *
D : EscapedDeclsForTeams)
122 GlobalizedVars.emplace_back(
C.getDeclAlign(
D),
D);
128 RecordDecl *GlobalizedRD =
C.buildImplicitRecord(
"_globalized_locals_ty");
131 EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
132 for (
const auto &Pair : GlobalizedVars) {
136 Type =
C.getPointerType(
Type.getNonReferenceType());
141 if (SingleEscaped.count(VD)) {
156 llvm::APInt ArraySize(32, BufSize);
157 Type =
C.getConstantArrayType(
Type, ArraySize,
nullptr,
158 ArraySizeModifier::Normal, 0);
166 llvm::APInt Align(32, Pair.first.getQuantity());
167 Field->addAttr(AlignedAttr::CreateImplicit(
170 C.getIntTypeForBitwidth(32, 0),
172 {}, AlignedAttr::GNU_aligned));
175 MappedDeclsFields.try_emplace(VD, Field);
182class CheckVarsEscapingDeclContext final
185 llvm::SetVector<const ValueDecl *> EscapedDecls;
186 llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
187 llvm::SetVector<const ValueDecl *> DelayedVariableLengthDecls;
190 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
191 bool AllEscaped =
false;
192 bool IsForCombinedParallelRegion =
false;
194 void markAsEscaped(
const ValueDecl *VD) {
196 if (!isa<VarDecl>(VD) ||
197 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
204 bool IsCaptured =
false;
205 if (
auto *CSI = CGF.CapturedStmtInfo) {
206 if (
const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
210 if (!IsForCombinedParallelRegion) {
213 const auto *
Attr = FD->getAttr<OMPCaptureKindAttr>();
216 if (((
Attr->getCaptureKind() != OMPC_map) &&
218 ((
Attr->getCaptureKind() == OMPC_map) &&
219 !FD->getType()->isAnyPointerType()))
222 if (!FD->getType()->isReferenceType()) {
224 "Parameter captured by value with variably modified type");
225 EscapedParameters.insert(VD);
226 }
else if (!IsForCombinedParallelRegion) {
231 if ((!CGF.CapturedStmtInfo ||
232 (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
240 EscapedVariableLengthDecls.insert(VD);
242 DelayedVariableLengthDecls.insert(VD);
244 EscapedDecls.insert(VD);
247 void VisitValueDecl(
const ValueDecl *VD) {
250 if (
const auto *VarD = dyn_cast<VarDecl>(VD)) {
251 if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {
252 const bool SavedAllEscaped = AllEscaped;
254 Visit(VarD->getInit());
255 AllEscaped = SavedAllEscaped;
261 bool IsCombinedParallelRegion) {
265 if (
C.capturesVariable() && !
C.capturesVariableByCopy()) {
267 bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
268 if (IsCombinedParallelRegion) {
272 IsForCombinedParallelRegion =
false;
275 C->getClauseKind() == OMPC_reduction ||
276 C->getClauseKind() == OMPC_linear ||
277 C->getClauseKind() == OMPC_private)
280 if (
const auto *PC = dyn_cast<OMPFirstprivateClause>(
C))
281 Vars = PC->getVarRefs();
282 else if (
const auto *PC = dyn_cast<OMPLastprivateClause>(
C))
283 Vars = PC->getVarRefs();
285 llvm_unreachable(
"Unexpected clause.");
286 for (
const auto *
E : Vars) {
290 IsForCombinedParallelRegion =
true;
294 if (IsForCombinedParallelRegion)
299 if (isa<OMPCapturedExprDecl>(VD))
301 IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;
306 void buildRecordForGlobalizedVars(
bool IsInTTDRegion) {
307 assert(!GlobalizedRD &&
308 "Record for globalized variables is built already.");
310 unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
312 EscapedDeclsForTeams = EscapedDecls.getArrayRef();
314 EscapedDeclsForParallel = EscapedDecls.getArrayRef();
315 GlobalizedRD = ::buildRecordForGlobalizedVars(
316 CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
317 MappedDeclsFields, WarpSize);
323 : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
325 virtual ~CheckVarsEscapingDeclContext() =
default;
326 void VisitDeclStmt(
const DeclStmt *S) {
329 for (
const Decl *
D : S->decls())
330 if (
const auto *VD = dyn_cast_or_null<ValueDecl>(
D))
336 if (!
D->hasAssociatedStmt())
339 dyn_cast_or_null<CapturedStmt>(
D->getAssociatedStmt())) {
344 if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {
345 VisitStmt(S->getCapturedStmt());
348 VisitOpenMPCapturedStmt(
350 CaptureRegions.back() == OMPD_parallel &&
358 if (
C.capturesVariable() && !
C.capturesVariableByCopy()) {
361 if (isa<OMPCapturedExprDecl>(VD))
370 if (
C.capturesVariable()) {
374 if (
E->isInitCapture(&
C) || isa<OMPCapturedExprDecl>(VD))
385 const VarDecl *VD =
C.getVariable();
395 for (
const Expr *Arg :
E->arguments()) {
398 if (Arg->isLValue()) {
399 const bool SavedAllEscaped = AllEscaped;
402 AllEscaped = SavedAllEscaped;
415 if (isa<OMPCapturedExprDecl>(VD))
423 if (
E->getOpcode() == UO_AddrOf) {
424 const bool SavedAllEscaped = AllEscaped;
427 AllEscaped = SavedAllEscaped;
435 if (
E->getCastKind() == CK_ArrayToPointerDecay) {
436 const bool SavedAllEscaped = AllEscaped;
439 AllEscaped = SavedAllEscaped;
444 void VisitExpr(
const Expr *
E) {
447 bool SavedAllEscaped = AllEscaped;
453 AllEscaped = SavedAllEscaped;
455 void VisitStmt(
const Stmt *S) {
458 for (
const Stmt *Child : S->children())
465 const RecordDecl *getGlobalizedRecord(
bool IsInTTDRegion) {
467 buildRecordForGlobalizedVars(IsInTTDRegion);
473 assert(GlobalizedRD &&
474 "Record for globalized variables must be generated already.");
475 return MappedDeclsFields.lookup(VD);
480 return EscapedDecls.getArrayRef();
485 const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters()
const {
486 return EscapedParameters;
492 return EscapedVariableLengthDecls.getArrayRef();
498 return DelayedVariableLengthDecls.getArrayRef();
504CGOpenMPRuntimeGPU::getExecutionMode()
const {
505 return CurrentExecutionMode;
509CGOpenMPRuntimeGPU::getDataSharingMode()
const {
510 return CurrentDataSharingMode;
516 const auto *CS =
D.getInnermostCapturedStmt();
518 CS->getCapturedStmt()->IgnoreContainers(
true);
521 if (
const auto *NestedDir =
522 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
524 switch (
D.getDirectiveKind()) {
528 if (DKind == OMPD_teams) {
529 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
534 if (
const auto *NND =
535 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
536 DKind = NND->getDirectiveKind();
542 case OMPD_target_teams:
544 case OMPD_target_simd:
545 case OMPD_target_parallel:
546 case OMPD_target_parallel_for:
547 case OMPD_target_parallel_for_simd:
548 case OMPD_target_teams_distribute:
549 case OMPD_target_teams_distribute_simd:
550 case OMPD_target_teams_distribute_parallel_for:
551 case OMPD_target_teams_distribute_parallel_for_simd:
554 case OMPD_parallel_for:
555 case OMPD_parallel_master:
556 case OMPD_parallel_sections:
558 case OMPD_parallel_for_simd:
560 case OMPD_cancellation_point:
562 case OMPD_threadprivate:
580 case OMPD_target_data:
581 case OMPD_target_exit_data:
582 case OMPD_target_enter_data:
583 case OMPD_distribute:
584 case OMPD_distribute_simd:
585 case OMPD_distribute_parallel_for:
586 case OMPD_distribute_parallel_for_simd:
587 case OMPD_teams_distribute:
588 case OMPD_teams_distribute_simd:
589 case OMPD_teams_distribute_parallel_for:
590 case OMPD_teams_distribute_parallel_for_simd:
591 case OMPD_target_update:
592 case OMPD_declare_simd:
593 case OMPD_declare_variant:
594 case OMPD_begin_declare_variant:
595 case OMPD_end_declare_variant:
596 case OMPD_declare_target:
597 case OMPD_end_declare_target:
598 case OMPD_declare_reduction:
599 case OMPD_declare_mapper:
601 case OMPD_taskloop_simd:
602 case OMPD_master_taskloop:
603 case OMPD_master_taskloop_simd:
604 case OMPD_parallel_master_taskloop:
605 case OMPD_parallel_master_taskloop_simd:
609 llvm_unreachable(
"Unexpected directive.");
619 switch (DirectiveKind) {
621 case OMPD_target_teams:
623 case OMPD_target_parallel_loop:
624 case OMPD_target_parallel:
625 case OMPD_target_parallel_for:
626 case OMPD_target_parallel_for_simd:
627 case OMPD_target_teams_distribute_parallel_for:
628 case OMPD_target_teams_distribute_parallel_for_simd:
629 case OMPD_target_simd:
630 case OMPD_target_teams_distribute_simd:
632 case OMPD_target_teams_distribute:
634 case OMPD_target_teams_loop:
637 if (
auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&
D))
638 return TTLD->canBeParallelFor();
642 case OMPD_parallel_for:
643 case OMPD_parallel_master:
644 case OMPD_parallel_sections:
646 case OMPD_parallel_for_simd:
648 case OMPD_cancellation_point:
650 case OMPD_threadprivate:
668 case OMPD_target_data:
669 case OMPD_target_exit_data:
670 case OMPD_target_enter_data:
671 case OMPD_distribute:
672 case OMPD_distribute_simd:
673 case OMPD_distribute_parallel_for:
674 case OMPD_distribute_parallel_for_simd:
675 case OMPD_teams_distribute:
676 case OMPD_teams_distribute_simd:
677 case OMPD_teams_distribute_parallel_for:
678 case OMPD_teams_distribute_parallel_for_simd:
679 case OMPD_target_update:
680 case OMPD_declare_simd:
681 case OMPD_declare_variant:
682 case OMPD_begin_declare_variant:
683 case OMPD_end_declare_variant:
684 case OMPD_declare_target:
685 case OMPD_end_declare_target:
686 case OMPD_declare_reduction:
687 case OMPD_declare_mapper:
689 case OMPD_taskloop_simd:
690 case OMPD_master_taskloop:
691 case OMPD_master_taskloop_simd:
692 case OMPD_parallel_master_taskloop:
693 case OMPD_parallel_master_taskloop_simd:
700 "Unknown programming model for OpenMP directive on NVPTX target.");
704 StringRef ParentName,
705 llvm::Function *&OutlinedFn,
706 llvm::Constant *&OutlinedFnID,
709 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode,
EM_NonSPMD);
710 EntryFunctionState EST;
711 WrapperFunctionsMap.clear();
713 [[maybe_unused]]
bool IsBareKernel =
D.getSingleClause<
OMPXBareClause>();
714 assert(!IsBareKernel &&
"bare kernel should not be at generic mode");
718 CGOpenMPRuntimeGPU::EntryFunctionState &EST;
722 NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST,
727 RT.emitKernelInit(
D, CGF, EST,
false);
729 RT.setLocThreadIdInsertPt(CGF,
true);
734 RT.emitKernelDeinit(CGF, EST,
false);
738 IsInTTDRegion =
true;
740 IsOffloadEntry, CodeGen);
741 IsInTTDRegion =
false;
746 EntryFunctionState &EST,
bool IsSPMD) {
747 int32_t MinThreadsVal = 1, MaxThreadsVal = -1, MinTeamsVal = 1,
750 MinTeamsVal, MaxTeamsVal);
754 Bld, IsSPMD, MinThreadsVal, MaxThreadsVal, MinTeamsVal, MaxTeamsVal));
756 emitGenericVarsProlog(CGF, EST.Loc);
760 EntryFunctionState &EST,
763 emitGenericVarsEpilog(CGF);
768 "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::Union);
770 for (
const RecordDecl *TeamReductionRec : TeamsReductions) {
771 QualType RecTy =
C.getRecordType(TeamReductionRec);
781 QualType StaticTy =
C.getRecordType(StaticRD);
782 llvm::Type *LLVMReductionsBufferTy =
786 TeamsReductions.empty()
788 : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
790 OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,
791 C.getLangOpts().OpenMPCUDAReductionBufNum);
792 TeamsReductions.clear();
796 StringRef ParentName,
797 llvm::Function *&OutlinedFn,
798 llvm::Constant *&OutlinedFnID,
801 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode,
EM_SPMD);
802 EntryFunctionState EST;
809 CGOpenMPRuntimeGPU::EntryFunctionState &EST;
811 DataSharingMode Mode;
816 CGOpenMPRuntimeGPU::EntryFunctionState &EST,
818 : RT(RT), EST(EST), IsBareKernel(IsBareKernel),
819 Mode(RT.CurrentDataSharingMode),
D(
D) {}
822 RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;
825 RT.emitKernelInit(
D, CGF, EST,
true);
827 RT.setLocThreadIdInsertPt(CGF,
true);
831 RT.CurrentDataSharingMode = Mode;
834 RT.clearLocThreadIdInsertPt(CGF);
835 RT.emitKernelDeinit(CGF, EST,
true);
837 } Action(*
this, EST, IsBareKernel,
D);
839 IsInTTDRegion =
true;
841 IsOffloadEntry, CodeGen);
842 IsInTTDRegion =
false;
845void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
847 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
852 assert(!ParentName.empty() &&
"Invalid target region parent name!");
856 if (Mode || IsBareKernel)
857 emitSPMDKernel(
D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
860 emitNonSPMDKernel(
D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
866 llvm::OpenMPIRBuilderConfig Config(
874 llvm_unreachable(
"OpenMP can only handle device code.");
884 "__omp_rtl_debug_kind");
886 "__omp_rtl_assume_teams_oversubscription");
888 "__omp_rtl_assume_threads_oversubscription");
890 "__omp_rtl_assume_no_thread_state");
892 "__omp_rtl_assume_no_nested_parallelism");
896 ProcBindKind ProcBind,
902 llvm::Value *NumThreads,
908 const Expr *NumTeams,
909 const Expr *ThreadLimit,
917 bool PrevIsInTTDRegion = IsInTTDRegion;
918 IsInTTDRegion =
false;
921 CGF,
D, ThreadIDVar, InnermostKind, CodeGen));
922 IsInTTDRegion = PrevIsInTTDRegion;
924 llvm::Function *WrapperFun =
925 createParallelDataSharingWrapper(OutlinedFun,
D);
926 WrapperFunctionsMap[OutlinedFun] = WrapperFun;
938 "expected teams directive.");
943 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
945 Dir = dyn_cast_or_null<OMPExecutableDirective>(S);
953 for (
const Expr *
E :
C->getVarRefs())
963 "expected teams directive.");
965 for (
const Expr *
E :
C->privates())
978 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
985 if (!LastPrivatesReductions.empty()) {
986 GlobalizedRD = ::buildRecordForGlobalizedVars(
987 CGM.
getContext(), {}, LastPrivatesReductions, MappedDeclsFields,
990 }
else if (!LastPrivatesReductions.empty()) {
991 assert(!TeamAndReductions.first &&
992 "Previous team declaration is not expected.");
993 TeamAndReductions.first =
D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
994 std::swap(TeamAndReductions.second, LastPrivatesReductions);
1001 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
1005 NVPTXPrePostActionTy(
1007 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
1009 :
Loc(
Loc), GlobalizedRD(GlobalizedRD),
1010 MappedDeclsFields(MappedDeclsFields) {}
1015 auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
1016 I->getSecond().MappedParams =
1017 std::make_unique<CodeGenFunction::OMPMapVars>();
1018 DeclToAddrMapTy &
Data = I->getSecond().LocalVarData;
1019 for (
const auto &Pair : MappedDeclsFields) {
1020 assert(Pair.getFirst()->isCanonicalDecl() &&
1021 "Expected canonical declaration");
1022 Data.insert(std::make_pair(Pair.getFirst(), MappedVarData()));
1025 Rt.emitGenericVarsProlog(CGF,
Loc);
1029 .emitGenericVarsEpilog(CGF);
1031 } Action(
Loc, GlobalizedRD, MappedDeclsFields);
1034 CGF,
D, ThreadIDVar, InnermostKind, CodeGen);
1046 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
1047 if (I == FunctionGlobalizedDecls.end())
1050 for (
auto &Rec : I->getSecond().LocalVarData) {
1051 const auto *VD = cast<VarDecl>(Rec.first);
1052 bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
1056 llvm::Value *ParValue;
1065 llvm::CallBase *VoidPtr =
1070 VoidPtr->addRetAttr(llvm::Attribute::get(
1076 VoidPtr, Bld.getPtrTy(0), VD->
getName() +
"_on_stack");
1079 Rec.second.PrivateAddr = VarAddr.
getAddress();
1080 Rec.second.GlobalizedVal = VoidPtr;
1085 I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.
getAddress());
1088 VoidPtr->setDebugLoc(DI->SourceLocToDebugLoc(VD->
getLocation()));
1091 for (
const auto *ValueD : I->getSecond().EscapedVariableLengthDecls) {
1092 const auto *VD = cast<VarDecl>(ValueD);
1093 std::pair<llvm::Value *, llvm::Value *> AddrSizePair =
1095 I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(AddrSizePair);
1099 I->getSecond().MappedParams->setVarAddr(CGF, VD,
Base.getAddress());
1101 I->getSecond().MappedParams->apply(CGF);
1106 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
1107 if (I == FunctionGlobalizedDecls.end())
1111 return llvm::is_contained(I->getSecond().DelayedVariableLengthDecls, VD);
1114std::pair<llvm::Value *, llvm::Value *>
1122 Size = Bld.CreateNUWAdd(
1124 llvm::Value *AlignVal =
1126 Size = Bld.CreateUDiv(Size, AlignVal);
1127 Size = Bld.CreateNUWMul(Size, AlignVal);
1130 llvm::Value *AllocArgs[] = {Size};
1131 llvm::CallBase *VoidPtr =
1135 VoidPtr->addRetAttr(llvm::Attribute::get(
1138 return std::make_pair(VoidPtr, Size);
1143 const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) {
1147 {AddrSizePair.first, AddrSizePair.second});
1154 const auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
1155 if (I != FunctionGlobalizedDecls.end()) {
1158 for (
const auto &AddrSizePair :
1159 llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
1162 {AddrSizePair.first, AddrSizePair.second});
1165 for (
auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) {
1166 const auto *VD = cast<VarDecl>(Rec.first);
1167 I->getSecond().MappedParams->restore(CGF);
1169 llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal,
1181 llvm::Function *OutlinedFn,
1195 OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(
CGM.
VoidPtrTy));
1198 OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1199 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1205 llvm::Function *OutlinedFn,
1208 llvm::Value *NumThreads) {
1212 auto &&ParallelGen = [
this,
Loc, OutlinedFn, CapturedVars, IfCond,
1216 llvm::Value *NumThreadsVal = NumThreads;
1217 llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
1218 llvm::Value *ID = llvm::ConstantPointerNull::get(
CGM.
Int8PtrTy);
1221 llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn,
CGM.
Int8PtrTy);
1229 llvm::ArrayType::get(
CGM.
VoidPtrTy, CapturedVars.size()),
1230 "captured_vars_addrs");
1232 if (!CapturedVars.empty()) {
1236 for (llvm::Value *
V : CapturedVars) {
1239 if (
V->getType()->isIntegerTy())
1249 llvm::Value *IfCondVal =
nullptr;
1254 IfCondVal = llvm::ConstantInt::get(CGF.
Int32Ty, 1);
1257 NumThreadsVal = llvm::ConstantInt::get(CGF.
Int32Ty, -1);
1259 NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.
Int32Ty),
1261 assert(IfCondVal &&
"Expected a value");
1263 llvm::Value *Args[] = {
1268 llvm::ConstantInt::get(CGF.
Int32Ty, -1),
1271 Bld.CreateBitOrPointerCast(CapturedVarsAddrs.
emitRawPointer(CGF),
1273 llvm::ConstantInt::get(
CGM.
SizeTy, CapturedVars.size())};
1289 llvm::Value *Args[] = {
1290 llvm::ConstantPointerNull::get(
1292 llvm::ConstantInt::get(CGF.
Int32Ty, 0,
true)};
1329 CGM.
getModule(), OMPRTL___kmpc_warp_active_thread_mask));
1331 llvm::Value *ThreadID = RT.getGPUThreadID(CGF);
1334 llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF);
1347 llvm::Value *CmpLoopBound = CGF.
Builder.CreateICmpSLT(CounterVal, TeamWidth);
1348 CGF.
Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
1354 llvm::Value *CmpThreadToCounter =
1355 CGF.
Builder.CreateICmpEQ(ThreadID, CounterVal);
1356 CGF.
Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
1375 llvm::Value *IncCounterVal =
1389 "Cast type must sized.");
1391 "Val type must sized.");
1393 if (ValTy == CastTy)
1397 return CGF.
Builder.CreateBitCast(Val, LLVMCastTy);
1399 return CGF.
Builder.CreateIntCast(Val, LLVMCastTy,
1666 if (Options.SimpleReduction) {
1667 assert(!TeamsReduction && !ParallelReduction &&
1668 "Invalid reduction selection in emitReduction.");
1669 (void)ParallelReduction;
1671 ReductionOps, Options);
1675 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;
1678 for (
const Expr *DRE : Privates) {
1679 PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();
1682 const RecordDecl *ReductionRec = ::buildRecordForGlobalizedVars(
1686 TeamsReductions.push_back(ReductionRec);
1691 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1694 InsertPointTy CodeGenIP(CGF.
Builder.GetInsertBlock(),
1695 CGF.
Builder.GetInsertPoint());
1696 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(
1703 llvm::Type *ElementType;
1705 llvm::Value *PrivateVariable;
1706 llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy AtomicReductionGen =
nullptr;
1708 const auto *RHSVar =
1709 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[Idx])->getDecl());
1711 const auto *LHSVar =
1712 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[Idx])->getDecl());
1714 llvm::OpenMPIRBuilder::EvalKind EvalKind;
1717 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Scalar;
1720 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Complex;
1723 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Aggregate;
1726 auto ReductionGen = [&](InsertPointTy CodeGenIP,
unsigned I,
1727 llvm::Value **LHSPtr, llvm::Value **RHSPtr,
1728 llvm::Function *NewFunc) {
1729 CGF.
Builder.restoreIP(CodeGenIP);
1730 auto *CurFn = CGF.
CurFn;
1731 CGF.
CurFn = NewFunc;
1734 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()))
1735 .emitRawPointer(CGF);
1737 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()))
1738 .emitRawPointer(CGF);
1741 cast<DeclRefExpr>(LHSExprs[I]),
1742 cast<DeclRefExpr>(RHSExprs[I]));
1746 return InsertPointTy(CGF.
Builder.GetInsertBlock(),
1747 CGF.
Builder.GetInsertPoint());
1749 ReductionInfos.emplace_back(llvm::OpenMPIRBuilder::ReductionInfo(
1750 ElementType,
Variable, PrivateVariable, EvalKind,
1751 nullptr, ReductionGen, AtomicReductionGen));
1755 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
1757 OmpLoc, AllocaIP, CodeGenIP, ReductionInfos,
false, TeamsReduction,
1758 DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,
1760 C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc);
1761 assert(AfterIP &&
"unexpected error creating GPU reductions");
1762 CGF.
Builder.restoreIP(*AfterIP);
1768 const VarDecl *NativeParam)
const {
1773 const Type *NonQualTy = QC.
strip(ArgType);
1775 if (
const auto *
Attr = FD->
getAttr<OMPCaptureKindAttr>()) {
1776 if (
Attr->getCaptureKind() == OMPC_map) {
1783 enum { NVPTX_local_addr = 5 };
1786 if (isa<ImplicitParamDecl>(NativeParam))
1801 const VarDecl *TargetParam)
const {
1802 assert(NativeParam != TargetParam &&
1804 "Native arg must not be the same as target arg.");
1808 const Type *NonQualTy = QC.
strip(NativeParamType);
1810 unsigned NativePointeeAddrSpace =
1818 llvm::PointerType::get(CGF.
getLLVMContext(), NativePointeeAddrSpace));
1822 return NativeParamAddr;
1829 TargetArgs.reserve(Args.size());
1830 auto *FnType = OutlinedFn.getFunctionType();
1831 for (
unsigned I = 0,
E = Args.size(); I <
E; ++I) {
1832 if (FnType->isVarArg() && FnType->getNumParams() <= I) {
1833 TargetArgs.append(std::next(Args.begin(), I), Args.end());
1836 llvm::Type *TargetType = FnType->getParamType(I);
1837 llvm::Value *NativeArg = Args[I];
1838 if (!TargetType->isPointerTy()) {
1839 TargetArgs.emplace_back(NativeArg);
1842 TargetArgs.emplace_back(
1852llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
1855 const auto &CS = *
D.getCapturedStmt(OMPD_parallel);
1869 WrapperArgs.emplace_back(&ParallelLevelArg);
1870 WrapperArgs.emplace_back(&WrapperArg);
1875 auto *Fn = llvm::Function::Create(
1877 Twine(OutlinedParallelFn->getName(),
"_wrapper"), &
CGM.
getModule());
1885 Fn->addFnAttr(llvm::Attribute::NoInline);
1888 Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
1889 Fn->setDoesNotRecurse();
1895 const auto *RD = CS.getCapturedRecordDecl();
1896 auto CurField = RD->field_begin();
1908 auto CI = CS.capture_begin();
1914 llvm::Value *GlobalArgsPtr = GlobalArgs.
getPointer();
1915 llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
1923 if (CS.capture_size() > 0 ||
1934 Src, Bld.getPtrTy(0), CGF.
SizeTy);
1939 cast<OMPLoopDirective>(
D).getLowerBoundVariable()->getExprLoc());
1940 Args.emplace_back(LB);
1949 cast<OMPLoopDirective>(
D).getUpperBoundVariable()->getExprLoc());
1950 Args.emplace_back(UB);
1953 if (CS.capture_size() > 0) {
1955 for (
unsigned I = 0,
E = CS.capture_size(); I <
E; ++I, ++CI, ++CurField) {
1956 QualType ElemTy = CurField->getType();
1965 if (CI->capturesVariableByCopy() &&
1966 !CI->getCapturedVar()->getType()->isAnyPointerType()) {
1970 Args.emplace_back(Arg);
1984 assert(
D &&
"Expected function or captured|block decl.");
1985 assert(FunctionGlobalizedDecls.count(CGF.
CurFn) == 0 &&
1986 "Function is registered already.");
1987 assert((!TeamAndReductions.first || TeamAndReductions.first ==
D) &&
1988 "Team is set but not processed.");
1989 const Stmt *Body =
nullptr;
1990 bool NeedToDelayGlobalization =
false;
1991 if (
const auto *FD = dyn_cast<FunctionDecl>(
D)) {
1992 Body = FD->getBody();
1993 }
else if (
const auto *BD = dyn_cast<BlockDecl>(
D)) {
1994 Body = BD->getBody();
1995 }
else if (
const auto *CD = dyn_cast<CapturedDecl>(
D)) {
1996 Body = CD->getBody();
1998 if (NeedToDelayGlobalization &&
2004 CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
2005 VarChecker.Visit(Body);
2007 VarChecker.getGlobalizedRecord(IsInTTDRegion);
2008 TeamAndReductions.first =
nullptr;
2009 TeamAndReductions.second.clear();
2011 VarChecker.getEscapedVariableLengthDecls();
2013 VarChecker.getDelayedVariableLengthDecls();
2014 if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty() &&
2015 DelayedVariableLengthDecls.empty())
2017 auto I = FunctionGlobalizedDecls.try_emplace(CGF.
CurFn).first;
2018 I->getSecond().MappedParams =
2019 std::make_unique<CodeGenFunction::OMPMapVars>();
2020 I->getSecond().EscapedParameters.insert(
2021 VarChecker.getEscapedParameters().begin(),
2022 VarChecker.getEscapedParameters().end());
2023 I->getSecond().EscapedVariableLengthDecls.append(
2024 EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
2025 I->getSecond().DelayedVariableLengthDecls.append(
2026 DelayedVariableLengthDecls.begin(), DelayedVariableLengthDecls.end());
2027 DeclToAddrMapTy &
Data = I->getSecond().LocalVarData;
2028 for (
const ValueDecl *VD : VarChecker.getEscapedDecls()) {
2030 Data.insert(std::make_pair(VD, MappedVarData()));
2032 if (!NeedToDelayGlobalization) {
2035 GlobalizationScope() =
default;
2039 .emitGenericVarsEpilog(CGF);
2048 if (VD && VD->
hasAttr<OMPAllocateDeclAttr>()) {
2049 const auto *A = VD->
getAttr<OMPAllocateDeclAttr>();
2051 switch (A->getAllocatorType()) {
2052 case OMPAllocateDeclAttr::OMPNullMemAlloc:
2053 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
2054 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
2055 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
2057 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
2059 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
2062 case OMPAllocateDeclAttr::OMPConstMemAlloc:
2065 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
2068 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
2069 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
2073 auto *GV =
new llvm::GlobalVariable(
2075 llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(VarTy),
2077 nullptr, llvm::GlobalValue::NotThreadLocal,
2092 auto I = FunctionGlobalizedDecls.find(CGF.
CurFn);
2093 if (I == FunctionGlobalizedDecls.end())
2095 auto VDI = I->getSecond().LocalVarData.find(VD);
2096 if (VDI != I->getSecond().LocalVarData.end())
2097 return VDI->second.PrivateAddr;
2102 auto VDI = I->getSecond().LocalVarData.find(
2103 cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
2104 ->getCanonicalDecl());
2105 if (VDI != I->getSecond().LocalVarData.end())
2106 return VDI->second.PrivateAddr;
2114 FunctionGlobalizedDecls.erase(CGF.
CurFn);
2121 llvm::Value *&Chunk)
const {
2124 ScheduleKind = OMPC_DIST_SCHEDULE_static;
2126 RT.getGPUNumThreads(CGF),
2128 S.getIterationVariable()->getType(), S.getBeginLoc());
2132 CGF, S, ScheduleKind, Chunk);
2138 const Expr *&ChunkExpr)
const {
2139 ScheduleKind = OMPC_SCHEDULE_static;
2141 llvm::APInt ChunkSize(32, 1);
2150 " Expected target-based directive.");
2155 if (!
C.capturesVariable())
2157 const VarDecl *VD =
C.getCapturedVar();
2158 const auto *RD = VD->
getType()
2162 if (!RD || !RD->isLambda())
2171 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
2173 RD->getCaptureFields(Captures, ThisCapture);
2183 const ValueDecl *VD = LC.getCapturedVar();
2188 auto It = Captures.find(VD);
2189 assert(It != Captures.end() &&
"Found lambda capture without field.");
2203 if (!VD || !VD->
hasAttr<OMPAllocateDeclAttr>())
2205 const auto *A = VD->
getAttr<OMPAllocateDeclAttr>();
2206 switch(A->getAllocatorType()) {
2207 case OMPAllocateDeclAttr::OMPNullMemAlloc:
2208 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
2210 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
2211 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
2212 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
2213 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
2214 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
2217 case OMPAllocateDeclAttr::OMPConstMemAlloc:
2220 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
2223 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
2224 llvm_unreachable(
"Expected predefined allocator for the variables with the "
2235 if (Feature.getValue()) {
2247 for (
const OMPClause *Clause :
D->clauselists()) {
2248 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
2261 llvm::raw_svector_ostream Out(Buffer);
2263 <<
" does not support unified addressing";
2264 CGM.
Error(Clause->getBeginLoc(), Out.str());
2339 llvm_unreachable(
"Unexpected GPU arch.");
2349 const char *LocSize =
"__kmpc_get_hardware_num_threads_in_block";
2350 llvm::Function *F = M->getFunction(LocSize);
2352 F = llvm::Function::Create(llvm::FunctionType::get(CGF.
Int32Ty, {},
false),
2353 llvm::GlobalVariable::ExternalLinkage, LocSize,
2356 return Bld.CreateCall(F, {},
"nvptx_num_threads");
2363 CGM.
getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block),
static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)
Get list of reduction variables from the teams ... directives.
static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)
Cast value to the specified type.
static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)
Get list of lastprivate variables from the teams distribute ... or teams {distribute ...} directives.
static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) SPMD construct, if any.
static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)
static OffloadArch getOffloadArch(CodeGenModule &CGM)
This file defines OpenMP nodes for declarative directives.
This file defines OpenMP AST classes for clauses.
static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false, StringRef DiagType="")
This file defines OpenMP AST classes for executable directives and clauses.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
const TargetInfo & getTargetInfo() const
QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const
Return the uniqued reference to the type for an address space qualified type with the specified type ...
unsigned getTargetAddressSpace(LangAS AS) const
Attr - This represents one attribute.
A class which contains all the information about a particular captured value.
BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Describes the capture of either a variable, or 'this', or variable-length array type.
This captures a statement into a function.
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
CharUnits - This is an opaque type for sizes expressed in character units.
bool isZero() const
isZero - Test whether the quantity equals zero.
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
CGFunctionInfo - Class to encapsulate the information about a function definition.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP teams.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
DataSharingMode
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...
@ DS_CUDA
CUDA data sharing mode.
@ DS_Generic
Generic data-sharing mode.
void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override
Choose a default value for the dist_schedule clause.
Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override
Gets the OpenMP-specific address of the local variable.
void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override
Emits OpenMP-specific function prolog.
void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override
Choose a default value for the schedule clause.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
This function ought to emit, in the general case, a call to.
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
void getKmpcFreeShared(CodeGenFunction &CGF, const std::pair< llvm::Value *, llvm::Value * > &AddrSizePair) override
Get call to __kmpc_free_shared.
CGOpenMPRuntimeGPU(CodeGenModule &CGM)
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP parallel.
void functionFinished(CodeGenFunction &CGF) override
Cleans up references to the objects in finished function.
llvm::Value * getGPUThreadID(CodeGenFunction &CGF)
Get the id of the current thread on the GPU.
void processRequiresDirective(const OMPRequiresDecl *D) override
Perform check on requires decl to ensure that target architecture supports unified addressing.
bool isDelayedVariableLengthDecl(CodeGenFunction &CGF, const VarDecl *VD) const override
Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURun...
void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const override
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
ExecutionMode
Defines the execution mode.
@ EM_NonSPMD
Non-SPMD execution mode (1 master thread, others are workers).
@ EM_Unknown
Unknown execution mode (orphaned directive).
@ EM_SPMD
SPMD execution mode (all threads are worker threads).
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
llvm::Value * getGPUNumThreads(CodeGenFunction &CGF)
Get the maximum number of threads in a block of the GPU.
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
std::pair< llvm::Value *, llvm::Value * > getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) override
Get call to __kmpc_alloc_shared.
bool isGPU() const override
Returns true if the current target is a GPU.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Helper to determine the min/max number of threads/teams for D.
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barier is going to ...
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const
Choose default schedule type and chunk value for the dist_schedule clause.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
CapturedRegionKind getKind() const
bool isCXXThisExprCaptured() const
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
CGCapturedStmtInfo * CapturedStmtInfo
LValue MakeNaturalAlignPointeeRawAddrLValue(llvm::Value *V, QualType T)
Same as MakeNaturalAlignPointeeAddrLValue except that the pointer is known to be unsigned.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
llvm::Type * ConvertTypeForMem(QualType T)
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
const TargetInfo & getTarget() const
llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location)
Converts Location to a DebugLoc, if debug information is enabled.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
CGDebugInfo * getDebugInfo()
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
ASTContext & getContext() const
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
CodeGenTypes & getTypes() const
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * LoadCXXThis()
LoadCXXThis - Load the value of 'this'.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
llvm::LLVMContext & getLLVMContext()
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const LangOptions & getLangOpts() const
CodeGenTypes & getTypes()
const TargetInfo & getTarget() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
ASTContext & getContext() const
llvm::LLVMContext & getLLVMContext()
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
unsigned getTargetAddressSpace(QualType T) const
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
Information for lazily generating a cleanup.
FunctionArgList - Type for representing both the decl and type of parameters to a function.
LValue - This represents an lvalue references.
Address getAddress() const
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
An abstract representation of an aligned address.
llvm::Value * getPointer() const
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
DeclStmt - Adaptor class for mixing declarations with statements and expressions.
Decl - This represents one declaration (or definition), e.g.
attr_iterator attr_end() const
bool isCanonicalDecl() const
Whether this particular Decl is a canonical one.
attr_iterator attr_begin() const
SourceLocation getLocation() const
DeclContext * getDeclContext()
SourceLocation getBeginLoc() const LLVM_READONLY
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
SourceLocation getBeginLoc() const LLVM_READONLY
This represents one expression.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language.
Represents a member of a struct/union/class.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
GlobalDecl - represents a global declaration.
ImplicitCastExpr - Allows us to explicitly represent implicit type conversions, which have no direct ...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Describes the capture of a variable or of this, or of a C++1y init-capture.
A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
This is a basic class for representing single OpenMP clause.
This is a basic class for representing single OpenMP executable directive.
OpenMPDirectiveKind getDirectiveKind() const
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
LangAS getAddressSpace() const
Return the address space of this type.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
QualType getCanonicalType() const
A qualifier set is used to build a set of qualifiers.
const Type * strip(QualType type)
Collect any qualifiers on the given type and return an unqualified type.
QualType apply(const ASTContext &Context, QualType QT) const
Apply the collected qualifiers to the given type.
void addAddressSpace(LangAS space)
Represents a struct/union/class.
virtual void completeDefinition()
Note that the definition of this type is now complete.
Scope - A scope is a transient data structure that is used while parsing the program.
Encodes a location in the source.
RetTy Visit(PTR(Stmt) S, ParamTys... P)
Stmt - This represents one statement.
void startDefinition()
Starts the definition of this tag declaration.
unsigned getNewAlign() const
Return the largest alignment for which a suitably-sized allocation with '::operator new(size_t)' is g...
TargetOptions & getTargetOpts() const
Retrieve the target options.
virtual const llvm::omp::GV & getGridValue() const
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
llvm::StringMap< bool > FeatureMap
The map of which features have been enabled disabled based on the command line.
The base class of the type hierarchy.
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
bool isReferenceType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
bool isLValueReferenceType() const
bool hasSignedIntegerRepresentation() const
Determine whether this type has an signed integer representation of some sort, e.g....
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
Represents a variable declaration or definition.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
The JSON file list parser is used to communicate input to InstallAPI.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
@ ICIS_NoInit
No in-class initializer.
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
bool isOpenMPPrivate(OpenMPClauseKind Kind)
Checks if the specified clause is one of private clauses like 'private', 'firstprivate',...
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OffloadArch StringToOffloadArch(llvm::StringRef S)
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the composite or combined directives that need loop ...
LangAS
Defines the address space values used by the address space qualifier of QualType.
const char * OffloadArchToString(OffloadArch A)
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
LangAS getLangASFromTargetAS(unsigned TargetAS)
@ CXXThis
Parameter for C++ 'this' argument.
@ Other
Other implicit parameter.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
llvm::PointerType * VoidPtrTy
llvm::IntegerType * SizeTy
llvm::PointerType * VoidPtrPtrTy
llvm::IntegerType * Int32Ty
llvm::PointerType * Int8PtrTy