#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/Support/MathExtras.h"

using namespace CodeGen;
using namespace llvm::omp;
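// Pre/post action used by the GPU runtime below: NVPTXActionTy emits the
// EnterCallee runtime call when the region is entered and the ExitCallee call
// when it is left; for conditional actions the enter call's result is tested
// (CallBool) and codegen branches to ThenBlock or ContBlock accordingly.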
  llvm::FunctionCallee EnterCallee = nullptr;
  llvm::FunctionCallee ExitCallee = nullptr;
  llvm::BasicBlock *ContBlock = nullptr;

  NVPTXActionTy(llvm::FunctionCallee EnterCallee,
                llvm::FunctionCallee ExitCallee,
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),

    llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
    CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);

class ExecutionRuntimeModesRAII {
      : ExecMode(ExecMode) {
    SavedExecMode = ExecMode;
  ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }
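// Helper for the privatization/reduction logic below: peel array subscripts
// and OpenMP array sections off a reference expression and return the
// canonical ValueDecl it ultimately names (via DeclRefExpr or MemberExpr).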
  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
  } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
  if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
    return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
  const auto *ME = cast<MemberExpr>(RefExpr);
  return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
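// Builds the implicit "_globalized_locals_ty" record with one field per
// escaped variable. Broadly: team-level (single-escaped) variables get one
// aligned field, while the remaining escaped variables get a constant array
// of BufSize elements (the warp size at the main call site in this file) so
// each lane has its own slot.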
static RecordDecl *buildRecordForGlobalizedVars(
    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
  if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
    GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
  for (const ValueDecl *D : EscapedDeclsForTeams)
    GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
  RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
      EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
  for (const auto &Pair : GlobalizedVars) {
      Type = C.getPointerType(Type.getNonReferenceType());
    if (SingleEscaped.count(VD)) {
      llvm::APInt ArraySize(32, BufSize);
      Type = C.getConstantArrayType(Type, ArraySize, nullptr,
                                    ArraySizeModifier::Normal, 0);
      llvm::APInt Align(32, Pair.first.getQuantity());
      Field->addAttr(AlignedAttr::CreateImplicit(
          C.getIntTypeForBitwidth(32, 0),
          {}, AlignedAttr::GNU_aligned));
    MappedDeclsFields.try_emplace(VD, Field);
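// Statement/expression visitor that records which variables escape the
// current context (captured by reference, used in blocks or lambdas,
// address-taken call arguments, variably modified types, ...). The escaped
// set is later turned into the globalized-locals record built above.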
class CheckVarsEscapingDeclContext final
  llvm::SetVector<const ValueDecl *> EscapedDecls;
  llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
  llvm::SetVector<const ValueDecl *> DelayedVariableLengthDecls;
  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
  bool AllEscaped = false;
  bool IsForCombinedParallelRegion = false;

  void markAsEscaped(const ValueDecl *VD) {
    if (!isa<VarDecl>(VD) ||
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
    bool IsCaptured = false;
    if (auto *CSI = CGF.CapturedStmtInfo) {
      if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
        if (!IsForCombinedParallelRegion) {
          const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();
          if (((Attr->getCaptureKind() != OMPC_map) &&
              ((Attr->getCaptureKind() == OMPC_map) &&
               !FD->getType()->isAnyPointerType()))
        if (!FD->getType()->isReferenceType()) {
                 "Parameter captured by value with variably modified type");
          EscapedParameters.insert(VD);
        } else if (!IsForCombinedParallelRegion) {
    if ((!CGF.CapturedStmtInfo ||
         (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
        EscapedVariableLengthDecls.insert(VD);
        DelayedVariableLengthDecls.insert(VD);
    EscapedDecls.insert(VD);

  void VisitValueDecl(const ValueDecl *VD) {
    if (const auto *VarD = dyn_cast<VarDecl>(VD)) {
      if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {
        const bool SavedAllEscaped = AllEscaped;
        Visit(VarD->getInit());
        AllEscaped = SavedAllEscaped;

                               bool IsCombinedParallelRegion) {
      if (C.capturesVariable() && !C.capturesVariableByCopy()) {
        bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
        if (IsCombinedParallelRegion) {
          IsForCombinedParallelRegion = false;
                C->getClauseKind() == OMPC_reduction ||
                C->getClauseKind() == OMPC_linear ||
                C->getClauseKind() == OMPC_private)
            if (const auto *PC = dyn_cast<OMPFirstprivateClause>(C))
              Vars = PC->getVarRefs();
            else if (const auto *PC = dyn_cast<OMPLastprivateClause>(C))
              Vars = PC->getVarRefs();
              llvm_unreachable("Unexpected clause.");
            for (const auto *E : Vars) {
                IsForCombinedParallelRegion = true;
        if (IsForCombinedParallelRegion)
        if (isa<OMPCapturedExprDecl>(VD))
        IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;

  void buildRecordForGlobalizedVars(bool IsInTTDRegion) {
    assert(!GlobalizedRD &&
           "Record for globalized variables is built already.");
    unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
      EscapedDeclsForTeams = EscapedDecls.getArrayRef();
      EscapedDeclsForParallel = EscapedDecls.getArrayRef();
    GlobalizedRD = ::buildRecordForGlobalizedVars(
        CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
        MappedDeclsFields, WarpSize);

      : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
  virtual ~CheckVarsEscapingDeclContext() = default;

  void VisitDeclStmt(const DeclStmt *S) {
    for (const Decl *D : S->decls())
      if (const auto *VD = dyn_cast_or_null<ValueDecl>(D))
    if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {
      VisitStmt(S->getCapturedStmt());
      VisitOpenMPCapturedStmt(
          CaptureRegions.back() == OMPD_parallel &&
      if (C.capturesVariable() && !C.capturesVariableByCopy()) {
        if (isa<OMPCapturedExprDecl>(VD))
      if (C.capturesVariable()) {

  void VisitBlockExpr(const BlockExpr *E) {
      const VarDecl *VD = C.getVariable();

  void VisitCallExpr(const CallExpr *E) {
      if (Arg->isLValue()) {
        const bool SavedAllEscaped = AllEscaped;
        AllEscaped = SavedAllEscaped;
    if (isa<OMPCapturedExprDecl>(VD))
      const bool SavedAllEscaped = AllEscaped;
      AllEscaped = SavedAllEscaped;
      const bool SavedAllEscaped = AllEscaped;
      AllEscaped = SavedAllEscaped;

  void VisitExpr(const Expr *E) {
      bool SavedAllEscaped = AllEscaped;
      AllEscaped = SavedAllEscaped;

  void VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children())

  const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {
      buildRecordForGlobalizedVars(IsInTTDRegion);
    assert(GlobalizedRD &&
           "Record for globalized variables must be generated already.");
    return MappedDeclsFields.lookup(VD);
    return EscapedDecls.getArrayRef();

  const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {
    return EscapedParameters;
    return EscapedVariableLengthDecls.getArrayRef();
    return DelayedVariableLengthDecls.getArrayRef();
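// Warp/lane helpers: the warp id within the block is the hardware thread id
// shifted right by log2(warp size), and the lane id within the warp is the
// thread id masked with (warp size - 1); LaneIDBits is derived from the
// target's GV_Warp_Size grid value.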
  unsigned LaneIDBits =
  return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");

  unsigned LaneIDBits =
  assert(LaneIDBits < 32 && "Invalid LaneIDBits size in NVPTX device.");
  unsigned LaneIDMask = ~0u >> (32u - LaneIDBits);
  return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),

CGOpenMPRuntimeGPU::getExecutionMode() const {
  return CurrentExecutionMode;

CGOpenMPRuntimeGPU::getDataSharingMode() const {
  return CurrentDataSharingMode;
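// hasNestedSPMDDirective checks for an inner (nested) SPMD construct inside a
// target region, and supportsSPMDExecutionMode decides, per directive kind,
// whether the target region may be emitted in SPMD mode at all.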
  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    if (DKind == OMPD_teams) {
      Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
      if (const auto *NND =
              dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
        DKind = NND->getDirectiveKind();
    case OMPD_target_teams:
    case OMPD_target_simd:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_parallel_for_simd:
    case OMPD_cancellation_point:
    case OMPD_threadprivate:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
      llvm_unreachable("Unexpected directive.");

  switch (DirectiveKind) {
  case OMPD_target_teams:
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd:
  case OMPD_target_simd:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
      "Unknown programming model for OpenMP directive on NVPTX target.");

                                          StringRef ParentName,
                                          llvm::Function *&OutlinedFn,
                                          llvm::Constant *&OutlinedFnID,
  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_NonSPMD);
  EntryFunctionState EST;
  WrapperFunctionsMap.clear();

  assert(!IsBareKernel && "bare kernel should not be at generic mode");
    CGOpenMPRuntimeGPU::EntryFunctionState &EST;
    NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST,
      RT.emitKernelInit(D, CGF, EST, false);
      RT.setLocThreadIdInsertPt(CGF, true);
      RT.emitKernelDeinit(CGF, EST, false);
  IsInTTDRegion = true;
                                   IsOffloadEntry, CodeGen);
  IsInTTDRegion = false;
                                     EntryFunctionState &EST, bool IsSPMD) {
  int32_t MinThreadsVal = 1, MaxThreadsVal = -1, MinTeamsVal = 1,
                                  MinTeamsVal, MaxTeamsVal);
      Bld, IsSPMD, MinThreadsVal, MaxThreadsVal, MinTeamsVal, MaxTeamsVal));
    emitGenericVarsProlog(CGF, EST.Loc);

                                       EntryFunctionState &EST,
    emitGenericVarsEpilog(CGF);
      "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::Union);
  for (const RecordDecl *TeamReductionRec : TeamsReductions) {
    QualType RecTy = C.getRecordType(TeamReductionRec);
  QualType StaticTy = C.getRecordType(StaticRD);
  llvm::Type *LLVMReductionsBufferTy =
      TeamsReductions.empty()
          : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
  OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,
                                C.getLangOpts().OpenMPCUDAReductionBufNum);
  TeamsReductions.clear();

                                       StringRef ParentName,
                                       llvm::Function *&OutlinedFn,
                                       llvm::Constant *&OutlinedFnID,
  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
  EntryFunctionState EST;
    CGOpenMPRuntimeGPU::EntryFunctionState &EST;
    DataSharingMode Mode;
                         CGOpenMPRuntimeGPU::EntryFunctionState &EST,
        : RT(RT), EST(EST), IsBareKernel(IsBareKernel),
          Mode(RT.CurrentDataSharingMode), D(D) {}
        RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;
      RT.emitKernelInit(D, CGF, EST, true);
      RT.setLocThreadIdInsertPt(CGF, true);
        RT.CurrentDataSharingMode = Mode;
      RT.clearLocThreadIdInsertPt(CGF);
      RT.emitKernelDeinit(CGF, EST, true);
  } Action(*this, EST, IsBareKernel, D);
  IsInTTDRegion = true;
                                   IsOffloadEntry, CodeGen);
  IsInTTDRegion = false;

void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
  assert(!ParentName.empty() && "Invalid target region parent name!");
  if (Mode || IsBareKernel)
    emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
    emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,

  llvm::OpenMPIRBuilderConfig Config(
    llvm_unreachable("OpenMP can only handle device code.");
                                 "__omp_rtl_debug_kind");
                                 "__omp_rtl_assume_teams_oversubscription");
                                 "__omp_rtl_assume_threads_oversubscription");
                                 "__omp_rtl_assume_no_thread_state");
                                 "__omp_rtl_assume_no_nested_parallelism");

                                              ProcBindKind ProcBind,
                                                llvm::Value *NumThreads,
                                              const Expr *NumTeams,
                                              const Expr *ThreadLimit,
  bool PrevIsInTTDRegion = IsInTTDRegion;
  IsInTTDRegion = false;
      CGF, D, ThreadIDVar, InnermostKind, CodeGen));
  IsInTTDRegion = PrevIsInTTDRegion;
  llvm::Function *WrapperFun =
      createParallelDataSharingWrapper(OutlinedFun, D);
  WrapperFunctionsMap[OutlinedFun] = WrapperFun;
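// getDistributeLastprivateVars collects the lastprivate variables of a
// 'teams distribute ...' directive and getTeamsReductionVars collects the
// reduction variables of a 'teams ...' directive; both feed the globalized
// record used by the teams outlined function below.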
         "expected teams directive.");
    Dir = dyn_cast_or_null<OMPExecutableDirective>(S);
    for (const Expr *E : C->getVarRefs())

         "expected teams directive.");
    for (const Expr *E : C->privates())

  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
  if (!LastPrivatesReductions.empty()) {
      GlobalizedRD = ::buildRecordForGlobalizedVars(
          MappedDeclsFields, WarpSize);
  } else if (!LastPrivatesReductions.empty()) {
    assert(!TeamAndReductions.first &&
           "Previous team declaration is not expected.");
    std::swap(TeamAndReductions.second, LastPrivatesReductions);

    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
    NVPTXPrePostActionTy(
        llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
        : Loc(Loc), GlobalizedRD(GlobalizedRD),
          MappedDeclsFields(MappedDeclsFields) {}
        auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
        I->getSecond().MappedParams =
            std::make_unique<CodeGenFunction::OMPMapVars>();
        DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
        for (const auto &Pair : MappedDeclsFields) {
          assert(Pair.getFirst()->isCanonicalDecl() &&
                 "Expected canonical declaration");
          Data.insert(std::make_pair(Pair.getFirst(), MappedVarData()));
      Rt.emitGenericVarsProlog(CGF, Loc);
          .emitGenericVarsEpilog(CGF);
  } Action(Loc, GlobalizedRD, MappedDeclsFields);
      CGF, D, ThreadIDVar, InnermostKind, CodeGen);

  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
  if (I == FunctionGlobalizedDecls.end())
  for (auto &Rec : I->getSecond().LocalVarData) {
    const auto *VD = cast<VarDecl>(Rec.first);
    bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
    llvm::Value *ParValue;
    llvm::CallBase *VoidPtr =
    VoidPtr->addRetAttr(llvm::Attribute::get(
        VoidPtr, VarPtrTy, VD->getName() + "_on_stack");
    Rec.second.PrivateAddr = VarAddr.getAddress(CGF);
    Rec.second.GlobalizedVal = VoidPtr;
      I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress(CGF));
      VoidPtr->setDebugLoc(DI->SourceLocToDebugLoc(VD->getLocation()));
  for (const auto *ValueD : I->getSecond().EscapedVariableLengthDecls) {
    const auto *VD = cast<VarDecl>(ValueD);
    std::pair<llvm::Value *, llvm::Value *> AddrSizePair =
    I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(AddrSizePair);
    I->getSecond().MappedParams->setVarAddr(CGF, VD, Base.getAddress(CGF));
  I->getSecond().MappedParams->apply(CGF);
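// getKmpcAllocShared rounds the variable's size up to its alignment and emits
// a __kmpc_alloc_shared call, returning the (address, size) pair;
// getKmpcFreeShared emits the matching __kmpc_free_shared call. The epilog
// below walks the recorded allocations in reverse order and frees them.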
  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
  if (I == FunctionGlobalizedDecls.end())
  return llvm::is_contained(I->getSecond().DelayedVariableLengthDecls, VD);

std::pair<llvm::Value *, llvm::Value *>
  Size = Bld.CreateNUWAdd(
  llvm::Value *AlignVal =
  Size = Bld.CreateUDiv(Size, AlignVal);
  Size = Bld.CreateNUWMul(Size, AlignVal);
  llvm::Value *AllocArgs[] = {Size};
  llvm::CallBase *VoidPtr =
  VoidPtr->addRetAttr(llvm::Attribute::get(
  return std::make_pair(VoidPtr, Size);

    const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) {
                      {AddrSizePair.first, AddrSizePair.second});

  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
  if (I != FunctionGlobalizedDecls.end()) {
    for (const auto &AddrSizePair :
         llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
                        {AddrSizePair.first, AddrSizePair.second});
    for (auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) {
      const auto *VD = cast<VarDecl>(Rec.first);
      I->getSecond().MappedParams->restore(CGF);
      llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal,

                                       llvm::Function *OutlinedFn,
  OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

                                          llvm::Function *OutlinedFn,
                                          llvm::Value *NumThreads) {
  auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond,
    llvm::Value *NumThreadsVal = NumThreads;
    llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
    llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
      llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, CGM.Int8PtrTy);
        llvm::ArrayType::get(CGM.VoidPtrTy, CapturedVars.size()),
        "captured_vars_addrs");
    if (!CapturedVars.empty()) {
      for (llvm::Value *V : CapturedVars) {
        if (V->getType()->isIntegerTy())
    llvm::Value *IfCondVal = nullptr;
      IfCondVal = llvm::ConstantInt::get(CGF.Int32Ty, 1);
      NumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, -1);
      NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty),

    assert(IfCondVal && "Expected a value");
    llvm::Value *Args[] = {
        llvm::ConstantInt::get(CGF.Int32Ty, -1),
        Bld.CreateBitOrPointerCast(CapturedVarsAddrs.getPointer(),
        llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};

    llvm::Value *Args[] = {
        llvm::ConstantPointerNull::get(
        llvm::ConstantInt::get(CGF.Int32Ty, 0, true)};

      CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask));
  llvm::Value *ThreadID = RT.getGPUThreadID(CGF);
  llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF);
  llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth);
  CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
  llvm::Value *CmpThreadToCounter =
      CGF.Builder.CreateICmpEQ(ThreadID, CounterVal);
  CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
  llvm::Value *IncCounterVal =

         "Cast type must sized.");
         "Val type must sized.");
  if (ValTy == CastTy)
    return CGF.Builder.CreateBitCast(Val, LLVMCastTy);
  return CGF.Builder.CreateIntCast(Val, LLVMCastTy,
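// createRuntimeShuffleFunction emits a call to __kmpc_shuffle_int32 or
// __kmpc_shuffle_int64 (chosen by element size) to move a value between lanes
// of a warp; shuffleAndStore then handles arbitrarily sized elements by
// decomposing them into 8-, 4-, 2- and 1-byte chunks, looping when more than
// one chunk of a given size is required.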
                                                 llvm::Value *Offset,
  assert(Size.getQuantity() <= 8 &&
         "Unsupported bitwidth in shuffle instruction.");
  RuntimeFunction ShuffleFn = Size.getQuantity() <= 4
                                  ? OMPRTL___kmpc_shuffle_int32
                                  : OMPRTL___kmpc_shuffle_int64;
      Size.getQuantity() <= 4 ? 32 : 64, 1);
  llvm::Value *ElemCast = castValueToType(CGF, Elem, ElemType, CastTy, Loc);
  llvm::Value *WarpSize =
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), ShuffleFn),
      {ElemCast, Offset, WarpSize});

  for (int IntSize = 8; IntSize >= 1; IntSize /= 2) {
        ElemPtr, IntTy->getPointerTo(), IntTy);
    if (Size.getQuantity() / IntSize > 1) {
      llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock();
      llvm::PHINode *PhiSrc =
          Bld.CreatePHI(Ptr.getType(), 2);
      PhiSrc->addIncoming(Ptr.getPointer(), CurrentBB);
      llvm::PHINode *PhiDest =
          Bld.CreatePHI(ElemPtr.getType(), 2);
      PhiDest->addIncoming(ElemPtr.getPointer(), CurrentBB);
      llvm::Value *PtrDiff = Bld.CreatePtrDiff(
      Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
                                     IntType, Offset, Loc);
      PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB);
      PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB);
                                     IntType, Offset, Loc);
    Size = Size % IntSize;
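// emitReductionListCopy copies a "Reduce list" (the array of pointers to the
// partially aggregated reduction values) between two locations. The
// CopyAction selects the flavor; e.g. RemoteLaneToThread shuffles each
// element in from a remote lane and repoints the destination list at the
// freshly created temporaries.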
enum CopyAction : unsigned {

  llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
  for (const Expr *Private : Privates) {
    bool ShuffleInElement = false;
    bool UpdateDestListPtr = false;
    QualType PrivatePtrType = C.getPointerType(Private->getType());
    llvm::Type *PrivateLlvmPtrType = CGF.ConvertType(PrivatePtrType);
    case RemoteLaneToThread: {
          CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
      ShuffleInElement = true;
      UpdateDestListPtr = true;
    if (ShuffleInElement) {
      shuffleAndStore(CGF, SrcElementAddr, DestElementAddr, Private->getType(),
                      RemoteLaneOffset, Private->getExprLoc());
          SrcElementAddr, false, Private->getType(),
          Elem, DestElementAddr, false, Private->getType(),
          Private->getExprLoc());
    if (UpdateDestListPtr) {
          DestElementPtrAddr, false,
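// emitInterWarpCopyFunction builds "_omp_reduction_inter_warp_copy_func": it
// gathers the Reduce lists from lane 0 of every active warp through a
// shared-memory transfer medium (one 32-bit slot per warp) and has the first
// NumWarps threads of warp 0 read the values back, so warp 0 ends up holding
// all per-warp partial results.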
                                    C.getIntTypeForBitwidth(32, true),
  Args.push_back(&ReduceListArg);
  Args.push_back(&NumWarpsArg);
      llvm::GlobalValue::InternalLinkage,
      "_omp_reduction_inter_warp_copy_func", &M);
  Fn->setDoesNotRecurse();

  StringRef TransferMediumName =
      "__openmp_nvptx_data_transfer_temporary_storage";
  llvm::GlobalVariable *TransferMedium =
      M.getGlobalVariable(TransferMediumName);
  if (!TransferMedium) {
    auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
    TransferMedium = new llvm::GlobalVariable(
        M, Ty, false, llvm::GlobalVariable::WeakAnyLinkage,
        llvm::UndefValue::get(Ty), TransferMediumName,
        nullptr, llvm::GlobalVariable::NotThreadLocal,
        SharedAddressSpace);

      AddrReduceListArg, false, C.VoidPtrTy, Loc,
          ElemTy->getPointerTo()),
  for (const Expr *Private : Privates) {
    unsigned RealTySize =
        C.getTypeSizeInChars(Private->getType())
            .alignTo(C.getTypeAlignInChars(Private->getType()))
    for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
      unsigned NumIters = RealTySize / TySize;
      QualType CType = C.getIntTypeForBitwidth(
      llvm::Value *Cnt = nullptr;
      llvm::BasicBlock *PrecondBB = nullptr;
      llvm::BasicBlock *ExitBB = nullptr;
            Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.IntTy, NumIters));
        Bld.CreateCondBr(Cmp, BodyBB, ExitBB);
      llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master");
      Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
      Address ElemPtr(ElemPtrPtr, CopyType, Align);
      llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
          TransferMedium->getValueType(), TransferMedium,
          {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
      Address MediumPtr(MediumPtrVal, CopyType, Align);
          ElemPtr, false, CType, Loc,
      Bld.CreateBr(MergeBB);
      Bld.CreateBr(MergeBB);
          AddrNumWarpsArg, false, C.IntTy, Loc);
      llvm::Value *IsActiveThread =
          Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
      Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
      llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
          TransferMedium->getValueType(), TransferMedium,
          {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
      Address SrcMediumPtr(SrcMediumPtrVal, CopyType, Align);
          TargetElemPtrPtr, false, C.VoidPtrTy, Loc);
      Address TargetElemPtr(TargetElemPtrVal, CopyType, Align);
        TargetElemPtr = Bld.CreateGEP(TargetElemPtr, Cnt);
      llvm::Value *SrcMediumValue =
      Bld.CreateBr(W0MergeBB);
      Bld.CreateBr(W0MergeBB);
        Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, 1));
      RealTySize %= TySize;
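// emitShuffleAndReduceFunction builds
// "_omp_reduction_shuffle_and_reduce_func", which reduces data across two
// lanes of the same warp: it shuffles the remote lane's Reduce list in,
// decides from AlgoVer, LaneID and RemoteLaneOffset whether this thread
// should reduce and/or copy, and calls ReduceFn when a reduction is needed.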
  Args.push_back(&ReduceListArg);
  Args.push_back(&LaneIDArg);
  Args.push_back(&RemoteLaneOffsetArg);
  Args.push_back(&AlgoVerArg);
  auto *Fn = llvm::Function::Create(
      "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
  Fn->setDoesNotRecurse();
          ElemTy->getPointerTo()),
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_reduce_list");
                        LocalReduceList, RemoteReduceList,
                        {RemoteLaneOffsetArgVal,
  llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal);
  llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
  llvm::Value *CondAlgo1 = Bld.CreateAnd(
      Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
  llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
  llvm::Value *CondAlgo2 = Bld.CreateAnd(
      Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1))));
  CondAlgo2 = Bld.CreateAnd(
      CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
  llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
  CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
  Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
      CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
  Bld.CreateBr(MergeBB);
  Bld.CreateBr(MergeBB);
  Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
  llvm::Value *CondCopy = Bld.CreateAnd(
      Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
  Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
                        RemoteReduceList, LocalReduceList);
  Bld.CreateBr(CpyMergeBB);
  Bld.CreateBr(CpyMergeBB);
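// The next four helpers implement teams reductions over the fixed global
// buffer: the list_to_global copy/reduce functions move a thread's Reduce
// list into the team-reduction buffer, and the global_to_list variants move
// the buffered values back into a local Reduce list. Fields are located
// through VarFieldMap and indexed by the buffer slot Idx.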
    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
  Args.push_back(&BufferArg);
  Args.push_back(&IdxArg);
  Args.push_back(&ReduceListArg);
  auto *Fn = llvm::Function::Create(
      "_omp_reduction_list_to_global_copy_func", &CGM.getModule());
  Fn->setDoesNotRecurse();
          ElemTy->getPointerTo()),
  QualType StaticTy = C.getRecordType(TeamReductionRec);
  llvm::Type *LLVMReductionsBufferTy =
      LLVMReductionsBufferTy->getPointerTo());
  for (const Expr *Private : Privates) {
        ElemPtrPtr, ElemTy->getPointerTo());
        Address(ElemPtrPtr, ElemTy, C.getTypeAlignInChars(Private->getType()));
    const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
    const FieldDecl *FD = VarFieldMap.lookup(VD);
    llvm::Value *BufferPtr =
        Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
          ElemPtr, false, Private->getType(), Loc,

    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
    llvm::Function *ReduceFn) {
  Args.push_back(&BufferArg);
  Args.push_back(&IdxArg);
  Args.push_back(&ReduceListArg);
  auto *Fn = llvm::Function::Create(
      "_omp_reduction_list_to_global_reduce_func", &CGM.getModule());
  Fn->setDoesNotRecurse();
  QualType StaticTy = C.getRecordType(TeamReductionRec);
  llvm::Type *LLVMReductionsBufferTy =
      LLVMReductionsBufferTy->getPointerTo());
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
    const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
    const FieldDecl *FD = VarFieldMap.lookup(VD);
    llvm::Value *BufferPtr =
        Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      llvm::Value *Size = CGF.Builder.CreateIntCast(
  llvm::Value *GlobalReduceList = ReductionList.getPointer();
      AddrReduceListArg, false, C.VoidPtrTy, Loc);
      CGF, Loc, ReduceFn, {GlobalReduceList, ReducedPtr});

    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
  Args.push_back(&BufferArg);
  Args.push_back(&IdxArg);
  Args.push_back(&ReduceListArg);
  auto *Fn = llvm::Function::Create(
      "_omp_reduction_global_to_list_copy_func", &CGM.getModule());
  Fn->setDoesNotRecurse();
          ElemTy->getPointerTo()),
  QualType StaticTy = C.getRecordType(TeamReductionRec);
  llvm::Type *LLVMReductionsBufferTy =
      LLVMReductionsBufferTy->getPointerTo());
  for (const Expr *Private : Privates) {
        ElemPtrPtr, ElemTy->getPointerTo());
        Address(ElemPtrPtr, ElemTy, C.getTypeAlignInChars(Private->getType()));
    const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
    const FieldDecl *FD = VarFieldMap.lookup(VD);
    llvm::Value *BufferPtr =
        Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);

    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
    llvm::Function *ReduceFn) {
  Args.push_back(&BufferArg);
  Args.push_back(&IdxArg);
  Args.push_back(&ReduceListArg);
  auto *Fn = llvm::Function::Create(
      "_omp_reduction_global_to_list_reduce_func", &CGM.getModule());
  Fn->setDoesNotRecurse();
  QualType StaticTy = C.getRecordType(TeamReductionRec);
  llvm::Type *LLVMReductionsBufferTy =
      LLVMReductionsBufferTy->getPointerTo());
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
    const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
    const FieldDecl *FD = VarFieldMap.lookup(VD);
    llvm::Value *BufferPtr =
        Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      llvm::Value *Size = CGF.Builder.CreateIntCast(
  llvm::Value *GlobalReduceList = ReductionList.getPointer();
      AddrReduceListArg, false, C.VoidPtrTy, Loc);
      CGF, Loc, ReduceFn, {ReducedPtr, GlobalReduceList});
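// emitReduction is the GPU driver for the reduction clause. Simple reductions
// fall back to the default lowering; otherwise the reduction variables are
// packed into a Reduce list and __kmpc_nvptx_parallel_reduce_nowait_v2
// (parallel) or __kmpc_nvptx_teams_reduce_nowait_v2 (teams, using the fixed
// buffer and the four helpers above) is called, followed by the final
// combiner on the winning thread.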
  if (Options.SimpleReduction) {
    assert(!TeamsReduction && !ParallelReduction &&
           "Invalid reduction selection in emitReduction.");
                                   ReductionOps, Options);
  assert((TeamsReduction || ParallelReduction) &&
         "Invalid reduction selection in emitReduction.");

  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;
  for (const Expr *DRE : Privates) {
    PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();
  const RecordDecl *ReductionRec = ::buildRecordForGlobalizedVars(
      CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap, 1);

  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
  llvm::APInt ArraySize(32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      llvm::Value *Size = CGF.Builder.CreateIntCast(

      Privates, LHSExprs, RHSExprs, ReductionOps);
  llvm::Value *ReductionDataSize =
      CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
  llvm::Value *InterWarpCopyFn =

  if (ParallelReduction) {
    llvm::Value *Args[] = {RTLoc, ReductionDataSize, RL, ShuffleAndReduceFn,
            CGM.getModule(), OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2),
    assert(TeamsReduction && "expected teams reduction.");
    TeamsReductions.push_back(ReductionRec);
            CGM.getModule(), OMPRTL___kmpc_reduction_get_fixed_buffer),
        {}, "_openmp_teams_reductions_buffer_$_$ptr");
        CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap);
        CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap,
        CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap);
        CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap,
    llvm::Value *Args[] = {
        KernelTeamsReductionPtr,
        CGF.Builder.getInt32(C.getLangOpts().OpenMPCUDAReductionBufNum),
        GlobalToBufferCpyFn,
        GlobalToBufferRedFn,
        BufferToGlobalCpyFn,
        BufferToGlobalRedFn};
            CGM.getModule(), OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2),

  llvm::Value *Cond = CGF.Builder.CreateICmpEQ(
      Res, llvm::ConstantInt::get(CGM.Int32Ty, 1));
  CGF.Builder.CreateCondBr(Cond, ThenBB, ExitBB);
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps,
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
                                  cast<DeclRefExpr>(*IRHS));

                                              const VarDecl *NativeParam) const {
  const Type *NonQualTy = QC.strip(ArgType);
  if (const auto *Attr = FD->getAttr<OMPCaptureKindAttr>()) {
    if (Attr->getCaptureKind() == OMPC_map) {
  enum { NVPTX_local_addr = 5 };
  if (isa<ImplicitParamDecl>(NativeParam))

                                            const VarDecl *TargetParam) const {
  assert(NativeParam != TargetParam &&
         "Native arg must not be the same as target arg.");
  const Type *NonQualTy = QC.strip(NativeParamType);
  unsigned NativePointeeAddrSpace =
      llvm::PointerType::get(CGF.getLLVMContext(), NativePointeeAddrSpace));
    return NativeParamAddr;

  TargetArgs.reserve(Args.size());
  auto *FnType = OutlinedFn.getFunctionType();
  for (unsigned I = 0, E = Args.size(); I < E; ++I) {
    if (FnType->isVarArg() && FnType->getNumParams() <= I) {
      TargetArgs.append(std::next(Args.begin(), I), Args.end());
    llvm::Type *TargetType = FnType->getParamType(I);
    llvm::Value *NativeArg = Args[I];
    if (!TargetType->isPointerTy()) {
      TargetArgs.emplace_back(NativeArg);
    TargetArgs.emplace_back(
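// createParallelDataSharingWrapper emits the "<outlined>_wrapper" function
// the device runtime invokes for a parallel region: it fetches the shared
// argument buffer from the data-sharing runtime, unpacks the captured
// variables (plus loop bounds for loop-based directives) and forwards them to
// the real outlined parallel function.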
llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
  WrapperArgs.emplace_back(&ParallelLevelArg);
  WrapperArgs.emplace_back(&WrapperArg);
  auto *Fn = llvm::Function::Create(
      Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());
  Fn->addFnAttr(llvm::Attribute::NoInline);
  Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
  Fn->setDoesNotRecurse();

  const auto *RD = CS.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CI = CS.capture_begin();
  llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();
  llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
  if (CS.capture_size() > 0 ||
        cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc());
    Args.emplace_back(LB);
        cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc());
    Args.emplace_back(UB);
  if (CS.capture_size() > 0) {
    for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
      QualType ElemTy = CurField->getType();
      if (CI->capturesVariableByCopy() &&
          !CI->getCapturedVar()->getType()->isAnyPointerType()) {
      Args.emplace_back(Arg);
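// emitFunctionProlog runs the escape analysis (CheckVarsEscapingDeclContext)
// over each device function body, records escaped parameters and
// variable-length declarations in FunctionGlobalizedDecls, and, unless
// globalization must be delayed to the parallel region, pushes a cleanup
// scope that wraps the body in the generic-variables prolog/epilog.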
  assert(D && "Expected function or captured|block decl.");
  assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&
         "Function is registered already.");
  assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&
         "Team is set but not processed.");
  const Stmt *Body = nullptr;
  bool NeedToDelayGlobalization = false;
  if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
    Body = FD->getBody();
  } else if (const auto *BD = dyn_cast<BlockDecl>(D)) {
    Body = BD->getBody();
  } else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
    Body = CD->getBody();
    if (NeedToDelayGlobalization &&
  CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
  VarChecker.Visit(Body);
      VarChecker.getGlobalizedRecord(IsInTTDRegion);
  TeamAndReductions.first = nullptr;
  TeamAndReductions.second.clear();
      VarChecker.getEscapedVariableLengthDecls();
      VarChecker.getDelayedVariableLengthDecls();
  if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty() &&
      DelayedVariableLengthDecls.empty())
  auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
  I->getSecond().MappedParams =
      std::make_unique<CodeGenFunction::OMPMapVars>();
  I->getSecond().EscapedParameters.insert(
      VarChecker.getEscapedParameters().begin(),
      VarChecker.getEscapedParameters().end());
  I->getSecond().EscapedVariableLengthDecls.append(
      EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
  I->getSecond().DelayedVariableLengthDecls.append(
      DelayedVariableLengthDecls.begin(), DelayedVariableLengthDecls.end());
  DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
  for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
    Data.insert(std::make_pair(VD, MappedVarData()));
  if (!NeedToDelayGlobalization) {
      GlobalizationScope() = default;
            .emitGenericVarsEpilog(CGF);

  if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
    const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
    switch (A->getAllocatorType()) {
    case OMPAllocateDeclAttr::OMPNullMemAlloc:
    case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    case OMPAllocateDeclAttr::OMPThreadMemAlloc:
    case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
    case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
    case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    case OMPAllocateDeclAttr::OMPConstMemAlloc:
    case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
    case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
    auto *GV = new llvm::GlobalVariable(
        llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(VarTy),
        nullptr, llvm::GlobalValue::NotThreadLocal,

  auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
  if (I == FunctionGlobalizedDecls.end())
  auto VDI = I->getSecond().LocalVarData.find(VD);
  if (VDI != I->getSecond().LocalVarData.end())
    return VDI->second.PrivateAddr;
      auto VDI = I->getSecond().LocalVarData.find(
          cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
              ->getCanonicalDecl());
      if (VDI != I->getSecond().LocalVarData.end())
        return VDI->second.PrivateAddr;

  FunctionGlobalizedDecls.erase(CGF.CurFn);

                                    llvm::Value *&Chunk) const {
  ScheduleKind = OMPC_DIST_SCHEDULE_static;
      RT.getGPUNumThreads(CGF),
      S.getIterationVariable()->getType(), S.getBeginLoc());
      CGF, S, ScheduleKind, Chunk);

                                const Expr *&ChunkExpr) const {
  ScheduleKind = OMPC_SCHEDULE_static;
  llvm::APInt ChunkSize(32, 1);

         " Expected target-based directive.");
    if (!C.capturesVariable())
    const VarDecl *VD = C.getCapturedVar();
    const auto *RD = VD->getType()
    if (!RD || !RD->isLambda())
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    RD->getCaptureFields(Captures, ThisCapture);
      const ValueDecl *VD = LC.getCapturedVar();
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");

  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "

    if (Feature.getValue()) {

    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      llvm::raw_svector_ostream Out(Buffer);
          << " does not support unified addressing";
      CGM.Error(Clause->getBeginLoc(), Out.str());
    llvm_unreachable("Unexpected Cuda arch.");

  const char *LocSize = "__kmpc_get_hardware_num_threads_in_block";
  llvm::Function *F = M->getFunction(LocSize);
    F = llvm::Function::Create(
        llvm::FunctionType::get(CGF.Int32Ty, std::nullopt, false),
        llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
  return Bld.CreateCall(F, std::nullopt, "nvptx_num_threads");

      CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block),
static llvm::Value * getNVPTXLaneID(CodeGenFunction &CGF)
Get the id of the current lane in the Warp.
static CudaArch getCudaArch(CodeGenModule &CGM)
static llvm::Value * emitListToGlobalCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap)
This function emits a helper that copies all the reduction variables from the team into the provided ...
static llvm::Value * emitGlobalToListReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap, llvm::Function *ReduceFn)
This function emits a helper that reduces all the reduction variables from the team into the provided...
static llvm::Value * emitInterWarpCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc)
This function emits a helper that gathers Reduce lists from the first lane of every active warp to la...
static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)
Get list of reduction variables from the teams ... directives.
static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)
Cast value to the specified type.
static void emitReductionListCopy(CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy, ArrayRef< const Expr * > Privates, Address SrcBase, Address DestBase, CopyOptionsTy CopyOptions={nullptr, nullptr, nullptr})
Emit instructions to copy a Reduce list, which contains partially aggregated values,...
static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)
Get list of lastprivate variables from the teams distribute ... or teams {distribute ....
static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, Address DestAddr, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) SPMD construct, if any.
static llvm::Function * emitShuffleAndReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, llvm::Function *ReduceFn, SourceLocation Loc)
Emit a helper that reduces data across two OpenMP threads (lanes) in the same warp.
static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)
static llvm::Value * emitListToGlobalReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap, llvm::Function *ReduceFn)
This function emits a helper that reduces all the reduction variables from the team into the provided...
static llvm::Value * emitGlobalToListCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap)
This function emits a helper that copies all the reduction variables from the team into the provided ...
static llvm::Value * createRuntimeShuffleFunction(CodeGenFunction &CGF, llvm::Value *Elem, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
This function creates calls to one of two shuffle functions to copy variables between lanes in a warp...
static llvm::Value * getNVPTXWarpID(CodeGenFunction &CGF)
Get the id of the warp in the block.
This file defines OpenMP nodes for declarative directives.
This file defines OpenMP AST classes for clauses.
static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false, StringRef DiagType="")
This file defines OpenMP AST classes for executable directives and clauses.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
const VariableArrayType * getAsVariableArrayType(QualType T) const
const TargetInfo & getTargetInfo() const
QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const
Return the uniqued reference to the type for an address space qualified type with the specified type ...
unsigned getTargetAddressSpace(LangAS AS) const
Attr - This represents one attribute.
A class which contains all the information about a particular captured value.
ArrayRef< Capture > captures() const
BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
const BlockDecl * getBlockDecl() const
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Describes the capture of either a variable, or 'this', or variable-length array type.
This captures a statement into a function.
CapturedDecl * getCapturedDecl()
Retrieve the outlined function declaration.
Stmt * getCapturedStmt()
Retrieve the statement being captured.
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
CastKind getCastKind() const
CharUnits - This is an opaque type for sizes expressed in character units.
bool isZero() const
isZero - Test whether the quantity equals zero.
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
CharUnits getAlignment() const
Return the alignment of this pointer.
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
llvm::Value * getPointer() const
llvm::PointerType * getType() const
Return the type of the pointer value.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Address CreateGEP(Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
CGFunctionInfo - Class to encapsulate the information about a function definition.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP teams.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
DataSharingMode
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...
@ DS_CUDA
CUDA data sharing mode.
@ DS_Generic
Generic data-sharing mode.
void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override
Choose a default value for the dist_schedule clause.
Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override
Gets the OpenMP-specific address of the local variable.
void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override
Emits OpenMP-specific function prolog.
void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override
Choose a default value for the schedule clause.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
This function ought to emit, in the general case, a call to.
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
void getKmpcFreeShared(CodeGenFunction &CGF, const std::pair< llvm::Value *, llvm::Value * > &AddrSizePair) override
Get call to __kmpc_free_shared.
CGOpenMPRuntimeGPU(CodeGenModule &CGM)
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP parallel.
void functionFinished(CodeGenFunction &CGF) override
Cleans up references to the objects in finished function.
llvm::Value * getGPUThreadID(CodeGenFunction &CGF)
Get the id of the current thread on the GPU.
llvm::Value * getGPUWarpSize(CodeGenFunction &CGF)
Get the GPU warp size.
void processRequiresDirective(const OMPRequiresDecl *D) override
Perform check on requires decl to ensure that target architecture supports unified addressing.
void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const override
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
bool isDelayedVariableLengthDecl(CodeGenFunction &CGF, const VarDecl *VD) const override
Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURun...
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
ExecutionMode
Defines the execution mode.
@ EM_NonSPMD
Non-SPMD execution mode (1 master thread, others are workers).
@ EM_Unknown
Unknown execution mode (orphaned directive).
@ EM_SPMD
SPMD execution mode (all threads are worker threads).
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
llvm::Value * getGPUNumThreads(CodeGenFunction &CGF)
Get the maximum number of threads in a block of the GPU.
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
std::pair< llvm::Value *, llvm::Value * > getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) override
Get call to __kmpc_alloc_shared.
bool isGPU() const override
Returns true if the current target is a GPU.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Helper to determine the min/max number of threads/teams for D.
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const
Choose default schedule type and chunk value for the dist_schedule clause.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
CapturedRegionKind getKind() const
bool isCXXThisExprCaptured() const
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
CGCapturedStmtInfo * CapturedStmtInfo
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
llvm::Type * ConvertTypeForMem(QualType T)
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
const TargetInfo & getTarget() const
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
CGDebugInfo * getDebugInfo()
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
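Taken together, createBasicBlock, EvaluateExprAsBool, EmitBlock, and EmitBranch form the usual pattern for guarding a region on a condition. A minimal sketch, with Cond and EmitBody as placeholders:
static void emitGuardedRegion(CodeGenFunction &CGF, const Expr *Cond,
                              llvm::function_ref<void()> EmitBody) {
  llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".guarded.then");
  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".guarded.cont");
  // Branch on the condition from the current insertion point.
  CGF.Builder.CreateCondBr(CGF.EvaluateExprAsBool(Cond), ThenBB, ContBB);
  // Emit the guarded body, then fall through to the continuation block.
  CGF.EmitBlock(ThenBB);
  EmitBody();
  CGF.EmitBranch(ContBB);
  CGF.EmitBlock(ContBB);
}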
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
Address CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
ASTContext & getContext() const
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
CodeGenTypes & getTypes() const
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
llvm::Value * LoadCXXThis()
LoadCXXThis - Load the value of 'this'.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
llvm::LLVMContext & getLLVMContext()
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
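A minimal sketch combining the scalar load/store/conversion helpers above to copy a value between two addresses whose Clang types may differ; all parameters are placeholders.
static void copyScalarSketch(CodeGenFunction &CGF, Address Src, QualType SrcTy,
                             Address Dst, QualType DstTy, SourceLocation Loc) {
  // Load honoring the source type, convert if needed, then store.
  llvm::Value *V = CGF.EmitLoadOfScalar(Src, /*Volatile=*/false, SrcTy, Loc);
  if (!CGF.getContext().hasSameType(SrcTy, DstTy))
    V = CGF.EmitScalarConversion(V, SrcTy, DstTy, Loc);
  CGF.EmitStoreOfScalar(V, Dst, /*Volatile=*/false, DstTy);
}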
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
const LangOptions & getLangOpts() const
CodeGenTypes & getTypes()
const TargetInfo & getTarget() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
ASTContext & getContext() const
llvm::LLVMContext & getLLVMContext()
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
unsigned getTargetAddressSpace(QualType T) const
llvm::Type * ConvertTypeForMem(QualType T, bool ForBitField=false)
ConvertTypeForMem - Convert type T into a llvm::Type.
Information for lazily generating a cleanup.
FunctionArgList - Type for representing both the decl and type of parameters to a function.
LValue - This represents an lvalue reference.
Address getAddress(CodeGenFunction &CGF) const
llvm::Value * getPointer(CodeGenFunction &CGF) const
void setAddress(Address address)
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
DeclStmt - Adaptor class for mixing declarations with statements and expressions.
Decl - This represents one declaration (or definition), e.g.
attr_iterator attr_end() const
bool isCanonicalDecl() const
Whether this particular Decl is a canonical one.
attr_iterator attr_begin() const
SourceLocation getLocation() const
DeclContext * getDeclContext()
SourceLocation getBeginLoc() const LLVM_READONLY
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
SourceLocation getBeginLoc() const LLVM_READONLY
This represents one expression.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language.
Represents a member of a struct/union/class.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
GlobalDecl - represents a global declaration.
ImplicitCastExpr - Allows us to explicitly represent implicit type conversions, which have no direct ...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Describes the capture of a variable or of this, or of a C++1y init-capture.
A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...
bool isInitCapture(const LambdaCapture *Capture) const
Determine whether one of this lambda's captures is an init-capture.
capture_range captures() const
Retrieve this lambda's captures.
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
This is a basic class for representing single OpenMP clause.
This is a basic class for representing single OpenMP executable directive.
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
bool hasAssociatedStmt() const
Returns true if directive has associated statement.
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive.
OpenMPDirectiveKind getDirectiveKind() const
const Stmt * getAssociatedStmt() const
Returns statement associated with the directive.
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
ArrayRef< OMPClause * > clauses() const
static const SpecificClause * getSingleClause(ArrayRef< OMPClause * > Clauses)
Gets a single clause of the specified kind associated with the current directive iff there is only on...
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
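A minimal sketch of the two usual clause queries on a directive; the clause kinds chosen here (num_threads, reduction) are only examples.
static void inspectClausesSketch(const OMPExecutableDirective &D) {
  // A clause that may appear at most once can be fetched directly.
  if (const auto *NT = D.getSingleClause<OMPNumThreadsClause>())
    (void)NT->getNumThreads();
  // Clauses that may repeat are iterated by kind.
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>())
    for (const Expr *E : C->varlists())
      (void)E;
}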
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
clauselist_range clauselists()
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
LangAS getAddressSpace() const
Return the address space of this type.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
QualType getCanonicalType() const
A qualifier set is used to build a set of qualifiers.
const Type * strip(QualType type)
Collect any qualifiers on the given type and return an unqualified type.
QualType apply(const ASTContext &Context, QualType QT) const
Apply the collected qualifiers to the given type.
void addAddressSpace(LangAS space)
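A minimal sketch of the strip/modify/apply idiom these members support, used to rewrite a type's address space while keeping its other qualifiers; AS is assumed to be a non-default address space.
static QualType withAddressSpaceSketch(const ASTContext &Ctx, QualType T,
                                       LangAS AS) {
  QualifierCollector QC;
  const Type *Stripped = QC.strip(T); // collect T's qualifiers, keep bare type
  QC.removeAddressSpace();            // drop any previous address space
  QC.addAddressSpace(AS);             // record the new (non-default) one
  return QC.apply(Ctx, Stripped);     // reapply the qualifiers onto the type
}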
Represents a struct/union/class.
virtual void completeDefinition()
Note that the definition of this type is now complete.
Encodes a location in the source.
RetTy Visit(PTR(Stmt) S, ParamTys... P)
Stmt - This represents one statement.
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
void startDefinition()
Starts the definition of this tag declaration.
unsigned getNewAlign() const
Return the largest alignment for which a suitably-sized allocation with '::operator new(size_t)' is g...
TargetOptions & getTargetOpts() const
Retrieve the target options.
virtual const llvm::omp::GV & getGridValue() const
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
llvm::StringMap< bool > FeatureMap
The map of which features have been enabled or disabled based on the command line.
The base class of the type hierarchy.
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
const T * castAs() const
Member-template castAs<specific type>.
bool isReferenceType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
bool isLValueReferenceType() const
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
Expr * getSubExpr() const
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
Represents a variable declaration or definition.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
@ ICIS_NoInit
No in-class initializer.
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
CudaArch StringToCudaArch(llvm::StringRef S)
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
bool isOpenMPPrivate(OpenMPClauseKind Kind)
Checks if the specified clause is one of private clauses like 'private', 'firstprivate',...
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the composite or combined directives that need loop ...
LangAS
Defines the address space values used by the address space qualifier of QualType.
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
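A minimal sketch of expanding a (possibly combined) directive kind into its capture regions and classifying the outermost one; it assumes DKind is a directive that actually captures regions.
static bool outermostRegionIsTeams(OpenMPDirectiveKind DKind) {
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, DKind);
  return !CaptureRegions.empty() &&
         isOpenMPTeamsDirective(CaptureRegions.front());
}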
LangAS getLangASFromTargetAS(unsigned TargetAS)
@ CXXThis
Parameter for C++ 'this' argument.
@ Other
Other implicit parameter.
const char * CudaArchToString(CudaArch A)
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
llvm::Value * ScratchpadIndex
llvm::Value * ScratchpadWidth
llvm::Value * RemoteLaneOffset
llvm::PointerType * VoidPtrTy
llvm::IntegerType * Int64Ty
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::IntegerType * SizeTy
llvm::PointerType * VoidPtrPtrTy
llvm::IntegerType * Int32Ty
llvm::IntegerType * IntTy
int
llvm::IntegerType * Int16Ty
llvm::PointerType * Int8PtrTy
CharUnits getPointerAlign() const