#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/Support/MathExtras.h"

using namespace CodeGen;
using namespace llvm::omp;
  llvm::FunctionCallee EnterCallee = nullptr;
  llvm::FunctionCallee ExitCallee = nullptr;
  llvm::BasicBlock *ContBlock = nullptr;

  NVPTXActionTy(llvm::FunctionCallee EnterCallee,
                llvm::FunctionCallee ExitCallee,
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),

    llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
    CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
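/// RAII helper: saves the runtime's current execution mode on entry and
/// restores it when the object goes out of scope.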
class ExecutionRuntimeModesRAII {
      : ExecMode(ExecMode) {
    SavedExecMode = ExecMode;
  ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }

enum MachineConfiguration : unsigned {
  GlobalMemoryAlignment = 128,
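/// Peel array subscripts and OpenMP array sections off RefExpr and return the
/// canonical declaration it ultimately refers to.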
  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
  } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
  if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
    return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
  const auto *ME = cast<MemberExpr>(RefExpr);
  return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
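/// Build an implicit record type ("_globalized_locals_ty") with one field per
/// escaped variable, so that team/parallel-private locals can be globalized.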
static RecordDecl *buildRecordForGlobalizedVars(
    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
        &MappedDeclsFields, int BufSize) {
  if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
    GlobalizedVars.emplace_back(
        C.getDeclAlign(D).getQuantity(),
  for (const ValueDecl *D : EscapedDeclsForTeams)
    GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
  llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, VarsDataTy R) {
    return L.first > R.first;
  RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
  GlobalizedRD->startDefinition();
      EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
  for (const auto &Pair : GlobalizedVars) {
      Type = C.getPointerType(Type.getNonReferenceType());
    if (SingleEscaped.count(VD)) {
      llvm::APInt ArraySize(32, BufSize);
      llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(),
                                     GlobalMemoryAlignment)));
      Field->addAttr(AlignedAttr::CreateImplicit(
          C.getIntTypeForBitwidth(32, 0),
          {}, AlignedAttr::GNU_aligned));
    GlobalizedRD->addDecl(Field);
    MappedDeclsFields.try_emplace(VD, Field);
  GlobalizedRD->completeDefinition();
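/// Statement visitor that records which local declarations escape their
/// declaration context (and therefore must be globalized on the device).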
class CheckVarsEscapingDeclContext final
  llvm::SetVector<const ValueDecl *> EscapedDecls;
  llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;
  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
  bool AllEscaped = false;
  bool IsForCombinedParallelRegion = false;

  void markAsEscaped(const ValueDecl *VD) {
    if (!isa<VarDecl>(VD) ||
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
    if (auto *CSI = CGF.CapturedStmtInfo) {
      if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
        if (!IsForCombinedParallelRegion) {
          const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();
          if (((Attr->getCaptureKind() != OMPC_map) &&
              ((Attr->getCaptureKind() == OMPC_map) &&
               !FD->getType()->isAnyPointerType()))
        if (!FD->getType()->isReferenceType()) {
                 "Parameter captured by value with variably modified type");
          EscapedParameters.insert(VD);
        } else if (!IsForCombinedParallelRegion) {
    if ((!CGF.CapturedStmtInfo ||
         (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&
      EscapedVariableLengthDecls.insert(VD);
      EscapedDecls.insert(VD);

  void VisitValueDecl(const ValueDecl *VD) {
    if (const auto *VarD = dyn_cast<VarDecl>(VD)) {
      if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {
        const bool SavedAllEscaped = AllEscaped;
        Visit(VarD->getInit());
        AllEscaped = SavedAllEscaped;

                                  bool IsCombinedParallelRegion) {
      if (C.capturesVariable() && !C.capturesVariableByCopy()) {
        bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;
        if (IsCombinedParallelRegion) {
          IsForCombinedParallelRegion = false;
                C->getClauseKind() == OMPC_reduction ||
                C->getClauseKind() == OMPC_linear ||
                C->getClauseKind() == OMPC_private)
            if (const auto *PC = dyn_cast<OMPFirstprivateClause>(C))
              Vars = PC->getVarRefs();
            else if (const auto *PC = dyn_cast<OMPLastprivateClause>(C))
              Vars = PC->getVarRefs();
              llvm_unreachable("Unexpected clause.");
            for (const auto *E : Vars) {
                IsForCombinedParallelRegion = true;
            if (IsForCombinedParallelRegion)
        if (isa<OMPCapturedExprDecl>(VD))
        IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;

  void buildRecordForGlobalizedVars(bool IsInTTDRegion) {
    assert(!GlobalizedRD &&
           "Record for globalized variables is built already.");
    unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
      EscapedDeclsForTeams = EscapedDecls.getArrayRef();
      EscapedDeclsForParallel = EscapedDecls.getArrayRef();
    GlobalizedRD = ::buildRecordForGlobalizedVars(
        CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
        MappedDeclsFields, WarpSize);
      : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
  virtual ~CheckVarsEscapingDeclContext() = default;

  void VisitDeclStmt(const DeclStmt *S) {
    for (const Decl *D : S->decls())
      if (const auto *VD = dyn_cast_or_null<ValueDecl>(D))

    if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {
      VisitStmt(S->getCapturedStmt());
      VisitOpenMPCapturedStmt(
          CaptureRegions.back() == OMPD_parallel &&

      if (C.capturesVariable() && !C.capturesVariableByCopy()) {
        if (isa<OMPCapturedExprDecl>(VD))
      if (C.capturesVariable()) {

  void VisitBlockExpr(const BlockExpr *E) {
      const VarDecl *VD = C.getVariable();

  void VisitCallExpr(const CallExpr *E) {
      if (Arg->isLValue()) {
        const bool SavedAllEscaped = AllEscaped;
        AllEscaped = SavedAllEscaped;

    if (isa<OMPCapturedExprDecl>(VD))
      const bool SavedAllEscaped = AllEscaped;
      AllEscaped = SavedAllEscaped;
      const bool SavedAllEscaped = AllEscaped;
      AllEscaped = SavedAllEscaped;

  void VisitExpr(const Expr *E) {
      bool SavedAllEscaped = AllEscaped;
      AllEscaped = SavedAllEscaped;

  void VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children())

  const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {
      buildRecordForGlobalizedVars(IsInTTDRegion);

    assert(GlobalizedRD &&
           "Record for globalized variables must be generated already.");
    auto I = MappedDeclsFields.find(VD);
    if (I == MappedDeclsFields.end())
    return I->getSecond();

    return EscapedDecls.getArrayRef();

  const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {
    return EscapedParameters;

    return EscapedVariableLengthDecls.getArrayRef();
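/// Get the id of the warp in the block (thread id shifted right by
/// log2(warp size)) and, below it, the id of the current lane within the warp.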
  unsigned LaneIDBits =
  return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");

  unsigned LaneIDBits =
  unsigned LaneIDMask = ~0u >> (32u - LaneIDBits);
  return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),

CGOpenMPRuntimeGPU::getExecutionMode() const {
  return CurrentExecutionMode;
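/// Check for an inner (nested) SPMD construct, if any, by walking the single
/// child of the target region and inspecting its directive kind.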
  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    if (DKind == OMPD_teams) {
      Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
      if (const auto *NND =
              dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
        DKind = NND->getDirectiveKind();
  case OMPD_target_teams:
  case OMPD_target_simd:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  llvm_unreachable("Unexpected directive.");
  switch (DirectiveKind) {
  case OMPD_target_teams:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd:
  case OMPD_target_simd:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
      "Unknown programming model for OpenMP directive on NVPTX target.");
                                             StringRef ParentName,
                                             llvm::Function *&OutlinedFn,
                                             llvm::Constant *&OutlinedFnID,
  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_NonSPMD);
  EntryFunctionState EST;
  WrapperFunctionsMap.clear();

    CGOpenMPRuntimeGPU::EntryFunctionState &EST;
    NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST)
      RT.emitKernelInit(CGF, EST, false);
      RT.setLocThreadIdInsertPt(CGF, true);
      RT.emitKernelDeinit(CGF, EST, false);

  IsInTTDRegion = true;
                   IsOffloadEntry, CodeGen);
  IsInTTDRegion = false;

                                      EntryFunctionState &EST, bool IsSPMD) {
  Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD));
    emitGenericVarsProlog(CGF, EST.Loc);

                                        EntryFunctionState &EST,
    emitGenericVarsEpilog(CGF);
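/// Emit a target kernel in SPMD execution mode: all threads in the team are
/// worker threads and execute the region directly.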
                                          StringRef ParentName,
                                          llvm::Function *&OutlinedFn,
                                          llvm::Constant *&OutlinedFnID,
  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
  EntryFunctionState EST;

    CGOpenMPRuntimeGPU::EntryFunctionState &EST;
                         CGOpenMPRuntimeGPU::EntryFunctionState &EST)
        : RT(RT), EST(EST) {}
      RT.emitKernelInit(CGF, EST, true);
      RT.emitKernelDeinit(CGF, EST, true);
  } Action(*this, EST);

  IsInTTDRegion = true;
                   IsOffloadEntry, CodeGen);
  IsInTTDRegion = false;
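/// Record the chosen execution mode for a kernel by emitting a weak
/// <kernel-name>_exec_mode global initialized to SPMD or generic.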
  auto *GVMode = new llvm::GlobalVariable(
      llvm::GlobalValue::WeakAnyLinkage,
      llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
                                              : OMP_TGT_EXEC_MODE_GENERIC),
      Twine(Name, "_exec_mode"));
  GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility);

void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
  assert(!ParentName.empty() && "Invalid target region parent name!");
    emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
    emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,

  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, true,
    llvm_unreachable("OpenMP can only handle device code.");

                              "__omp_rtl_debug_kind");
                              "__omp_rtl_assume_teams_oversubscription");
                              "__omp_rtl_assume_threads_oversubscription");
                              "__omp_rtl_assume_no_thread_state");
                              "__omp_rtl_assume_no_nested_parallelism");
                                              ProcBindKind ProcBind,
                                                llvm::Value *NumThreads,
                                              const Expr *NumTeams,
                                              const Expr *ThreadLimit,

  bool PrevIsInTTDRegion = IsInTTDRegion;
  IsInTTDRegion = false;
      CGF, D, ThreadIDVar, InnermostKind, CodeGen));
  IsInTTDRegion = PrevIsInTTDRegion;
  llvm::Function *WrapperFun =
      createParallelDataSharingWrapper(OutlinedFun, D);
  WrapperFunctionsMap[OutlinedFun] = WrapperFun;

         "expected teams directive.");
    Dir = dyn_cast_or_null<OMPExecutableDirective>(S);
      for (const Expr *E : C->getVarRefs())

         "expected teams directive.");
    for (const Expr *E : C->privates())

  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
    if (!LastPrivatesReductions.empty()) {
      GlobalizedRD = ::buildRecordForGlobalizedVars(
          MappedDeclsFields, WarpSize);
  } else if (!LastPrivatesReductions.empty()) {
    assert(!TeamAndReductions.first &&
           "Previous team declaration is not expected.");
    std::swap(TeamAndReductions.second, LastPrivatesReductions);

    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
    NVPTXPrePostActionTy(
        llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
        : Loc(Loc), GlobalizedRD(GlobalizedRD),
          MappedDeclsFields(MappedDeclsFields) {}
        auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
        I->getSecond().MappedParams =
            std::make_unique<CodeGenFunction::OMPMapVars>();
        DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
        for (const auto &Pair : MappedDeclsFields) {
          assert(Pair.getFirst()->isCanonicalDecl() &&
                 "Expected canonical declaration");
          Data.insert(std::make_pair(Pair.getFirst(), MappedVarData()));
      Rt.emitGenericVarsProlog(CGF, Loc);
          .emitGenericVarsEpilog(CGF);
  } Action(Loc, GlobalizedRD, MappedDeclsFields);

      CGF, D, ThreadIDVar, InnermostKind, CodeGen);
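/// Prolog for generic data sharing: allocate globalized storage for the
/// escaped locals of the current function and remap the variables to it.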
                                             bool WithSPMDCheck) {
  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
  if (I == FunctionGlobalizedDecls.end())
  for (auto &Rec : I->getSecond().LocalVarData) {
    const auto *VD = cast<VarDecl>(Rec.first);
    bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
    llvm::Value *ParValue;
    llvm::CallBase *VoidPtr =
    VoidPtr->addRetAttr(llvm::Attribute::get(
        VoidPtr, VarPtrTy, VD->getName() + "_on_stack");
    Rec.second.PrivateAddr = VarAddr.getAddress(CGF);
    Rec.second.GlobalizedVal = VoidPtr;
      I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress(CGF));
      VoidPtr->setDebugLoc(DI->SourceLocToDebugLoc(VD->getLocation()));
  for (const auto *VD : I->getSecond().EscapedVariableLengthDecls) {
    Size = Bld.CreateNUWAdd(
    llvm::Value *AlignVal =
    Size = Bld.CreateUDiv(Size, AlignVal);
    Size = Bld.CreateNUWMul(Size, AlignVal);
    llvm::CallBase *VoidPtr =
    VoidPtr->addRetAttr(
    I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(
        std::pair<llvm::Value *, llvm::Value *>(
    I->getSecond().MappedParams->setVarAddr(CGF, cast<VarDecl>(VD),
                                            Base.getAddress(CGF));
  I->getSecond().MappedParams->apply(CGF);
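/// Epilog for generic data sharing: release the globalized storage allocated
/// in the prolog, in reverse allocation order, and restore the original
/// variable mapping.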
                                             bool WithSPMDCheck) {
  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
  if (I != FunctionGlobalizedDecls.end()) {
    for (const auto &AddrSizePair :
         llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
                          {AddrSizePair.first, AddrSizePair.second});
    for (auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) {
      const auto *VD = cast<VarDecl>(Rec.first);
      I->getSecond().MappedParams->restore(CGF);
      llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal,

                                         llvm::Function *OutlinedFn,
  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

                                          llvm::Function *OutlinedFn,
                                          llvm::Value *NumThreads) {
  auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond,
    llvm::Value *NumThreadsVal = NumThreads;
    llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
    llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
      llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, CGM.Int8PtrTy);
        llvm::ArrayType::get(CGM.VoidPtrTy, CapturedVars.size()),
        "captured_vars_addrs");
    if (!CapturedVars.empty()) {
      for (llvm::Value *V : CapturedVars) {
        if (V->getType()->isIntegerTy())
    llvm::Value *IfCondVal = nullptr;
      IfCondVal = llvm::ConstantInt::get(CGF.Int32Ty, 1);
      NumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, -1);
      NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty),
    assert(IfCondVal && "Expected a value");
    llvm::Value *Args[] = {
        llvm::ConstantInt::get(CGF.Int32Ty, -1),
        Bld.CreateBitOrPointerCast(CapturedVarsAddrs.getPointer(),
        llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};

  llvm::Value *Args[] = {
      llvm::ConstantPointerNull::get(
      llvm::ConstantInt::get(CGF.Int32Ty, 0, true)};

          CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask));

  llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth);
  CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);
  llvm::Value *CmpThreadToCounter =
      CGF.Builder.CreateICmpEQ(ThreadID, CounterVal);
  CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);
  llvm::Value *IncCounterVal =
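/// Cast a value to the specified type, as needed when moving reduction
/// elements through the 32/64-bit shuffle runtime calls below.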
1355 "Cast type must sized.");
1357 "Val type must sized.");
1359 if (ValTy == CastTy)
1363 return CGF.
Builder.CreateBitCast(Val, LLVMCastTy);
1365 return CGF.
Builder.CreateIntCast(Val, LLVMCastTy,
1393 assert(Size.getQuantity() <= 8 &&
1394 "Unsupported bitwidth in shuffle instruction.");
1396 RuntimeFunction ShuffleFn = Size.getQuantity() <= 4
1397 ? OMPRTL___kmpc_shuffle_int32
1398 : OMPRTL___kmpc_shuffle_int64;
1402 Size.getQuantity() <= 4 ? 32 : 64, 1);
1403 llvm::Value *ElemCast =
castValueToType(CGF, Elem, ElemType, CastTy, Loc);
1404 llvm::Value *WarpSize =
1408 OMPBuilder.getOrCreateRuntimeFunction(CGM.
getModule(), ShuffleFn),
1409 {ElemCast, Offset, WarpSize});
  for (int IntSize = 8; IntSize >= 1; IntSize /= 2) {
        ElemPtr, IntTy->getPointerTo(), IntTy);
    if (Size.getQuantity() / IntSize > 1) {
      llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock();
      llvm::PHINode *PhiSrc =
          Bld.CreatePHI(Ptr.getType(), 2);
      PhiSrc->addIncoming(Ptr.getPointer(), CurrentBB);
      llvm::PHINode *PhiDest =
          Bld.CreatePHI(ElemPtr.getType(), 2);
      PhiDest->addIncoming(ElemPtr.getPointer(), CurrentBB);
      llvm::Value *PtrDiff = Bld.CreatePtrDiff(
      Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
      PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB);
      PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB);
    Size = Size % IntSize;
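/// Describes where a reduce-list element is copied from/to; used by the
/// routine below that copies a reduce list of partially aggregated values
/// between threads, remote lanes, and scratchpad memory.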
enum CopyAction : unsigned {

  llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
  llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex;
  llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth;
  unsigned Size = Privates.size();
  for (const Expr *Private : Privates) {
    bool ShuffleInElement = false;
    bool UpdateDestListPtr = false;
    bool IncrScratchpadSrc = false;
    bool IncrScratchpadDest = false;
    QualType PrivatePtrType = C.getPointerType(Private->getType());
    llvm::Type *PrivateLlvmPtrType = CGF.ConvertType(PrivatePtrType);

    case RemoteLaneToThread: {
              SrcElementPtrAddr, PrivateLlvmPtrType),
          CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
      ShuffleInElement = true;
      UpdateDestListPtr = true;
              SrcElementPtrAddr, PrivateLlvmPtrType),
              DestElementPtrAddr, PrivateLlvmPtrType),
    case ThreadToScratchpad: {
              SrcElementPtrAddr, PrivateLlvmPtrType),
      llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
      llvm::Value *CurrentOffset =
          Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
      llvm::Value *ScratchPadElemAbsolutePtrVal =
          Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset);
      ScratchPadElemAbsolutePtrVal =
          Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
      DestElementAddr = Address(ScratchPadElemAbsolutePtrVal, CGF.Int8Ty,
                                C.getTypeAlignInChars(Private->getType()));
      IncrScratchpadDest = true;
    case ScratchpadToThread: {
      llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
      llvm::Value *CurrentOffset =
          Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
      llvm::Value *ScratchPadElemAbsolutePtrVal =
          Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset);
      ScratchPadElemAbsolutePtrVal =
          Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
      SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal, CGF.Int8Ty,
                               C.getTypeAlignInChars(Private->getType()));
      IncrScratchpadSrc = true;
          CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
      UpdateDestListPtr = true;

    if (ShuffleInElement) {
      shuffleAndStore(CGF, SrcElementAddr, DestElementAddr, Private->getType(),
                      RemoteLaneOffset, Private->getExprLoc());
          SrcElementAddr, false, Private->getType(),
          Elem, DestElementAddr, false, Private->getType(),
          Private->getExprLoc());

    if (UpdateDestListPtr) {
          DestElementPtrAddr, false,

    if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
      llvm::Value *ScratchpadBasePtr =
      llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
      ScratchpadBasePtr = Bld.CreateNUWAdd(
          Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
      ScratchpadBasePtr = Bld.CreateNUWSub(
          ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
      ScratchpadBasePtr = Bld.CreateUDiv(
          llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
      ScratchpadBasePtr = Bld.CreateNUWAdd(
          ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
      ScratchpadBasePtr = Bld.CreateNUWMul(
          llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
      if (IncrScratchpadDest)
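/// Helper ("_omp_reduction_inter_warp_copy_func") that gathers reduce lists
/// from the first lane of every active warp to the lanes of the first warp,
/// staging values through a shared-memory transfer buffer.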
                                C.getIntTypeForBitwidth(32, true),
  Args.push_back(&ReduceListArg);
  Args.push_back(&NumWarpsArg);
                                    llvm::GlobalValue::InternalLinkage,
                                    "_omp_reduction_inter_warp_copy_func", &M);
  Fn->setDoesNotRecurse();

  StringRef TransferMediumName =
      "__openmp_nvptx_data_transfer_temporary_storage";
  llvm::GlobalVariable *TransferMedium =
      M.getGlobalVariable(TransferMediumName);
  if (!TransferMedium) {
    auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
    TransferMedium = new llvm::GlobalVariable(
        M, Ty, false, llvm::GlobalVariable::WeakAnyLinkage,
        llvm::UndefValue::get(Ty), TransferMediumName,
        nullptr, llvm::GlobalVariable::NotThreadLocal,
        SharedAddressSpace);

          AddrReduceListArg, false, C.VoidPtrTy, Loc,
          ElemTy->getPointerTo()),
  for (const Expr *Private : Privates) {
    unsigned RealTySize =
        C.getTypeSizeInChars(Private->getType())
            .alignTo(C.getTypeAlignInChars(Private->getType()))
    for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
      unsigned NumIters = RealTySize / TySize;
      QualType CType = C.getIntTypeForBitwidth(
      llvm::Value *Cnt = nullptr;
      llvm::BasicBlock *PrecondBB = nullptr;
      llvm::BasicBlock *ExitBB = nullptr;
            Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.IntTy, NumIters));
        Bld.CreateCondBr(Cmp, BodyBB, ExitBB);

      llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master");
      Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
      llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
          TransferMedium->getValueType(), TransferMedium,
          {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
              CopyType->getPointerTo(
                  MediumPtrVal->getType()->getPointerAddressSpace())),
          ElemPtr, false, CType, Loc,
      Bld.CreateBr(MergeBB);
      Bld.CreateBr(MergeBB);

          AddrNumWarpsArg, false, C.IntTy, Loc);
      llvm::Value *IsActiveThread =
          Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
      Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
      llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
          TransferMedium->getValueType(), TransferMedium,
          {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
              CopyType->getPointerTo(
                  SrcMediumPtrVal->getType()->getPointerAddressSpace())),
          TargetElemPtrPtr, false, C.VoidPtrTy, Loc);
        TargetElemPtr = Bld.CreateGEP(TargetElemPtr, Cnt);
      llvm::Value *SrcMediumValue =
      Bld.CreateBr(W0MergeBB);
      Bld.CreateBr(W0MergeBB);
        Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, 1));
      RealTySize %= TySize;
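/// Helper ("_omp_reduction_shuffle_and_reduce_func") that reduces data across
/// two OpenMP threads (lanes) in the same warp, using the shuffle copies above.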
  Args.push_back(&ReduceListArg);
  Args.push_back(&LaneIDArg);
  Args.push_back(&RemoteLaneOffsetArg);
  Args.push_back(&AlgoVerArg);
  auto *Fn = llvm::Function::Create(
      "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
  Fn->setDoesNotRecurse();

          ElemTy->getPointerTo()),
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_reduce_list");
                        LocalReduceList, RemoteReduceList,
                        {RemoteLaneOffsetArgVal,

  llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal);
  llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
  llvm::Value *CondAlgo1 = Bld.CreateAnd(
      Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
  llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
  llvm::Value *CondAlgo2 = Bld.CreateAnd(
      Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1))));
  CondAlgo2 = Bld.CreateAnd(
      CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
  llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
  CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
  Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
      CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr});
  Bld.CreateBr(MergeBB);
  Bld.CreateBr(MergeBB);

  Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
  llvm::Value *CondCopy = Bld.CreateAnd(
      Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
  Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
                        RemoteReduceList, LocalReduceList);
  Bld.CreateBr(CpyMergeBB);
  Bld.CreateBr(CpyMergeBB);
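/// Helper ("_omp_reduction_list_to_global_copy_func") that copies all of the
/// reduction variables from the team into the provided global buffer.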
    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
  Args.push_back(&BufferArg);
  Args.push_back(&IdxArg);
  Args.push_back(&ReduceListArg);
  auto *Fn = llvm::Function::Create(
      "_omp_reduction_list_to_global_copy_func", &CGM.getModule());
  Fn->setDoesNotRecurse();

          ElemTy->getPointerTo()),
  QualType StaticTy = C.getRecordType(TeamReductionRec);
  llvm::Type *LLVMReductionsBufferTy =
      LLVMReductionsBufferTy->getPointerTo());
  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
  for (const Expr *Private : Privates) {
        ElemPtrPtr, ElemTy->getPointerTo());
        Address(ElemPtrPtr, ElemTy, C.getTypeAlignInChars(Private->getType()));
    const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
    const FieldDecl *FD = VarFieldMap.lookup(VD);
    llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(),
        ElemPtr, false, Private->getType(), Loc,
    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
    llvm::Function *ReduceFn) {
  Args.push_back(&BufferArg);
  Args.push_back(&IdxArg);
  Args.push_back(&ReduceListArg);
  auto *Fn = llvm::Function::Create(
      "_omp_reduction_list_to_global_reduce_func", &CGM.getModule());
  Fn->setDoesNotRecurse();

  QualType StaticTy = C.getRecordType(TeamReductionRec);
  llvm::Type *LLVMReductionsBufferTy =
      LLVMReductionsBufferTy->getPointerTo());
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
  for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
    const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
    const FieldDecl *FD = VarFieldMap.lookup(VD);
    llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      llvm::Value *Size = CGF.Builder.CreateIntCast(
  llvm::Value *GlobalReduceList =
      AddrReduceListArg, false, C.VoidPtrTy, Loc);
      CGF, Loc, ReduceFn, {GlobalReduceList, ReducedPtr});
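/// Helper ("_omp_reduction_global_to_list_copy_func") that copies the
/// reduction variables from the team-reduction global buffer back into the
/// reduce list.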
    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
  Args.push_back(&BufferArg);
  Args.push_back(&IdxArg);
  Args.push_back(&ReduceListArg);
  auto *Fn = llvm::Function::Create(
      "_omp_reduction_global_to_list_copy_func", &CGM.getModule());
  Fn->setDoesNotRecurse();

          ElemTy->getPointerTo()),
  QualType StaticTy = C.getRecordType(TeamReductionRec);
  llvm::Type *LLVMReductionsBufferTy =
      LLVMReductionsBufferTy->getPointerTo());
  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
  for (const Expr *Private : Privates) {
        ElemPtrPtr, ElemTy->getPointerTo());
        Address(ElemPtrPtr, ElemTy, C.getTypeAlignInChars(Private->getType()));
    const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
    const FieldDecl *FD = VarFieldMap.lookup(VD);
    llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(),
    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
    llvm::Function *ReduceFn) {
  Args.push_back(&BufferArg);
  Args.push_back(&IdxArg);
  Args.push_back(&ReduceListArg);
  auto *Fn = llvm::Function::Create(
      "_omp_reduction_global_to_list_reduce_func", &CGM.getModule());
  Fn->setDoesNotRecurse();

  QualType StaticTy = C.getRecordType(TeamReductionRec);
  llvm::Type *LLVMReductionsBufferTy =
      LLVMReductionsBufferTy->getPointerTo());
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
  for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) {
    const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
    const FieldDecl *FD = VarFieldMap.lookup(VD);
    llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      llvm::Value *Size = CGF.Builder.CreateIntCast(
  llvm::Value *GlobalReduceList =
      AddrReduceListArg, false, C.VoidPtrTy, Loc);
      CGF, Loc, ReduceFn, {ReducedPtr, GlobalReduceList});
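/// Emit code for the reduction clause: fall back to the base simple-reduction
/// path when possible, otherwise lower to the __kmpc_nvptx_*_reduce_nowait_v2
/// runtime entry points for parallel and teams reductions.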
  if (Options.SimpleReduction) {
    assert(!TeamsReduction && !ParallelReduction &&
           "Invalid reduction selection in emitReduction.");
                                   ReductionOps, Options);

  assert((TeamsReduction || ParallelReduction) &&
         "Invalid reduction selection in emitReduction.");

  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
  llvm::APInt ArraySize(32, Size);
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      llvm::Value *Size = CGF.Builder.CreateIntCast(
      Privates, LHSExprs, RHSExprs, ReductionOps);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
      CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
  llvm::Value *InterWarpCopyFn =

  if (ParallelReduction) {
    llvm::Value *Args[] = {RTLoc,
                           CGF.Builder.getInt32(RHSExprs.size()),
                           ReductionArrayTySize,
            CGM.getModule(), OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2),
    assert(TeamsReduction && "expected teams reduction.");
    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;
    for (const Expr *DRE : Privates) {
      PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();
    const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars(
        CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap,
        C.getLangOpts().OpenMPCUDAReductionBufNum);
    TeamsReductions.push_back(TeamReductionRec);
    if (!KernelTeamsReductionPtr) {
      KernelTeamsReductionPtr = new llvm::GlobalVariable(
          llvm::GlobalValue::InternalLinkage, nullptr,
          "_openmp_teams_reductions_buffer_$_$ptr");
        false, C.getPointerType(C.VoidPtrTy), Loc);
        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
        CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
    llvm::Value *Args[] = {
        CGF.Builder.getInt32(C.getLangOpts().OpenMPCUDAReductionBufNum),
        GlobalToBufferCpyFn,
        GlobalToBufferRedFn,
        BufferToGlobalCpyFn,
        BufferToGlobalRedFn};
            CGM.getModule(), OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2),

  llvm::Value *Cond = CGF.Builder.CreateICmpEQ(
      Res, llvm::ConstantInt::get(CGM.Int32Ty, 1));
  CGF.Builder.CreateCondBr(Cond, ThenBB, ExitBB);

  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps,
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
                                  cast<DeclRefExpr>(*IRHS));
    llvm::Value *EndArgs[] = {ThreadId};
    NVPTXActionTy Action(
        nullptr, std::nullopt,
            CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait),
                                              const VarDecl *NativeParam) const {
  const Type *NonQualTy = QC.strip(ArgType);
  if (const auto *Attr = FD->getAttr<OMPCaptureKindAttr>()) {
    if (Attr->getCaptureKind() == OMPC_map) {
      enum { NVPTX_local_addr = 5 };
  if (isa<ImplicitParamDecl>(NativeParam))

                                            const VarDecl *TargetParam) const {
  assert(NativeParam != TargetParam &&
         "Native arg must not be the same as target arg.");
  const Type *NonQualTy = QC.strip(NativeParamType);
  unsigned NativePointeeAddrSpace =
      TargetAddr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(TargetAddr->getType()), 0));
      TargetAddr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(TargetAddr->getType()),
                      NativePointeeAddrSpace));
  return NativeParamAddr;
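/// Call the outlined function, casting pointer arguments to the address space
/// expected by the callee's corresponding parameter when they differ.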
  TargetArgs.reserve(Args.size());
  auto *FnType = OutlinedFn.getFunctionType();
  for (unsigned I = 0, E = Args.size(); I < E; ++I) {
    if (FnType->isVarArg() && FnType->getNumParams() <= I) {
      TargetArgs.append(std::next(Args.begin(), I), Args.end());
    llvm::Type *TargetType = FnType->getParamType(I);
    llvm::Value *NativeArg = Args[I];
    if (!TargetType->isPointerTy()) {
      TargetArgs.emplace_back(NativeArg);
        NativeArg, llvm::PointerType::getWithSamePointeeType(
                       cast<llvm::PointerType>(NativeArg->getType()), 0));
    TargetArgs.emplace_back(
llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
  WrapperArgs.emplace_back(&ParallelLevelArg);
  WrapperArgs.emplace_back(&WrapperArg);
  auto *Fn = llvm::Function::Create(
      Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());
  Fn->addFnAttr(llvm::Attribute::NoInline);
  Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
  Fn->setDoesNotRecurse();

  const auto *RD = CS.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CI = CS.capture_begin();
  llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();
  llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
  if (CS.capture_size() > 0 ||
        cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc());
    Args.emplace_back(LB);
        cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc());
    Args.emplace_back(UB);
  if (CS.capture_size() > 0) {
    for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
      QualType ElemTy = CurField->getType();
      if (CI->capturesVariableByCopy() &&
          !CI->getCapturedVar()->getType()->isAnyPointerType()) {
      Args.emplace_back(Arg);
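/// OpenMP-specific function prolog: run the escape analysis over the function
/// body and register the variables that must be globalized for this function.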
  assert(D && "Expected function or captured|block decl.");
  assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&
         "Function is registered already.");
  assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&
         "Team is set but not processed.");
  const Stmt *Body = nullptr;
  bool NeedToDelayGlobalization = false;
  if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
    Body = FD->getBody();
  } else if (const auto *BD = dyn_cast<BlockDecl>(D)) {
    Body = BD->getBody();
  } else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
    Body = CD->getBody();
    if (NeedToDelayGlobalization &&
  CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
  VarChecker.Visit(Body);
      VarChecker.getGlobalizedRecord(IsInTTDRegion);
  TeamAndReductions.first = nullptr;
  TeamAndReductions.second.clear();
      VarChecker.getEscapedVariableLengthDecls();
  if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
  auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
  I->getSecond().MappedParams =
      std::make_unique<CodeGenFunction::OMPMapVars>();
  I->getSecond().EscapedParameters.insert(
      VarChecker.getEscapedParameters().begin(),
      VarChecker.getEscapedParameters().end());
  I->getSecond().EscapedVariableLengthDecls.append(
      EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());
  DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
  for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
    Data.insert(std::make_pair(VD, MappedVarData()));
  if (!NeedToDelayGlobalization) {
    emitGenericVarsProlog(CGF, D->getBeginLoc(), true);
      GlobalizationScope() = default;
          .emitGenericVarsEpilog(CGF, true);
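/// Return the OpenMP-specific address of a local variable: variables with an
/// allocate attribute get dedicated storage, while globalized locals return
/// the address recorded for them in the prolog.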
  if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
    const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
    switch (A->getAllocatorType()) {
    case OMPAllocateDeclAttr::OMPNullMemAlloc:
    case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    case OMPAllocateDeclAttr::OMPThreadMemAlloc:
    case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
    case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
    case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    case OMPAllocateDeclAttr::OMPConstMemAlloc:
    case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
    case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
    auto *GV = new llvm::GlobalVariable(
        llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(VarTy),
        nullptr, llvm::GlobalValue::NotThreadLocal,

  auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
  if (I == FunctionGlobalizedDecls.end())
  auto VDI = I->getSecond().LocalVarData.find(VD);
  if (VDI != I->getSecond().LocalVarData.end())
    return VDI->second.PrivateAddr;
    auto VDI = I->getSecond().LocalVarData.find(
        cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
            ->getCanonicalDecl());
    if (VDI != I->getSecond().LocalVarData.end())
      return VDI->second.PrivateAddr;

  FunctionGlobalizedDecls.erase(CGF.CurFn);
                                                   llvm::Value *&Chunk) const {
    ScheduleKind = OMPC_DIST_SCHEDULE_static;
        S.getIterationVariable()->getType(), S.getBeginLoc());
      CGF, S, ScheduleKind, Chunk);

                                               const Expr *&ChunkExpr) const {
  ScheduleKind = OMPC_SCHEDULE_static;
  llvm::APInt ChunkSize(32, 1);

         " Expected target-based directive.");
    if (!C.capturesVariable())
    const VarDecl *VD = C.getCapturedVar();
    const auto *RD = VD->getType()
    if (!RD || !RD->isLambda())
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    RD->getCaptureFields(Captures, ThisCapture);
      const ValueDecl *VD = LC.getCapturedVar();
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");

  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "

      if (Feature.getValue()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
        llvm::raw_svector_ostream Out(Buffer);
            << " does not support unified addressing";
        CGM.Error(Clause->getBeginLoc(), Out.str());
    llvm_unreachable("Unexpected Cuda arch.");

  if (!TeamsReductions.empty()) {
        "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::TTK_Union);
    for (const RecordDecl *TeamReductionRec : TeamsReductions) {
      QualType RecTy = C.getRecordType(TeamReductionRec);
    QualType StaticTy = C.getRecordType(StaticRD);
    llvm::Type *LLVMReductionsBufferTy =
    auto *GV = new llvm::GlobalVariable(
        false, llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(LLVMReductionsBufferTy),
        "_openmp_teams_reductions_buffer_$_");
    KernelTeamsReductionPtr->setInitializer(
        llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV,

  const char *LocSize = "__kmpc_get_hardware_num_threads_in_block";
  llvm::Function *F = M->getFunction(LocSize);
    F = llvm::Function::Create(
        llvm::FunctionType::get(CGF.Int32Ty, std::nullopt, false),
        llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
  return Bld.CreateCall(F, std::nullopt, "nvptx_num_threads");

          CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block),
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, bool Mode)
static llvm::Value * getNVPTXLaneID(CodeGenFunction &CGF)
Get the id of the current lane in the Warp.
static CudaArch getCudaArch(CodeGenModule &CGM)
static llvm::Value * emitListToGlobalCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap)
This function emits a helper that copies all the reduction variables from the team into the provided ...
static llvm::Value * emitGlobalToListReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap, llvm::Function *ReduceFn)
This function emits a helper that reduces all the reduction variables from the team into the provided...
static llvm::Value * emitInterWarpCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc)
This function emits a helper that gathers Reduce lists from the first lane of every active warp to la...
static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)
Get list of reduction variables from the teams ... directives.
static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)
Cast value to the specified type.
static void emitReductionListCopy(CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy, ArrayRef< const Expr * > Privates, Address SrcBase, Address DestBase, CopyOptionsTy CopyOptions={nullptr, nullptr, nullptr})
Emit instructions to copy a Reduce list, which contains partially aggregated values,...
static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)
Get list of lastprivate variables from the teams distribute ... or teams {distribute ....
static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, Address DestAddr, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner (nested) SPMD construct, if any.
static llvm::Function * emitShuffleAndReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, llvm::Function *ReduceFn, SourceLocation Loc)
Emit a helper that reduces data across two OpenMP threads (lanes) in the same warp.
static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)
static llvm::Value * emitListToGlobalReduceFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap, llvm::Function *ReduceFn)
This function emits a helper that reduces all the reduction variables from the team into the provided...
static CGOpenMPRuntimeGPU::DataSharingMode getDataSharingMode(CodeGenModule &CGM)
static llvm::Value * emitGlobalToListCopyFunction(CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap)
This function emits a helper that copies all the reduction variables from the team into the provided ...
static llvm::Value * createRuntimeShuffleFunction(CodeGenFunction &CGF, llvm::Value *Elem, QualType ElemType, llvm::Value *Offset, SourceLocation Loc)
This function creates calls to one of two shuffle functions to copy variables between lanes in a warp...
static llvm::Value * getNVPTXWarpID(CodeGenFunction &CGF)
Get the id of the warp in the block.
This file defines OpenMP nodes for declarative directives.
This file defines OpenMP AST classes for clauses.
static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false, StringRef DiagType="")
This file defines OpenMP AST classes for executable directives and clauses.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
const VariableArrayType * getAsVariableArrayType(QualType T) const
const TargetInfo & getTargetInfo() const
QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const
Return the uniqued reference to the type for an address space qualified type with the specified type ...
unsigned getTargetAddressSpace(LangAS AS) const
Attr - This represents one attribute.
A class which contains all the information about a particular captured value.
ArrayRef< Capture > captures() const
BlockExpr - Adaptor class for mixing a BlockDecl with expressions.
const BlockDecl * getBlockDecl() const
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Describes the capture of either a variable, or 'this', or variable-length array type.
This captures a statement into a function.
CapturedDecl * getCapturedDecl()
Retrieve the outlined function declaration.
Stmt * getCapturedStmt()
Retrieve the statement being captured.
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
CastKind getCastKind() const
CharUnits - This is an opaque type for sizes expressed in character units.
bool isZero() const
isZero - Test whether the quantity equals zero.
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
CharUnits getAlignment() const
Return the alignment of this pointer.
llvm::Type * getElementType() const
Return the type of the values stored in this address.
unsigned getAddressSpace() const
Return the address space that this address resides in.
llvm::Value * getPointer() const
llvm::PointerType * getType() const
Return the type of the pointer value.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Cast the element type of the given address to a different type, preserving information like the align...
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Address CreateGEP(Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
CGFunctionInfo - Class to encapsulate the information about a function definition.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP teams.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
DataSharingMode
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...
@ Generic
Generic data-sharing mode.
@ CUDA
CUDA data sharing mode.
void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override
Choose a default value for the dist_schedule clause.
Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override
Gets the OpenMP-specific address of the local variable.
void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override
Emits OpenMP-specific function prolog.
void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override
Choose a default value for the schedule clause.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
This function ought to emit, in the general case, a call to.
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
CGOpenMPRuntimeGPU(CodeGenModule &CGM)
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits inlined function for the specified OpenMP parallel.
void functionFinished(CodeGenFunction &CGF) override
Cleans up references to the objects in finished function.
llvm::Value * getGPUThreadID(CodeGenFunction &CGF)
Get the id of the current thread on the GPU.
llvm::Value * getGPUWarpSize(CodeGenFunction &CGF)
Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURun...
void processRequiresDirective(const OMPRequiresDecl *D) override
Perform check on requires decl to ensure that target architecture supports unified addressing.
void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const override
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
ExecutionMode
Defines the execution mode.
@ EM_NonSPMD
Non-SPMD execution mode (1 master thread, others are workers).
@ EM_Unknown
Unknown execution mode (orphaned directive).
@ EM_SPMD
SPMD execution mode (all threads are worker threads).
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
llvm::Value * getGPUNumThreads(CodeGenFunction &CGF)
Get the maximum number of threads in a block of the GPU.
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override
Adjust some parameters for the target-based directives, like addresses of the variables captured by reference in lambdas.
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is a CompoundStmt and returns its child statement iff there is only one that is not evaluatable at compile time.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
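The ident_t object mentioned here is the OpenMP runtime's source-location descriptor; the layout below is the conventional one and should be read as an assumption, not as a definition taken from this file:

    // Assumed conventional layout of ident_t, used to carry source-location
    // information ("<file>;<function>;<line>;<column>;;" in psource).
    typedef int kmp_int32;
    struct ident_t {
      kmp_int32 reserved_1;
      kmp_int32 flags;       // Encodes directive/barrier kind flags.
      kmp_int32 reserved_2;
      kmp_int32 reserved_3;
      const char *psource;   // Encoded source location string.
    };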
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit a call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate code for the 'proc_bind' clause.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers, depending on the directive for which this barrier is going to be emitted.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const
Choose default schedule type and chunk value for the dist_schedule clause.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits a call of the outlined function with the provided arguments, translating these arguments to the correct target-specific arguments.
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
CapturedRegionKind getKind() const
bool isCXXThisExprCaptured() const
The scope used to remap some variables as private in the OpenMP loop body (or other captured region emitted in an outlined function).
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
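A hedged sketch of how the evaluation kind is typically used to dispatch between the scalar, complex, and aggregate copy helpers listed in this section; the wrapper function and its name are illustrative, written as it would appear inside clang/lib/CodeGen:

    #include "CodeGenFunction.h"   // clang/lib/CodeGen internal header.
    using namespace clang;
    using namespace clang::CodeGen;

    // Copy Src into Dst according to how values of type T are represented.
    static void emitCopyByKind(CodeGenFunction &CGF, LValue Dst, LValue Src,
                               QualType T, SourceLocation Loc) {
      switch (CodeGenFunction::getEvaluationKind(T)) {
      case TEK_Scalar:
        CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(Src, Loc), Dst);
        break;
      case TEK_Complex:
        CGF.EmitStoreOfComplex(CGF.EmitLoadOfComplex(Src, Loc), Dst,
                               /*isInit=*/false);
        break;
      case TEK_Aggregate:
        CGF.EmitAggregateCopy(Dst, Src, T, AggValueSlot::DoesNotOverlap);
        break;
      }
    }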
CGCapturedStmtInfo * CapturedStmtInfo
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements, of a variable length array type, together with that largest non-variably-sized element type.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference, this will return the address of the reference and not the address of the value stored in the reference.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
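The block helpers above (createBasicBlock, EmitBlock, and EmitBranch further down) are typically combined with the IR builder to stitch together simple control flow; a minimal, illustrative skeleton assuming an i1 condition value Cond:

    #include "CodeGenFunction.h"   // clang/lib/CodeGen internal header.
    using namespace clang::CodeGen;

    // Emit: if (Cond) { <guarded work> }  -- illustrative skeleton only.
    static void emitGuardedRegion(CodeGenFunction &CGF, llvm::Value *Cond) {
      llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".then");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cont");
      CGF.Builder.CreateCondBr(Cond, ThenBB, ContBB); // Branch on the i1 condition.
      CGF.EmitBlock(ThenBB);   // Emit the guarded work into the 'then' block...
      // ... guarded codegen would go here ...
      CGF.EmitBranch(ContBB);  // ...then jump to the join block.
      CGF.EmitBlock(ContBB);   // Resume emission after the region.
    }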
llvm::Type * ConvertTypeForMem(QualType T)
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
const TargetInfo & getTarget() const
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
CGDebugInfo * getDebugInfo()
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
Address CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given LLVM type, at the current insertion point.
ASTContext & getContext() const
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the memory representation to the LLVM value representation.
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment, and cast it to the default address space.
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
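EmitRuntimeCall pairs naturally with getModule()/getOrInsertFunction when a runtime helper has to be declared on the fly; a sketch in which the entry-point name is made up purely for illustration:

    #include "CodeGenFunction.h"   // clang/lib/CodeGen internal headers.
    #include "CodeGenModule.h"
    using namespace clang::CodeGen;

    // Declare (or reuse) a hypothetical void() runtime helper and call it.
    static void emitExampleRuntimeHook(CodeGenFunction &CGF) {
      llvm::FunctionType *FnTy =
          llvm::FunctionType::get(CGF.Builder.getVoidTy(), /*isVarArg=*/false);
      llvm::FunctionCallee Fn = CGF.CGM.getModule().getOrInsertFunction(
          "__example_runtime_hook", FnTy);  // Placeholder name, not a real entry.
      CGF.EmitRuntimeCall(Fn);  // Applies the runtime calling convention for us.
    }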
llvm::Type * ConvertType(QualType T)
CodeGenTypes & getTypes() const
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
llvm::Value * EmitCastToVoidPtr(llvm::Value *value)
Emit a cast to void* in the appropriate address space.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the result against zero, returning a boolean (i1) value.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
llvm::Value * LoadCXXThis()
LoadCXXThis - Load the value of 'this'.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM scalar types.
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the LLVM value representation to the memory representation.
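A small sketch tying GetAddrOfLocalVar to the address-based scalar load/store overloads documented above; the helper and its name are illustrative, and it assumes both VarDecls are already-emitted locals of the same scalar type:

    #include "CodeGenFunction.h"   // clang/lib/CodeGen internal header.
    using namespace clang;
    using namespace clang::CodeGen;

    // Copy the current value of one local scalar variable into another.
    static void copyLocalScalar(CodeGenFunction &CGF, const VarDecl *SrcVD,
                                const VarDecl *DstVD, SourceLocation Loc) {
      QualType Ty = SrcVD->getType();
      Address SrcAddr = CGF.GetAddrOfLocalVar(SrcVD);  // Address of the source local.
      Address DstAddr = CGF.GetAddrOfLocalVar(DstVD);  // Address of the destination.
      llvm::Value *V = CGF.EmitLoadOfScalar(SrcAddr, /*Volatile=*/false, Ty, Loc);
      CGF.EmitStoreOfScalar(V, DstAddr, /*Volatile=*/false, Ty);
    }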
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
const LangOptions & getLangOpts() const
CodeGenTypes & getTypes()
const TargetInfo & getTarget() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
ASTContext & getContext() const
llvm::LLVMContext & getLLVMContext()
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
unsigned getTargetAddressSpace(QualType T) const
llvm::Type * ConvertTypeForMem(QualType T, bool ForBitField=false)
ConvertTypeForMem - Convert type T into a llvm::Type.
Information for lazily generating a cleanup.
FunctionArgList - Type for representing both the decl and type of parameters to a function.
LValue - This represents an lvalue reference.
Address getAddress(CodeGenFunction &CGF) const
llvm::Value * getPointer(CodeGenFunction &CGF) const
void setAddress(Address address)
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Exit(CodeGenFunction &CGF)
virtual void Enter(CodeGenFunction &CGF)
Class that provides a way to call the simple version of codegen for an OpenMP region, or an advanced one with possible pre|post-actions in codegen.
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as a base class of specific decl types that can act as declaration contexts.
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
DeclStmt - Adaptor class for mixing declarations with statements and expressions.
Decl - This represents one declaration (or definition), e.g. a variable, typedef, function, struct, etc.
attr_iterator attr_end() const
bool isCanonicalDecl() const
Whether this particular Decl is a canonical one.
attr_iterator attr_begin() const
SourceLocation getLocation() const
DeclContext * getDeclContext()
SourceLocation getBeginLoc() const LLVM_READONLY
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
SourceLocation getBeginLoc() const LLVM_READONLY
This represents one expression.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fixed point.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language.
Represents a member of a struct/union/class.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
GlobalDecl - represents a global declaration.
ImplicitCastExpr - Allows us to explicitly represent implicit type conversions, which have no direct representation in the original source code.
@ Other
Other implicit parameter.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
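Following the Create signature above, a literal can be fabricated directly from an APInt; a minimal sketch in which the value 128 and the helper name are arbitrary:

    #include "clang/AST/ASTContext.h"
    #include "clang/AST/Expr.h"
    using namespace clang;

    // Build the implicit integer literal '128' of type 'int'.
    static IntegerLiteral *makeIntLiteral128(ASTContext &C) {
      llvm::APInt Val(C.getIntWidth(C.IntTy), 128);  // Width taken from 'int'.
      return IntegerLiteral::Create(C, Val, C.IntTy, SourceLocation());
    }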
Describes the capture of a variable or of this, or of a C++1y init-capture.
A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked later.
bool isInitCapture(const LambdaCapture *Capture) const
Determine whether one of this lambda's captures is an init-capture.
capture_range captures() const
Retrieve this lambda's captures.
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
This is a basic class for representing single OpenMP clause.
This is a basic class for representing single OpenMP executable directive.
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
bool hasAssociatedStmt() const
Returns true if directive has associated statement.
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive.
OpenMPDirectiveKind getDirectiveKind() const
const Stmt * getAssociatedStmt() const
Returns statement associated with the directive.
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
ArrayRef< OMPClause * > clauses() const
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd', etc.).
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
clauselist_range clauselists()
static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
LangAS getAddressSpace() const
Return the address space of this type.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const int").
QualType getCanonicalType() const
A qualifier set is used to build a set of qualifiers.
const Type * strip(QualType type)
Collect any qualifiers on the given type and return an unqualified type.
QualType apply(const ASTContext &Context, QualType QT) const
Apply the collected qualifiers to the given type.
void addAddressSpace(LangAS space)
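The qualifier-collector members above compose in a strip/modify/apply pattern; a hedged sketch that rebuilds a type with a different address space (the helper name is illustrative):

    #include "clang/AST/ASTContext.h"
    #include "clang/AST/Type.h"
    using namespace clang;

    // Return QT rebuilt so that it carries address space AS while keeping its
    // other qualifiers. Sketch only; AS must be a concrete (non-default) space.
    static QualType withAddressSpace(const ASTContext &Ctx, QualType QT, LangAS AS) {
      QualifierCollector QC;
      const Type *Ty = QC.strip(QT);          // Collect QT's qualifiers; keep bare type.
      QC.addAddressSpace(AS);                 // Record the requested address space.
      return QC.apply(Ctx, QualType(Ty, 0));  // Reapply the accumulated qualifiers.
    }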
Represents a struct/union/class.
virtual void completeDefinition()
Note that the definition of this type is now complete.
Encodes a location in the source.
RetTy Visit(PTR(Stmt) S, ParamTys... P)
Stmt - This represents one statement.
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top, if IgnoreCaptured is true.
void startDefinition()
Starts the definition of this tag declaration.
unsigned getNewAlign() const
Return the largest alignment for which a suitably-sized allocation with '::operator new(size_t)' is guaranteed to produce a correctly-aligned pointer.
TargetOptions & getTargetOpts() const
Retrieve the target options.
virtual const llvm::omp::GV & getGridValue() const
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
llvm::StringMap< bool > FeatureMap
The map of which features have been enabled or disabled based on the command line.
The base class of the type hierarchy.
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or because it is the injected-class-name type of a class template or class template partial specialization.
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
const T * castAs() const
Member-template castAs<specific type>.
bool isReferenceType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
bool isLValueReferenceType() const
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g., it is a signed integer type or a vector.
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
UnaryOperator - This represents the unary-expression's (except sizeof and alignof), the postinc/postdec operators from postfix-expression, and various extensions.
Expr * getSubExpr() const
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it is a function designator), or an enum constant.
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
Represents a variable declaration or definition.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
bool isInitCapture() const
Whether this variable is the implicit variable for a lambda init-capture.
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are of a specific type.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
@ ICIS_NoInit
No in-class initializer.
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
CudaArch StringToCudaArch(llvm::StringRef S)
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
bool isOpenMPPrivate(OpenMPClauseKind Kind)
Checks if the specified clause is one of private clauses like 'private', 'firstprivate',...
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ C
Languages that the frontend can parse and compile.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the composite or combined directives that need loop bound sharing.
LangAS
Defines the address space values used by the address space qualifier of QualType.
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
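Per the signature above, the component capture regions of a (possibly combined) directive come back through an out-parameter; a short illustrative use:

    #include "clang/Basic/OpenMPKinds.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace clang;

    // Count the component capture regions of a construct; a combined directive
    // such as 'target teams' reports one region kind per component.
    static unsigned countCaptureRegions(OpenMPDirectiveKind DKind) {
      llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
      getOpenMPCaptureRegions(CaptureRegions, DKind);
      return CaptureRegions.size();
    }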
LangAS getLangASFromTargetAS(unsigned TargetAS)
const char * CudaArchToString(CudaArch A)
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
llvm::Value * ScratchpadIndex
llvm::Value * ScratchpadWidth
llvm::Value * RemoteLaneOffset
llvm::PointerType * VoidPtrTy
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::IntegerType * SizeTy
llvm::PointerType * VoidPtrPtrTy
llvm::IntegerType * Int32Ty
llvm::IntegerType * IntTy
int
llvm::IntegerType * Int16Ty
llvm::PointerType * Int8PtrTy
CharUnits getPointerAlign() const
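The cached LLVM types listed at the end (Int16Ty, Int32Ty, VoidPtrTy, and friends) are what the GPU runtime typically reaches for when assembling runtime function types; a sketch whose particular signature is invented for illustration:

    #include "CodeGenModule.h"   // clang/lib/CodeGen internal header.
    using namespace clang::CodeGen;

    // Build the LLVM function type 'i32 (i16, i8*)' from the cached primitives.
    // The shape is illustrative, not a real runtime entry's signature.
    static llvm::FunctionType *buildExampleFnTy(CodeGenModule &CGM) {
      llvm::Type *Params[] = {CGM.Int16Ty, CGM.VoidPtrTy};
      return llvm::FunctionType::get(CGM.Int32Ty, Params, /*isVarArg=*/false);
    }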