clang 17.0.0git
|
#include "CGOpenMPRuntimeGPU.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/Support/MathExtras.h"
Go to the source code of this file.
Classes | |
struct | CopyOptionsTy |
Functions | |
static llvm::Value * | getNVPTXWarpID (CodeGenFunction &CGF) |
Get the id of the warp in the block. | |
static llvm::Value * | getNVPTXLaneID (CodeGenFunction &CGF) |
Get the id of the current lane in the Warp. | |
static CGOpenMPRuntimeGPU::DataSharingMode | getDataSharingMode (CodeGenModule &CGM) |
static bool | hasNestedSPMDDirective (ASTContext &Ctx, const OMPExecutableDirective &D) |
Check for inner (nested) SPMD construct, if any. | |
static bool | supportsSPMDExecutionMode (ASTContext &Ctx, const OMPExecutableDirective &D) |
static void | setPropertyExecutionMode (CodeGenModule &CGM, StringRef Name, bool Mode) |
static void | getDistributeLastprivateVars (ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars) |
Get list of lastprivate variables from the teams distribute ... or teams {distribute ...} directives. | |
static void | getTeamsReductionVars (ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars) |
Get list of reduction variables from the teams ... directives. | |
static llvm::Value * | castValueToType (CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc) |
Cast value to the specified type. | |
static llvm::Value * | createRuntimeShuffleFunction (CodeGenFunction &CGF, llvm::Value *Elem, QualType ElemType, llvm::Value *Offset, SourceLocation Loc) |
This function creates calls to one of two shuffle functions to copy variables between lanes in a warp. | |
static void | shuffleAndStore (CodeGenFunction &CGF, Address SrcAddr, Address DestAddr, QualType ElemType, llvm::Value *Offset, SourceLocation Loc) |
static void | emitReductionListCopy (CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy, ArrayRef< const Expr * > Privates, Address SrcBase, Address DestBase, CopyOptionsTy CopyOptions={nullptr, nullptr, nullptr}) |
Emit instructions to copy a Reduce list, which contains partially aggregated values, in the specified direction. | |
static llvm::Value * | emitInterWarpCopyFunction (CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc) |
This function emits a helper that gathers Reduce lists from the first lane of every active warp to lanes in the first warp. | |
static llvm::Function * | emitShuffleAndReduceFunction (CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, llvm::Function *ReduceFn, SourceLocation Loc) |
Emit a helper that reduces data across two OpenMP threads (lanes) in the same warp. | |
static llvm::Value * | emitListToGlobalCopyFunction (CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap) |
This function emits a helper that copies all the reduction variables from the team into the provided global buffer for the reduction variables. | |
static llvm::Value * | emitListToGlobalReduceFunction (CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap, llvm::Function *ReduceFn) |
This function emits a helper that reduces all the reduction variables from the team into the provided global buffer for the reduction variables. | |
static llvm::Value * | emitGlobalToListCopyFunction (CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap) |
This function emits a helper that copies all the reduction variables from the provided global buffer into the team's local reduction list. | |
static llvm::Value * | emitGlobalToListReduceFunction (CodeGenModule &CGM, ArrayRef< const Expr * > Privates, QualType ReductionArrayTy, SourceLocation Loc, const RecordDecl *TeamReductionRec, const llvm::SmallDenseMap< const ValueDecl *, const FieldDecl * > &VarFieldMap, llvm::Function *ReduceFn) |
This function emits a helper that reduces all the reduction variables from the provided global buffer into the team's local reduction list. | |
static CudaArch | getCudaArch (CodeGenModule &CGM) |
|
static |
Cast value to the specified type.
Definition at line 1351 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CodeGenFunction::ConvertTypeForMem(), clang::CodeGen::CodeGenFunction::CreateMemTemp(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::CodeGen::CodeGenFunction::EmitLoadOfScalar(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CodeGen::Address::getAddressSpace(), clang::CodeGen::CodeGenFunction::getContext(), clang::CodeGen::Address::getType(), clang::ASTContext::getTypeSizeInChars(), clang::Type::hasSignedIntegerRepresentation(), clang::Type::isIntegerType(), clang::CharUnits::isZero(), and clang::CodeGen::Type.
Referenced by createRuntimeShuffleFunction().
|
static |
This function creates calls to one of two shuffle functions to copy variables between lanes in a warp.
Definition at line 1381 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, castValueToType(), clang::CodeGen::CodeGenFunction::CGM, clang::CodeGen::CodeGenFunction::EmitRuntimeCall(), clang::CodeGen::CodeGenFunction::getContext(), clang::CodeGen::CGOpenMPRuntimeGPU::getGPUWarpSize(), clang::ASTContext::getIntTypeForBitwidth(), clang::CodeGen::CodeGenModule::getModule(), clang::CodeGen::CGOpenMPRuntime::getOMPBuilder(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), clang::ASTContext::getTypeSizeInChars(), and clang::CodeGen::CodeGenTypeCache::Int16Ty.
Referenced by shuffleAndStore().
|
static |
This function emits a helper that copies all the reduction variables from the provided global buffer into the team's local reduction list.
void global_to_list_copy_func(void *buffer, int Idx, void *reduce_data) For all data entries D in reduce_data: Copy buffer.D[Idx] to local D;
Definition at line 2411 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenTypes::arrangeBuiltinFunctionDeclaration(), clang::CodeGen::CodeGenFunction::Builder, clang::C, clang::CodeGen::CodeGenFunction::ConvertTypeForMem(), clang::CodeGen::CodeGenTypes::ConvertTypeForMem(), clang::CodeGen::CGBuilderTy::CreateConstArrayGEP(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::CodeGen::AggValueSlot::DoesNotOverlap, clang::CodeGen::CodeGenFunction::EmitAggregateCopy(), clang::CodeGen::CodeGenFunction::EmitLoadOfComplex(), clang::CodeGen::CodeGenFunction::EmitLoadOfScalar(), clang::CodeGen::CodeGenFunction::EmitLValueForField(), clang::CodeGen::CodeGenFunction::EmitStoreOfComplex(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CodeGen::CodeGenFunction::FinishFunction(), clang::CodeGen::LValue::getAddress(), clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar(), clang::CodeGen::Address::getAlignment(), clang::CodeGen::CodeGenModule::getContext(), clang::CodeGen::Address::getElementType(), clang::CodeGen::CodeGenFunction::getEvaluationKind(), clang::CodeGen::CodeGenTypes::GetFunctionType(), clang::CodeGen::CodeGenModule::getModule(), clang::CodeGen::Address::getPointer(), clang::CodeGen::CodeGenTypeCache::getPointerAlign(), clang::CodeGen::Address::getType(), clang::CodeGen::LValue::getType(), clang::CodeGen::CodeGenModule::getTypes(), clang::CodeGen::CodeGenTypeCache::Int32Ty, clang::CodeGen::CodeGenFunction::MakeAddrLValue(), clang::CodeGen::CodeGenFunction::MakeNaturalAlignAddrLValue(), clang::ImplicitParamDecl::Other, clang::CodeGen::LValue::setAddress(), clang::CodeGen::CodeGenModule::SetInternalFunctionAttributes(), clang::CodeGen::CodeGenFunction::StartFunction(), clang::CodeGen::TEK_Aggregate, clang::CodeGen::TEK_Complex, clang::CodeGen::TEK_Scalar, clang::CodeGen::Type, and V.
Referenced by clang::CodeGen::CGOpenMPRuntimeGPU::emitReduction().
|
static |
This function emits a helper that reduces all the reduction variables from the provided global buffer into the team's local reduction list.
void global_to_list_reduce_func(void *buffer, int Idx, void *reduce_data) void *GlobPtrs[]; GlobPtrs[0] = (void*)&buffer.D0[Idx]; ... GlobPtrs[N] = (void*)&buffer.DN[Idx]; reduce_function(reduce_data, GlobPtrs);
Definition at line 2524 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenTypes::arrangeBuiltinFunctionDeclaration(), clang::CodeGen::CodeGenFunction::Builder, clang::C, clang::CodeGen::CodeGenTypes::ConvertTypeForMem(), clang::CodeGen::CGBuilderTy::CreateConstArrayGEP(), clang::CodeGen::CodeGenFunction::CreateMemTemp(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::CodeGen::CGBuilderTy::CreateStore(), clang::CodeGen::CodeGenFunction::EmitCastToVoidPtr(), clang::CodeGen::CodeGenFunction::EmitLoadOfScalar(), clang::CodeGen::CodeGenFunction::EmitLValueForField(), clang::CodeGen::CGOpenMPRuntime::emitOutlinedFunctionCall(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CodeGen::CodeGenFunction::FinishFunction(), clang::CodeGen::LValue::getAddress(), clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar(), clang::ASTContext::getAsVariableArrayType(), clang::CodeGen::CodeGenFunction::getContext(), clang::CodeGen::CodeGenModule::getContext(), clang::CodeGen::Address::getElementType(), clang::CodeGen::CodeGenTypes::GetFunctionType(), clang::CodeGen::CodeGenModule::getModule(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), clang::CodeGen::Address::getPointer(), clang::CodeGen::CodeGenModule::getTypes(), clang::CodeGen::CodeGenFunction::getVLASize(), clang::CodeGen::CodeGenTypeCache::Int32Ty, clang::CodeGen::CodeGenFunction::MakeNaturalAlignAddrLValue(), clang::CodeGen::CodeGenFunction::VlaSizePair::NumElts, clang::ImplicitParamDecl::Other, clang::CodeGen::CodeGenModule::SetInternalFunctionAttributes(), clang::CodeGen::CodeGenTypeCache::SizeTy, clang::CodeGen::CodeGenFunction::StartFunction(), and clang::CodeGen::CodeGenTypeCache::VoidPtrTy.
Referenced by clang::CodeGen::CGOpenMPRuntimeGPU::emitReduction().
|
static |
This function emits a helper that gathers Reduce lists from the first lane of every active warp to lanes in the first warp.
void inter_warp_copy_func(void* reduce_data, num_warps): shared smem[warp_size]; for all data entries D in reduce_data: sync; if (I am the first lane in each warp) copy my local D to smem[warp_id]; sync; if (I am the first warp) copy smem[thread_id] to my local D.
Definition at line 1740 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenModule::addCompilerUsedGlobal(), clang::CodeGen::CodeGenTypes::arrangeBuiltinFunctionDeclaration(), clang::CodeGen::CodeGenFunction::Builder, clang::C, clang::CodeGen::CodeGenFunction::CGM, clang::CodeGen::CodeGenFunction::ConvertTypeForMem(), clang::CodeGen::CodeGenFunction::createBasicBlock(), clang::CodeGen::CGBuilderTy::CreateConstArrayGEP(), clang::CodeGen::CGBuilderTy::CreateElementBitCast(), clang::CodeGen::ApplyDebugLocation::CreateEmpty(), clang::CodeGen::CGBuilderTy::CreateGEP(), clang::CodeGen::CodeGenFunction::CreateMemTemp(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::cuda_shared, clang::CodeGen::CGOpenMPRuntime::emitBarrierCall(), clang::CodeGen::CodeGenFunction::EmitBlock(), clang::CodeGen::CodeGenFunction::EmitBranch(), clang::CodeGen::CodeGenFunction::EmitLoadOfScalar(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CodeGen::CodeGenFunction::FinishFunction(), clang::CharUnits::fromQuantity(), clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar(), clang::CodeGen::CodeGenModule::getContext(), clang::CodeGen::CodeGenTypes::GetFunctionType(), clang::CodeGen::CGOpenMPRuntimeGPU::getGPUThreadID(), clang::TargetInfo::getGridValue(), clang::CodeGen::CodeGenModule::getModule(), getNVPTXLaneID(), getNVPTXWarpID(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), clang::CodeGen::CodeGenTypeCache::getPointerAlign(), clang::CodeGen::CodeGenFunction::getTarget(), clang::CodeGen::CodeGenModule::getTypes(), clang::CodeGen::CodeGenTypeCache::Int32Ty, clang::CodeGen::CodeGenTypeCache::Int8Ty, clang::CodeGen::CodeGenTypeCache::IntTy, clang::CodeGen::Address::invalid(), clang::ImplicitParamDecl::Other, clang::CodeGen::CodeGenModule::SetInternalFunctionAttributes(), clang::CodeGen::CodeGenFunction::StartFunction(), and clang::CodeGen::Type.
Referenced by clang::CodeGen::CGOpenMPRuntimeGPU::emitReduction().
|
static |
This function emits a helper that copies all the reduction variables from the team into the provided global buffer for the reduction variables.
void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data) For all data entries D in reduce_data: Copy local D to buffer.D[Idx]
Definition at line 2202 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenTypes::arrangeBuiltinFunctionDeclaration(), clang::CodeGen::CodeGenFunction::Builder, clang::C, clang::CodeGen::CodeGenFunction::ConvertTypeForMem(), clang::CodeGen::CodeGenTypes::ConvertTypeForMem(), clang::CodeGen::CGBuilderTy::CreateConstArrayGEP(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::CodeGen::AggValueSlot::DoesNotOverlap, clang::CodeGen::CodeGenFunction::EmitAggregateCopy(), clang::CodeGen::CodeGenFunction::EmitLoadOfComplex(), clang::CodeGen::CodeGenFunction::EmitLoadOfScalar(), clang::CodeGen::CodeGenFunction::EmitLValueForField(), clang::CodeGen::CodeGenFunction::EmitStoreOfComplex(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CodeGen::CodeGenFunction::FinishFunction(), clang::CodeGen::LValue::getAddress(), clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar(), clang::CodeGen::Address::getAlignment(), clang::CodeGen::CodeGenModule::getContext(), clang::CodeGen::Address::getElementType(), clang::CodeGen::CodeGenFunction::getEvaluationKind(), clang::CodeGen::CodeGenTypes::GetFunctionType(), clang::CodeGen::CodeGenModule::getModule(), clang::CodeGen::Address::getPointer(), clang::CodeGen::CodeGenTypeCache::getPointerAlign(), clang::CodeGen::Address::getType(), clang::CodeGen::LValue::getType(), clang::CodeGen::CodeGenModule::getTypes(), clang::CodeGen::CodeGenTypeCache::Int32Ty, clang::CodeGen::CodeGenFunction::MakeAddrLValue(), clang::CodeGen::CodeGenFunction::MakeNaturalAlignAddrLValue(), clang::ImplicitParamDecl::Other, clang::CodeGen::LValue::setAddress(), clang::CodeGen::CodeGenModule::SetInternalFunctionAttributes(), clang::CodeGen::CodeGenFunction::StartFunction(), clang::CodeGen::TEK_Aggregate, clang::CodeGen::TEK_Complex, clang::CodeGen::TEK_Scalar, clang::CodeGen::Type, and V.
Referenced by clang::CodeGen::CGOpenMPRuntimeGPU::emitReduction().
|
static |
This function emits a helper that reduces all the reduction variables from the team into the provided global buffer for the reduction variables.
void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data) void *GlobPtrs[]; GlobPtrs[0] = (void*)&buffer.D0[Idx]; ... GlobPtrs[N] = (void*)&buffer.DN[Idx]; reduce_function(GlobPtrs, reduce_data);
Definition at line 2314 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenTypes::arrangeBuiltinFunctionDeclaration(), clang::CodeGen::CodeGenFunction::Builder, clang::C, clang::CodeGen::CodeGenTypes::ConvertTypeForMem(), clang::CodeGen::CGBuilderTy::CreateConstArrayGEP(), clang::CodeGen::CodeGenFunction::CreateMemTemp(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::CodeGen::CGBuilderTy::CreateStore(), clang::CodeGen::CodeGenFunction::EmitCastToVoidPtr(), clang::CodeGen::CodeGenFunction::EmitLoadOfScalar(), clang::CodeGen::CodeGenFunction::EmitLValueForField(), clang::CodeGen::CGOpenMPRuntime::emitOutlinedFunctionCall(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CodeGen::CodeGenFunction::FinishFunction(), clang::CodeGen::LValue::getAddress(), clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar(), clang::ASTContext::getAsVariableArrayType(), clang::CodeGen::CodeGenFunction::getContext(), clang::CodeGen::CodeGenModule::getContext(), clang::CodeGen::Address::getElementType(), clang::CodeGen::CodeGenTypes::GetFunctionType(), clang::CodeGen::CodeGenModule::getModule(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), clang::CodeGen::Address::getPointer(), clang::CodeGen::CodeGenModule::getTypes(), clang::CodeGen::CodeGenFunction::getVLASize(), clang::CodeGen::CodeGenTypeCache::Int32Ty, clang::CodeGen::CodeGenFunction::MakeNaturalAlignAddrLValue(), clang::CodeGen::CodeGenFunction::VlaSizePair::NumElts, clang::ImplicitParamDecl::Other, clang::CodeGen::CodeGenModule::SetInternalFunctionAttributes(), clang::CodeGen::CodeGenTypeCache::SizeTy, clang::CodeGen::CodeGenFunction::StartFunction(), and clang::CodeGen::CodeGenTypeCache::VoidPtrTy.
Referenced by clang::CodeGen::CGOpenMPRuntimeGPU::emitReduction().
|
static |
Emit instructions to copy a Reduce list, which contains partially aggregated values, in the specified direction.
Definition at line 1522 of file CGOpenMPRuntimeGPU.cpp.
Referenced by emitShuffleAndReduceFunction().
|
static |
Emit a helper that reduces data across two OpenMP threads (lanes) in the same warp.
It uses shuffle instructions to copy over data from a remote lane's stack. The reduction algorithm performed is specified by the fourth parameter.
Algorithm Versions. Full Warp Reduce (argument value 0): This algorithm assumes that all 32 lanes are active and gathers data from these 32 lanes, producing a single resultant value. Contiguous Partial Warp Reduce (argument value 1): This algorithm assumes that only a contiguous subset of lanes are active. This happens for the last warp in a parallel region when the user specified num_threads is not an integer multiple of 32 (the warp size).
Terminology Reduce element: Reduce element refers to the individual data field with primitive data types to be combined and reduced across threads. Reduce list: Reduce list refers to a collection of local, thread-private reduce elements. Remote Reduce list: Remote Reduce list refers to a collection of remote (relative to the current thread) reduce elements.
We distinguish between three states of threads that are important to the implementation of this function. Alive threads: Threads in a warp executing the SIMT instruction, as distinguished from threads that are inactive due to divergent control flow. Active threads: The minimal set of threads that has to be alive upon entry to this function. The computation is correct iff active threads are alive. Some threads are alive but they are not active because they do not contribute to the computation in any useful manner. Turning them off may introduce control flow overheads without any tangible benefits. Effective threads: In order to comply with the argument requirements of the shuffle function, we must keep all lanes holding data alive. But at most half of them perform value aggregation; we refer to this half of threads as effective. The other half is simply handing off their data.
Procedure Value shuffle: In this step active threads transfer data from higher lane positions in the warp to lower lane positions, creating Remote Reduce list. Value aggregation: In this step, effective threads combine their thread local Reduce list with Remote Reduce list and store the result in the thread local Reduce list. Value copy: In this step, we deal with the assumption made by algorithm 2 (i.e. contiguity assumption). When we have an odd number of lanes active, say 2k+1, only k threads will be effective and therefore k new values will be produced. However, the Reduce list owned by the (2k+1)th thread is ignored in the value aggregation. Therefore we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so that the contiguity assumption still holds.
Definition at line 2044 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenTypes::arrangeBuiltinFunctionDeclaration(), clang::CodeGen::CodeGenFunction::Builder, clang::C, clang::CodeGen::CodeGenFunction::ConvertTypeForMem(), clang::CodeGen::CodeGenFunction::createBasicBlock(), clang::CodeGen::CodeGenFunction::CreateMemTemp(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::CodeGen::CodeGenFunction::EmitBlock(), clang::CodeGen::CodeGenFunction::EmitLoadOfScalar(), clang::CodeGen::CGOpenMPRuntime::emitOutlinedFunctionCall(), emitReductionListCopy(), clang::CodeGen::CodeGenFunction::FinishFunction(), clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar(), clang::CodeGen::CodeGenModule::getContext(), clang::CodeGen::CodeGenTypes::GetFunctionType(), clang::CodeGen::CodeGenModule::getModule(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), clang::CodeGen::Address::getPointer(), clang::CodeGen::CodeGenTypeCache::getPointerAlign(), clang::CodeGen::CodeGenModule::getTypes(), clang::ImplicitParamDecl::Other, clang::CodeGen::CodeGenModule::SetInternalFunctionAttributes(), clang::CodeGen::CodeGenFunction::StartFunction(), and clang::CodeGen::CodeGenTypeCache::VoidPtrTy.
Referenced by clang::CodeGen::CGOpenMPRuntimeGPU::emitReduction().
|
static |
Definition at line 3512 of file CGOpenMPRuntimeGPU.cpp.
References clang::TargetOptions::FeatureMap, clang::CodeGen::CodeGenModule::getTarget(), clang::TargetInfo::getTargetOpts(), clang::TargetInfo::hasFeature(), clang::StringToCudaArch(), and clang::UNKNOWN.
Referenced by clang::CodeGen::CGOpenMPRuntimeGPU::processRequiresDirective().
|
static |
Definition at line 543 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CGOpenMPRuntimeGPU::CUDA, clang::CodeGen::CGOpenMPRuntimeGPU::Generic, and clang::CodeGen::CodeGenModule::getLangOpts().
Referenced by clang::CodeGen::CGOpenMPRuntimeGPU::emitFunctionProlog(), and clang::CodeGen::CGOpenMPRuntimeGPU::getAddressOfLocalVariable().
|
static |
Get list of lastprivate variables from the teams distribute ... or teams {distribute ...} directives.
Definition at line 930 of file CGOpenMPRuntimeGPU.cpp.
References clang::C, clang::CapturedStmt::getCapturedStmt(), clang::OMPExecutableDirective::getClausesOfKind(), clang::OMPExecutableDirective::getDirectiveKind(), clang::OMPExecutableDirective::getInnermostCapturedStmt(), getPrivateItem(), clang::CodeGen::CGOpenMPRuntime::getSingleCompoundChild(), clang::Stmt::IgnoreContainers(), clang::isOpenMPDistributeDirective(), and clang::isOpenMPTeamsDirective().
Referenced by clang::CodeGen::CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction().
|
static |
Get the id of the current lane in the Warp.
We assume that the warp size is 32, which is always the case on the NVPTX device, to generate more efficient code.
Definition at line 527 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CodeGenFunction::CGM, clang::TargetInfo::getGridValue(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), and clang::CodeGen::CodeGenFunction::getTarget().
Referenced by emitInterWarpCopyFunction().
|
static |
Get the id of the warp in the block.
We assume that the warp size is 32, which is always the case on the NVPTX device, to generate more efficient code.
Definition at line 516 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CodeGenFunction::CGM, clang::TargetInfo::getGridValue(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), and clang::CodeGen::CodeGenFunction::getTarget().
Referenced by emitInterWarpCopyFunction().
|
static |
Get list of reduction variables from the teams ... directives.
Definition at line 955 of file CGOpenMPRuntimeGPU.cpp.
References clang::C, clang::OMPExecutableDirective::getClausesOfKind(), clang::OMPExecutableDirective::getDirectiveKind(), getPrivateItem(), and clang::isOpenMPTeamsDirective().
Referenced by clang::CodeGen::CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction().
|
static |
Check for inner (nested) SPMD construct, if any.
Definition at line 549 of file CGOpenMPRuntimeGPU.cpp.
References clang::CapturedStmt::getCapturedStmt(), clang::OMPExecutableDirective::getDirectiveKind(), clang::OMPExecutableDirective::getInnermostCapturedStmt(), clang::CodeGen::CGOpenMPRuntime::getSingleCompoundChild(), clang::Stmt::IgnoreContainers(), and clang::isOpenMPParallelDirective().
Referenced by supportsSPMDExecutionMode().
|
static |
Definition at line 828 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenModule::addCompilerUsedGlobal(), clang::CodeGen::CodeGenModule::getModule(), and clang::CodeGen::CodeGenTypeCache::Int8Ty.
|
static |
Definition at line 1414 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CodeGenFunction::ConvertTypeForMem(), clang::CodeGen::CodeGenFunction::createBasicBlock(), clang::CodeGen::CGBuilderTy::CreateConstGEP(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), createRuntimeShuffleFunction(), clang::CodeGen::CodeGenFunction::EmitBlock(), clang::CodeGen::CodeGenFunction::EmitBranch(), clang::CodeGen::CodeGenFunction::EmitLoadOfScalar(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CharUnits::fromQuantity(), clang::CodeGen::Address::getAlignment(), clang::CodeGen::CodeGenFunction::getContext(), clang::CodeGen::Address::getElementType(), clang::ASTContext::getIntTypeForBitwidth(), clang::CodeGen::Address::getPointer(), clang::CodeGen::Address::getType(), clang::ASTContext::getTypeSizeInChars(), clang::CodeGen::CodeGenTypeCache::Int8Ty, Offset, clang::ASTContext::toBits(), clang::CodeGen::Type, and clang::CodeGen::CodeGenTypeCache::VoidPtrTy.
|
static |
Definition at line 651 of file CGOpenMPRuntimeGPU.cpp.
References clang::OMPExecutableDirective::getDirectiveKind(), and hasNestedSPMDDirective().