clang  14.0.0git
Classes | Public Types | Public Member Functions | Protected Member Functions | List of all members
clang::CodeGen::CGOpenMPRuntimeGPU Class Reference

#include "/home/buildbot/as-worker-4/publish-doxygen-docs/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h"

Inheritance diagram for clang::CodeGen::CGOpenMPRuntimeGPU:
Inheritance graph
[legend]
Collaboration diagram for clang::CodeGen::CGOpenMPRuntimeGPU:
Collaboration graph
[legend]

Public Types

enum  ExecutionMode { EM_SPMD, EM_NonSPMD, EM_Unknown }
 Defines the execution mode. More...
 
enum  DataSharingMode { CUDA, Generic }
 Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are actually global threadlocal, and Generic, in which the local variables are placed in global memory if they may escape their declaration context. More...
 

Public Member Functions

 CGOpenMPRuntimeGPU (CodeGenModule &CGM)
 
void clear () override
 
llvm::ValuegetGPUWarpSize (CodeGenFunction &CGF)
 Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURuntime Targets like AMDGCN and NVPTX. More...
 
llvm::ValuegetGPUThreadID (CodeGenFunction &CGF)
 Get the id of the current thread on the GPU. More...
 
llvm::ValuegetGPUNumThreads (CodeGenFunction &CGF)
 Get the maximum number of threads in a block of the GPU. More...
 
virtual void emitProcBindClause (CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
 Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate code for 'proc_bind' clause. More...
 
virtual void emitNumThreadsClause (CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
 Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) to generate code for 'num_threads' clause. More...
 
void emitNumTeamsClause (CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
 This function ought to emit, in the general case, a call to. More...
 
llvm::Function * emitParallelOutlinedFunction (const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
 Emits inlined function for the specified OpenMP parallel. More...
 
llvm::Function * emitTeamsOutlinedFunction (const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
 Emits inlined function for the specified OpenMP teams. More...
 
void emitTeamsCall (CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
 Emits code for teams call of the OutlinedFn with variables captured in a record which address is stored in CapturedStruct. More...
 
void emitParallelCall (CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond) override
 Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which address is stored in CapturedStruct. More...
 
void emitBarrierCall (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
 Emit an implicit/explicit barrier for OpenMP threads. More...
 
void emitCriticalRegion (CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
 Emits a critical region. More...
 
virtual void emitReduction (CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
 Emit a code for reduction clause. More...
 
llvm::FunctionCallee createNVPTXRuntimeFunction (unsigned Function)
 Returns specified OpenMP runtime function for the current OpenMP implementation. More...
 
const VarDecltranslateParameter (const FieldDecl *FD, const VarDecl *NativeParam) const override
 Translates the native parameter of outlined function if this is required for target. More...
 
Address getParameterAddress (CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
 Gets the address of the native argument basing on the address of the target-specific parameter. More...
 
void emitOutlinedFunctionCall (CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=llvm::None) const override
 Emits call of the outlined function with the provided arguments, translating these arguments to correct target-specific arguments. More...
 
void emitFunctionProlog (CodeGenFunction &CGF, const Decl *D) override
 Emits OpenMP-specific function prolog. More...
 
Address getAddressOfLocalVariable (CodeGenFunction &CGF, const VarDecl *VD) override
 Gets the OpenMP-specific address of the local variable. More...
 
void functionFinished (CodeGenFunction &CGF) override
 Cleans up references to the objects in finished function. More...
 
void getDefaultDistScheduleAndChunk (CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override
 Choose a default value for the dist_schedule clause. More...
 
void getDefaultScheduleAndChunk (CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override
 Choose a default value for the schedule clause. More...
 
void adjustTargetSpecificDataForLambdas (CodeGenFunction &CGF, const OMPExecutableDirective &D) const override
 Adjust some parameters for the target-based directives, like addresses of the variables captured by reference in lambdas. More...
 
void processRequiresDirective (const OMPRequiresDecl *D) override
 Perform check on requires decl to ensure that target architecture supports unified addressing. More...
 
bool hasAllocateAttributeForGlobalVar (const VarDecl *VD, LangAS &AS) override
 Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and translates it into the corresponding address space. More...
 
- Public Member Functions inherited from clang::CodeGen::CGOpenMPRuntime
llvm::OpenMPIRBuilder & getOMPBuilder ()
 
 CGOpenMPRuntime (CodeGenModule &CGM)
 
virtual ~CGOpenMPRuntime ()
 
void emitIfClause (CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
 Emits code for OpenMP 'if' clause using specified CodeGen function. More...
 
std::string getName (ArrayRef< StringRef > Parts) const
 Get the platform-specific name separator. More...
 
virtual void emitUserDefinedReduction (CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
 Emit code for the specified user defined reduction construct. More...
 
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction (const OMPDeclareReductionDecl *D)
 Get combiner/initializer for the specified user-defined reduction, if any. More...
 
void emitUserDefinedMapper (const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
 Emit the function for the user defined mapper construct. More...
 
llvm::Function * getOrCreateUserDefinedMapperFunc (const OMPDeclareMapperDecl *D)
 Get the function for the specified user-defined mapper. More...
 
virtual llvm::Function * emitTaskOutlinedFunction (const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
 Emits outlined function for the OpenMP task directive D. More...
 
virtual void emitMasterRegion (CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
 Emits a master region. More...
 
virtual void emitMaskedRegion (CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
 Emits a masked region. More...
 
virtual void emitTaskyieldCall (CodeGenFunction &CGF, SourceLocation Loc)
 Emits code for a taskyield directive. More...
 
virtual void emitTaskgroupRegion (CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
 Emit a taskgroup region. More...
 
virtual void emitSingleRegion (CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
 Emits a single region. More...
 
virtual void emitOrderedRegion (CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
 Emit an ordered region. More...
 
virtual bool isStaticNonchunked (OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
 Check if the specified ScheduleKind is static non-chunked. More...
 
virtual bool isStaticNonchunked (OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const
 Check if the specified ScheduleKind is static non-chunked. More...
 
virtual bool isStaticChunked (OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
 Check if the specified ScheduleKind is static chunked. More...
 
virtual bool isStaticChunked (OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const
 Check if the specified ScheduleKind is static non-chunked. More...
 
virtual bool isDynamic (OpenMPScheduleClauseKind ScheduleKind) const
 Check if the specified ScheduleKind is dynamic. More...
 
virtual void emitForDispatchInit (CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
 Call the appropriate runtime routine to initialize it before start of loop. More...
 
virtual void emitForStaticInit (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
 Call the appropriate runtime routine to initialize it before start of loop. More...
 
virtual void emitDistributeStaticInit (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
 
virtual void emitForOrderedIterationEnd (CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
 Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with the dynamic scheduling. More...
 
virtual void emitForStaticFinish (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
 Call the appropriate runtime routine to notify that we finished all the work with current loop. More...
 
virtual llvm::ValueemitForNext (CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
 Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride);. More...
 
virtual Address getAddrOfThreadPrivate (CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
 Returns address of the threadprivate variable for the current thread. More...
 
virtual Address getAddrOfDeclareTargetVar (const VarDecl *VD)
 Returns the address of the variable marked as declare target with link clause OR as declare target with to clause and unified memory. More...
 
virtual llvm::Function * emitThreadPrivateVarDefinition (const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
 Emit a code for initialization of threadprivate variable. More...
 
virtual bool emitDeclareTargetVarDefinition (const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit)
 Emit a code for initialization of declare target variable. More...
 
virtual Address getAddrOfArtificialThreadPrivate (CodeGenFunction &CGF, QualType VarType, StringRef Name)
 Creates artificial threadprivate variable with name Name and type VarType. More...
 
virtual void emitFlush (CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
 Emit flush of the variables specified in 'omp flush' directive. More...
 
virtual void emitTaskCall (CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
 Emit task region for the task directive. More...
 
virtual void emitTaskLoopCall (CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
 Emit task region for the taskloop directive. More...
 
virtual void emitInlinedDirective (CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
 Emit code for the directive that does not require outlining. More...
 
llvm::Function * emitReductionFunction (SourceLocation Loc, llvm::Type *ArgsType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
 Emits reduction function. More...
 
void emitSingleReductionCombiner (CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
 Emits single reduction combiner. More...
 
virtual llvm::ValueemitTaskReductionInit (CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
 Emit a code for initialization of task reduction clause. More...
 
virtual void emitTaskReductionFini (CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
 Emits the following code for reduction clause with task modifier: More...
 
virtual void emitTaskReductionFixups (CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
 Required to resolve existing problems in the runtime. More...
 
virtual Address getTaskReductionItem (CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
 Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal. More...
 
virtual void emitTaskwaitCall (CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
 Emit code for 'taskwait' directive. More...
 
virtual void emitCancellationPointCall (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
 Emit code for 'cancellation point' construct. More...
 
virtual void emitCancelCall (CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
 Emit code for 'cancel' construct. More...
 
virtual void emitTargetCall (CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
 Emit the target offloading code associated with D. More...
 
virtual bool emitTargetFunctions (GlobalDecl GD)
 Emit the target regions enclosed in GD function definition or the function itself in case it is a valid device function. More...
 
virtual bool emitTargetGlobalVariable (GlobalDecl GD)
 Emit the global variable if it is a valid device global variable. More...
 
virtual void registerTargetGlobalVariable (const VarDecl *VD, llvm::Constant *Addr)
 Checks if the provided global decl GD is a declare target variable and registers it when emitting code for the host. More...
 
virtual bool emitTargetGlobal (GlobalDecl GD)
 Emit the global GD if it is meaningful for the target. More...
 
llvm::Function * emitRequiresDirectiveRegFun ()
 Creates and returns a registration function for when at least one requires directives was used in the current module. More...
 
void createOffloadEntriesAndInfoMetadata ()
 Creates all the offload entries in the current compilation unit along with the associated metadata. More...
 
virtual void emitTargetDataCalls (CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info)
 Emit the target data mapping code associated with D. More...
 
virtual void emitTargetDataStandAloneCall (CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
 Emit the data mapping/movement code associated with the directive D that should be of the form 'target [{enter|exit} data | update]'. More...
 
virtual void emitDeclareSimdFunction (const FunctionDecl *FD, llvm::Function *Fn)
 Marks function Fn with properly mangled versions of vector functions. More...
 
virtual void emitDoacrossInit (CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
 Emit initialization for doacross loop nesting support. More...
 
virtual void emitDoacrossOrdered (CodeGenFunction &CGF, const OMPDependClause *C)
 Emit code for doacross ordered directive with 'depend' clause. More...
 
bool markAsGlobalTarget (GlobalDecl GD)
 Marks the declaration as already emitted for the device code and returns true, if it was marked already, and false, otherwise. More...
 
void emitDeferredTargetDecls () const
 Emit deferred declare target variables marked for deferred emission. More...
 
llvm::AtomicOrdering getDefaultMemoryOrdering () const
 Gets default memory ordering as specified in requires directive. More...
 
bool hasRequiresUnifiedSharedMemory () const
 Return whether the unified_shared_memory has been specified. More...
 
bool isNontemporalDecl (const ValueDecl *VD) const
 Checks if the VD variable is marked as nontemporal declaration in current context. More...
 
Address emitLastprivateConditionalInit (CodeGenFunction &CGF, const VarDecl *VD)
 Create specialized alloca to handle lastprivate conditionals. More...
 
virtual void checkAndEmitLastprivateConditional (CodeGenFunction &CGF, const Expr *LHS)
 Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the original variable. More...
 
virtual void checkAndEmitSharedLastprivateConditional (CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl >> &IgnoredDecls)
 Checks if the lastprivate conditional was updated in inner region and writes the value. More...
 
virtual void emitLastprivateConditionalFinalUpdate (CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
 Gets the address of the global copy used for lastprivate conditional update, if any. More...
 
std::pair< llvm::Value *, AddressemitDependClause (CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
 Emits list of dependecies based on the provided data (array of dependence/expression pairs). More...
 
Address emitDepobjDependClause (CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
 Emits list of dependecies based on the provided data (array of dependence/expression pairs) for depobj construct. More...
 
void emitDestroyClause (CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
 Emits the code to destroy the dependency object provided in depobj directive. More...
 
void emitUpdateClause (CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
 Updates the dependency kind in the specified depobj object. More...
 
void emitUsesAllocatorsInit (CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
 Initializes user defined allocators specified in the uses_allocators clauses. More...
 
void emitUsesAllocatorsFini (CodeGenFunction &CGF, const Expr *Allocator)
 Destroys user defined allocators specified in the uses_allocators clause. More...
 
bool isLocalVarInUntiedTask (CodeGenFunction &CGF, const VarDecl *VD) const
 Returns true if the variable is a local variable in untied task. More...
 

Protected Member Functions

StringRef getOutlinedHelperName () const override
 Get the function name of an outlined region. More...
 
bool isDefaultLocationConstant () const override
 Check if the default location must be constant. More...
 
unsigned getDefaultLocationReserved2Flags () const override
 Returns additional flags that can be stored in reserved_2 field of the default location. More...
 
- Protected Member Functions inherited from clang::CodeGen::CGOpenMPRuntime
 CGOpenMPRuntime (CodeGenModule &CGM, StringRef FirstSeparator, StringRef Separator)
 Constructor allowing to redefine the name separator for the variables. More...
 
virtual void emitTargetOutlinedFunctionHelper (const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
 Helper to emit outlined function for 'target' directive. More...
 
llvm::ValueemitUpdateLocation (CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0)
 Emits object of ident_t type with info for source location. More...
 
const ExprgetNumTeamsExprForTargetDirective (CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &DefaultVal)
 Emit the number of teams for a target directive. More...
 
llvm::ValueemitNumTeamsForTargetDirective (CodeGenFunction &CGF, const OMPExecutableDirective &D)
 
const ExprgetNumThreadsExprForTargetDirective (CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &DefaultVal)
 Emit the number of threads for a target directive. More...
 
llvm::ValueemitNumThreadsForTargetDirective (CodeGenFunction &CGF, const OMPExecutableDirective &D)
 
llvm::Type * getIdentTyPointerTy ()
 Returns pointer to ident_t type. More...
 
llvm::ValuegetThreadID (CodeGenFunction &CGF, SourceLocation Loc)
 Gets thread id value for the current thread. More...
 
void emitCall (CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args=llvm::None) const
 Emits Callee function call with arguments Args with location Loc. More...
 
virtual Address emitThreadIDAddress (CodeGenFunction &CGF, SourceLocation Loc)
 Emits address of the word in a memory where current thread id is stored. More...
 
void setLocThreadIdInsertPt (CodeGenFunction &CGF, bool AtCurrentPoint=false)
 
void clearLocThreadIdInsertPt (CodeGenFunction &CGF)
 
llvm::ArrayType * getKmpCriticalNameTy () const
 Get the LLVM type for the critical name. More...
 
llvm::ValuegetCriticalRegionLock (StringRef CriticalName)
 Returns corresponding lock object for the specified critical region name. More...
 

Additional Inherited Members

- Static Public Member Functions inherited from clang::CodeGen::CGOpenMPRuntime
static const StmtgetSingleCompoundChild (ASTContext &Ctx, const Stmt *Body)
 Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is not evaluatable at the compile time. More...
 
- Static Protected Member Functions inherited from clang::CodeGen::CGOpenMPRuntime
static unsigned getDefaultFlagsForBarriers (OpenMPDirectiveKind Kind)
 Returns default flags for the barriers depending on the directive, for which this barier is going to be emitted. More...
 
- Protected Attributes inherited from clang::CodeGen::CGOpenMPRuntime
CodeGenModuleCGM
 
StringRef FirstSeparator
 
StringRef Separator
 
llvm::OpenMPIRBuilder OMPBuilder
 An OpenMP-IR-Builder instance. More...
 

Detailed Description

Definition at line 24 of file CGOpenMPRuntimeGPU.h.

Member Enumeration Documentation

◆ DataSharingMode

Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are actually global threadlocal, and Generic, in which the local variables are placed in global memory if they may escape their declaration context.

Enumerator
CUDA 

CUDA data sharing mode.

Generic 

Generic data-sharing mode.

Definition at line 345 of file CGOpenMPRuntimeGPU.h.

◆ ExecutionMode

Defines the execution mode.

Enumerator
EM_SPMD 

SPMD execution mode (all threads are worker threads).

EM_NonSPMD 

Non-SPMD execution mode (1 master thread, others are workers).

EM_Unknown 

Unknown execution mode (orphaned directive).

Definition at line 27 of file CGOpenMPRuntimeGPU.h.

Constructor & Destructor Documentation

◆ CGOpenMPRuntimeGPU()

CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU ( CodeGenModule CGM)
explicit

Member Function Documentation

◆ adjustTargetSpecificDataForLambdas()

void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas ( CodeGenFunction CGF,
const OMPExecutableDirective D 
) const
overridevirtual

◆ clear()

void CGOpenMPRuntimeGPU::clear ( )
overridevirtual

◆ createNVPTXRuntimeFunction()

llvm::FunctionCallee clang::CodeGen::CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction ( unsigned  Function)

Returns specified OpenMP runtime function for the current OpenMP implementation.

Specialized for the NVPTX device.

Parameters
FunctionOpenMP runtime function.
Returns
Specified function.

◆ emitBarrierCall()

void CGOpenMPRuntimeGPU::emitBarrierCall ( CodeGenFunction CGF,
SourceLocation  Loc,
OpenMPDirectiveKind  Kind,
bool  EmitChecks = true,
bool  ForceSimpleCall = false 
)
overridevirtual

Emit an implicit/explicit barrier for OpenMP threads.

Parameters
KindDirective for which this implicit barrier call must be generated. Must be OMPD_barrier for explicit barrier generation.
EmitCheckstrue if need to emit checks for cancellation barriers.
ForceSimpleCalltrue simple barrier call must be emitted, false if runtime class decides which one to emit (simple or with cancellation checks).

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 1596 of file CGOpenMPRuntimeGPU.cpp.

References clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::EmitRuntimeCall(), clang::CodeGen::CGOpenMPRuntime::emitUpdateLocation(), clang::CodeGen::CGOpenMPRuntime::getDefaultFlagsForBarriers(), clang::CodeGen::CodeGenModule::getModule(), clang::CodeGen::CGOpenMPRuntime::getThreadID(), clang::CodeGen::CodeGenFunction::HaveInsertPoint(), and clang::CodeGen::CGOpenMPRuntime::OMPBuilder.

◆ emitCriticalRegion()

void CGOpenMPRuntimeGPU::emitCriticalRegion ( CodeGenFunction CGF,
StringRef  CriticalName,
const RegionCodeGenTy CriticalOpGen,
SourceLocation  Loc,
const Expr Hint = nullptr 
)
overridevirtual

◆ emitFunctionProlog()

void CGOpenMPRuntimeGPU::emitFunctionProlog ( CodeGenFunction CGF,
const Decl D 
)
overridevirtual

◆ emitNumTeamsClause()

void CGOpenMPRuntimeGPU::emitNumTeamsClause ( CodeGenFunction CGF,
const Expr NumTeams,
const Expr ThreadLimit,
SourceLocation  Loc 
)
overridevirtual

This function ought to emit, in the general case, a call to.

Parameters
NumTeamsAn integer expression of teams.
ThreadLimitAn integer expression of threads.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 1231 of file CGOpenMPRuntimeGPU.cpp.

◆ emitNumThreadsClause()

void CGOpenMPRuntimeGPU::emitNumThreadsClause ( CodeGenFunction CGF,
llvm::Value NumThreads,
SourceLocation  Loc 
)
overridevirtual

Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) to generate code for 'num_threads' clause.

Parameters
NumThreadsAn integer value of threads.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 1221 of file CGOpenMPRuntimeGPU.cpp.

References EM_SPMD, and clang::CodeGen::CGOpenMPRuntime::emitNumThreadsClause().

◆ emitOutlinedFunctionCall()

void CGOpenMPRuntimeGPU::emitOutlinedFunctionCall ( CodeGenFunction CGF,
SourceLocation  Loc,
llvm::FunctionCallee  OutlinedFn,
ArrayRef< llvm::Value * >  Args = llvm::None 
) const
overridevirtual

Emits call of the outlined function with the provided arguments, translating these arguments to correct target-specific arguments.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 3411 of file CGOpenMPRuntimeGPU.cpp.

References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), and clang::CodeGen::CGOpenMPRuntime::emitOutlinedFunctionCall().

Referenced by emitTeamsCall().

◆ emitParallelCall()

void CGOpenMPRuntimeGPU::emitParallelCall ( CodeGenFunction CGF,
SourceLocation  Loc,
llvm::Function *  OutlinedFn,
ArrayRef< llvm::Value * >  CapturedVars,
const Expr IfCond 
)
overridevirtual

Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which address is stored in CapturedStruct.

Parameters
OutlinedFnOutlined function to be run in parallel threads. Type of this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
CapturedVarsA pointer to the record with the references to variables used in OutlinedFn function.
IfCondCondition in the associated 'if' clause, if it was specified, nullptr otherwise.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 1509 of file CGOpenMPRuntimeGPU.cpp.

References clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CGBuilderTy::CreateConstArrayGEP(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::CodeGen::CGOpenMPRuntime::emitUpdateLocation(), clang::CodeGen::CodeGenModule::getModule(), clang::CodeGen::Address::getPointer(), clang::ASTContext::getPointerType(), clang::CodeGen::CGOpenMPRuntime::getThreadID(), clang::CodeGen::CodeGenFunction::HaveInsertPoint(), clang::CodeGen::CodeGenTypeCache::Int8PtrTy, clang::CodeGen::CGOpenMPRuntime::OMPBuilder, clang::CodeGen::CodeGenTypeCache::SizeTy, V, clang::CodeGen::CodeGenTypeCache::VoidPtrTy, and clang::ASTContext::VoidPtrTy.

◆ emitParallelOutlinedFunction()

llvm::Function * CGOpenMPRuntimeGPU::emitParallelOutlinedFunction ( const OMPExecutableDirective D,
const VarDecl ThreadIDVar,
OpenMPDirectiveKind  InnermostKind,
const RegionCodeGenTy CodeGen 
)
overridevirtual

Emits inlined function for the specified OpenMP parallel.

D. This outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32 BoundID, struct context_vars).

Parameters
DOpenMP directive.
ThreadIDVarVariable for thread id in the current OpenMP region.
InnermostKindKind of innermost directive (for simple directives it is a directive itself, for combined - its innermost directive).
CodeGenCode generation sequence for the D directive.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 1236 of file CGOpenMPRuntimeGPU.cpp.

References EM_SPMD, clang::CodeGen::CGOpenMPRuntime::emitParallelOutlinedFunction(), and clang::CodeGen::RegionCodeGenTy::setAction().

◆ emitProcBindClause()

void CGOpenMPRuntimeGPU::emitProcBindClause ( CodeGenFunction CGF,
llvm::omp::ProcBindKind  ProcBind,
SourceLocation  Loc 
)
overridevirtual

Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate code for 'proc_bind' clause.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 1211 of file CGOpenMPRuntimeGPU.cpp.

References EM_SPMD, and clang::CodeGen::CGOpenMPRuntime::emitProcBindClause().

◆ emitReduction()

void CGOpenMPRuntimeGPU::emitReduction ( CodeGenFunction CGF,
SourceLocation  Loc,
ArrayRef< const Expr * >  Privates,
ArrayRef< const Expr * >  LHSExprs,
ArrayRef< const Expr * >  RHSExprs,
ArrayRef< const Expr * >  ReductionOps,
ReductionOptionsTy  Options 
)
overridevirtual

Emit a code for reduction clause.

Design of OpenMP reductions on the GPU.

Parameters
PrivatesList of private copies for original reduction arguments.
LHSExprsList of LHS in ReductionOps reduction operations.
RHSExprsList of RHS in ReductionOps reduction operations.
ReductionOpsList of reduction operations in form 'LHS binop RHS' or 'operator binop(LHS, RHS)'.
OptionsList of options for reduction codegen: WithNowait true if parent directive has also nowait clause, false otherwise. SimpleReduction Emit reduction operation only. Used for omp simd directive on the host. ReductionKind The kind of reduction to perform.

Consider a typical OpenMP program with one or more reduction clauses:

float foo; double bar; #pragma omp target teams distribute parallel for \ reduction(+:foo) reduction(*:bar) for (int i = 0; i < N; i++) { foo += A[i]; bar *= B[i]; }

where 'foo' and 'bar' are reduced across all OpenMP threads in all teams. In our OpenMP implementation on the NVPTX device an OpenMP team is mapped to a CUDA threadblock and OpenMP threads within a team are mapped to CUDA threads within a threadblock. Our goal is to efficiently aggregate values across all OpenMP threads such that:

  • the compiler and runtime are logically concise, and
  • the reduction is performed efficiently in a hierarchical manner as follows: within OpenMP threads in the same warp, across warps in a threadblock, and finally across teams on the NVPTX device.

Introduction to Decoupling

We would like to decouple the compiler and the runtime so that the latter is ignorant of the reduction variables (number, data types) and the reduction operators. This allows a simpler interface and implementation while still attaining good performance.

Pseudocode for the aforementioned OpenMP program generated by the compiler is as follows:

  1. Create private copies of reduction variables on each OpenMP thread: 'foo_private', 'bar_private'
  2. Each OpenMP thread reduces the chunk of 'A' and 'B' assigned to it and writes the result in 'foo_private' and 'bar_private' respectively.
  3. Call the OpenMP runtime on the GPU to reduce within a team and store the result on the team master:

    __kmpc_nvptx_parallel_reduce_nowait_v2(..., reduceData, shuffleReduceFn, interWarpCpyFn)

    where: struct ReduceData { double *foo; double *bar; } reduceData reduceData.foo = &foo_private reduceData.bar = &bar_private

    'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two auxiliary functions generated by the compiler that operate on variables of type 'ReduceData'. They aid the runtime perform algorithmic steps in a data agnostic manner.

    'shuffleReduceFn' is a pointer to a function that reduces data of type 'ReduceData' across two OpenMP threads (lanes) in the same warp. It takes the following arguments as input:

    a. variable of type 'ReduceData' on the calling lane, b. its lane_id, c. an offset relative to the current lane_id to generate a remote_lane_id. The remote lane contains the second variable of type 'ReduceData' that is to be reduced. d. an algorithm version parameter determining which reduction algorithm to use.

    'shuffleReduceFn' retrieves data from the remote lane using efficient GPU shuffle intrinsics and reduces, using the algorithm specified by the 4th parameter, the two operands element-wise. The result is written to the first operand.

    Different reduction algorithms are implemented in different runtime functions, all calling 'shuffleReduceFn' to perform the essential reduction step. Therefore, based on the 4th parameter, this function behaves slightly differently to cooperate with the runtime to ensure correctness under different circumstances.

    'InterWarpCpyFn' is a pointer to a function that transfers reduced variables across warps. It tunnels, through CUDA shared memory, the thread-private data of type 'ReduceData' from lane 0 of each warp to a lane in the first warp.

  4. Call the OpenMP runtime on the GPU to reduce across teams. The last team writes the global reduced value to memory.

    ret = __kmpc_nvptx_teams_reduce_nowait(..., reduceData, shuffleReduceFn, interWarpCpyFn, scratchpadCopyFn, loadAndReduceFn)

    'scratchpadCopyFn' is a helper that stores reduced data from the team master to a scratchpad array in global memory.

    'loadAndReduceFn' is a helper that loads data from the scratchpad array and reduces it with the input operand.

    These compiler generated functions hide address calculation and alignment information from the runtime.

  5. if ret == 1: The team master of the last team stores the reduced result to the globals in memory. foo += reduceData.foo; bar *= reduceData.bar

Warp Reduction Algorithms

On the warp level, we have three algorithms implemented in the OpenMP runtime depending on the number of active lanes:

Full Warp Reduction

The reduce algorithm within a warp where all lanes are active is implemented in the runtime as follows:

full_warp_reduce(void *reduce_data, kmp_ShuffleReductFctPtr ShuffleReduceFn) { for (int offset = WARPSIZE/2; offset > 0; offset /= 2) ShuffleReduceFn(reduce_data, 0, offset, 0); }

The algorithm completes in log(2, WARPSIZE) steps.

'ShuffleReduceFn' is used here with lane_id set to 0 because it is not used therefore we save instructions by not retrieving lane_id from the corresponding special registers. The 4th parameter, which represents the version of the algorithm being used, is set to 0 to signify full warp reduction.

In this version, 'ShuffleReduceFn' behaves, per element, as follows:

#reduce_elem refers to an element in the local lane's data structure #remote_elem is retrieved from a remote lane remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE); reduce_elem = reduce_elem REDUCE_OP remote_elem;

Contiguous Partial Warp Reduction

This reduce algorithm is used within a warp where only the first 'n' (n <= WARPSIZE) lanes are active. It is typically used when the number of OpenMP threads in a parallel region is not a multiple of WARPSIZE. The algorithm is implemented in the runtime as follows:

void contiguous_partial_reduce(void *reduce_data, kmp_ShuffleReductFctPtr ShuffleReduceFn, int size, int lane_id) { int curr_size; int offset; curr_size = size; mask = curr_size/2; while (offset>0) { ShuffleReduceFn(reduce_data, lane_id, offset, 1); curr_size = (curr_size+1)/2; offset = curr_size/2; } }

In this version, 'ShuffleReduceFn' behaves, per element, as follows:

remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE); if (lane_id < offset) reduce_elem = reduce_elem REDUCE_OP remote_elem else reduce_elem = remote_elem

This algorithm assumes that the data to be reduced are located in a contiguous subset of lanes starting from the first. When there is an odd number of active lanes, the data in the last lane is not aggregated with any other lane's dat but is instead copied over.

Dispersed Partial Warp Reduction

This algorithm is used within a warp when any discontiguous subset of lanes are active. It is used to implement the reduction operation across lanes in an OpenMP simd region or in a nested parallel region.

void dispersed_partial_reduce(void *reduce_data, kmp_ShuffleReductFctPtr ShuffleReduceFn) { int size, remote_id; int logical_lane_id = number_of_active_lanes_before_me() * 2; do { remote_id = next_active_lane_id_right_after_me();

the above function returns 0 of no active lane

is present right after the current lane.

size = number_of_active_lanes_in_this_warp(); logical_lane_id /= 2; ShuffleReduceFn(reduce_data, logical_lane_id, remote_id-1-threadIdx.x, 2); } while (logical_lane_id % 2 == 0 && size > 1); }

There is no assumption made about the initial state of the reduction. Any number of lanes (>=1) could be active at any position. The reduction result is returned in the first active lane.

In this version, 'ShuffleReduceFn' behaves, per element, as follows:

remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE); if (lane_id % 2 == 0 && offset > 0) reduce_elem = reduce_elem REDUCE_OP remote_elem else reduce_elem = remote_elem

Intra-Team Reduction

This function, as implemented in the runtime call '__kmpc_nvptx_parallel_reduce_nowait_v2', aggregates data across OpenMP threads in a team. It first reduces within a warp using the aforementioned algorithms. We then proceed to gather all such reduced values at the first warp.

The runtime makes use of the function 'InterWarpCpyFn', which copies data from each of the "warp master" (zeroth lane of each warp, where warp-reduced data is held) to the zeroth warp. This step reduces (in a mathematical sense) the problem of reduction across warp masters in a block to the problem of warp reduction.

Inter-Team Reduction

Once a team has reduced its data to a single value, it is stored in a global scratchpad array. Since each team has a distinct slot, this can be done without locking.

The last team to write to the scratchpad array proceeds to reduce the scratchpad array. One or more workers in the last team use the helper 'loadAndReduceDataFn' to load and reduce values from the array, i.e., the k'th worker reduces every k'th element.

Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait_v2' to reduce across workers and compute a globally reduced value.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 3163 of file CGOpenMPRuntimeGPU.cpp.

◆ emitTeamsCall()

void CGOpenMPRuntimeGPU::emitTeamsCall ( CodeGenFunction CGF,
const OMPExecutableDirective D,
SourceLocation  Loc,
llvm::Function *  OutlinedFn,
ArrayRef< llvm::Value * >  CapturedVars 
)
overridevirtual

Emits code for teams call of the OutlinedFn with variables captured in a record which address is stored in CapturedStruct.

Parameters
OutlinedFnOutlined function to be run by team masters. Type of this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
CapturedVarsA pointer to the record with the references to variables used in OutlinedFn function.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 1491 of file CGOpenMPRuntimeGPU.cpp.

References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CodeGenFunction::CreateDefaultAlignTempAlloca(), clang::CodeGen::CGBuilderTy::CreateStore(), emitOutlinedFunctionCall(), clang::CodeGen::CGOpenMPRuntime::emitThreadIDAddress(), clang::CodeGen::Address::getPointer(), clang::CodeGen::CodeGenFunction::HaveInsertPoint(), and clang::CodeGen::CodeGenTypeCache::Int32Ty.

◆ emitTeamsOutlinedFunction()

llvm::Function * CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction ( const OMPExecutableDirective D,
const VarDecl ThreadIDVar,
OpenMPDirectiveKind  InnermostKind,
const RegionCodeGenTy CodeGen 
)
overridevirtual

Emits inlined function for the specified OpenMP teams.

D. This outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32 BoundID, struct context_vars).

Parameters
DOpenMP directive.
ThreadIDVarVariable for thread id in the current OpenMP region.
InnermostKindKind of innermost directive (for simple directives it is a directive itself, for combined - its innermost directive).
CodeGenCode generation sequence for the D directive.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 1313 of file CGOpenMPRuntimeGPU.cpp.

References clang::CodeGen::CodeGenFunction::CGM, clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::CurFn, EM_SPMD, clang::CodeGen::CGOpenMPRuntime::emitTeamsOutlinedFunction(), clang::OMPExecutableDirective::getBeginLoc(), clang::CapturedStmt::getCapturedDecl(), clang::OMPExecutableDirective::getCapturedStmt(), clang::CodeGen::CodeGenModule::getContext(), getDistributeLastprivateVars(), clang::TargetInfo::getGridValue(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), clang::CodeGen::CodeGenModule::getTarget(), getTeamsReductionVars(), and clang::CodeGen::RegionCodeGenTy::setAction().

◆ functionFinished()

void CGOpenMPRuntimeGPU::functionFinished ( CodeGenFunction CGF)
overridevirtual

Cleans up references to the objects in finished function.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 3709 of file CGOpenMPRuntimeGPU.cpp.

References clang::CodeGen::CodeGenFunction::CurFn, and clang::CodeGen::CGOpenMPRuntime::functionFinished().

◆ getAddressOfLocalVariable()

Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable ( CodeGenFunction CGF,
const VarDecl VD 
)
overridevirtual

◆ getDefaultDistScheduleAndChunk()

void CGOpenMPRuntimeGPU::getDefaultDistScheduleAndChunk ( CodeGenFunction CGF,
const OMPLoopDirective S,
OpenMPDistScheduleClauseKind ScheduleKind,
llvm::Value *&  Chunk 
) const
overridevirtual

Choose a default value for the dist_schedule clause.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 3714 of file CGOpenMPRuntimeGPU.cpp.

◆ getDefaultLocationReserved2Flags()

unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags ( ) const
overrideprotectedvirtual

Returns additional flags that can be stored in reserved_2 field of the default location.

For NVPTX target contains data about SPMD/Non-SPMD execution mode + Full/Lightweight runtime mode. Used for better optimization.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 1180 of file CGOpenMPRuntimeGPU.cpp.

◆ getDefaultScheduleAndChunk()

void CGOpenMPRuntimeGPU::getDefaultScheduleAndChunk ( CodeGenFunction CGF,
const OMPLoopDirective S,
OpenMPScheduleClauseKind ScheduleKind,
const Expr *&  ChunkExpr 
) const
overridevirtual

Choose a default value for the schedule clause.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 3731 of file CGOpenMPRuntimeGPU.cpp.

◆ getGPUNumThreads()

llvm::Value * CGOpenMPRuntimeGPU::getGPUNumThreads ( CodeGenFunction CGF)

◆ getGPUThreadID()

llvm::Value * CGOpenMPRuntimeGPU::getGPUThreadID ( CodeGenFunction CGF)

◆ getGPUWarpSize()

llvm::Value * CGOpenMPRuntimeGPU::getGPUWarpSize ( CodeGenFunction CGF)

Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURuntime Targets like AMDGCN and NVPTX.

Get the GPU warp size.

Definition at line 3970 of file CGOpenMPRuntimeGPU.cpp.

References clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::EmitRuntimeCall(), clang::CodeGen::CodeGenModule::getModule(), and clang::CodeGen::CGOpenMPRuntime::OMPBuilder.

Referenced by createRuntimeShuffleFunction().

◆ getOutlinedHelperName()

StringRef clang::CodeGen::CGOpenMPRuntimeGPU::getOutlinedHelperName ( ) const
inlineoverrideprotectedvirtual

Get the function name of an outlined region.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 156 of file CGOpenMPRuntimeGPU.h.

◆ getParameterAddress()

Address CGOpenMPRuntimeGPU::getParameterAddress ( CodeGenFunction CGF,
const VarDecl NativeParam,
const VarDecl TargetParam 
) const
overridevirtual

◆ hasAllocateAttributeForGlobalVar()

bool CGOpenMPRuntimeGPU::hasAllocateAttributeForGlobalVar ( const VarDecl VD,
LangAS AS 
)
overridevirtual

Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and translates it into the corresponding address space.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 3795 of file CGOpenMPRuntimeGPU.cpp.

References clang::cuda_constant, clang::cuda_shared, clang::Default, clang::Decl::getAttr(), and clang::Decl::hasAttr().

◆ isDefaultLocationConstant()

bool clang::CodeGen::CGOpenMPRuntimeGPU::isDefaultLocationConstant ( ) const
inlineoverrideprotectedvirtual

Check if the default location must be constant.

Constant for NVPTX for better optimization.

Reimplemented from clang::CodeGen::CGOpenMPRuntime.

Definition at line 162 of file CGOpenMPRuntimeGPU.h.

◆ processRequiresDirective()

void CGOpenMPRuntimeGPU::processRequiresDirective ( const OMPRequiresDecl D)
overridevirtual

◆ translateParameter()

const VarDecl * CGOpenMPRuntimeGPU::translateParameter ( const FieldDecl FD,
const VarDecl NativeParam 
) const
overridevirtual

The documentation for this class was generated from the following files: