clang 20.0.0git
|
#include "/home/buildbot/as-worker-4/publish-doxygen-docs/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h"
Public Types | |
enum | ExecutionMode { EM_SPMD , EM_NonSPMD , EM_Unknown } |
Defines the execution mode. More... | |
enum | DataSharingMode { DS_CUDA , DS_Generic } |
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are actually global threadlocal, and Generic, in which the local variables are placed in global memory if they may escape their declaration context. More... | |
Public Member Functions | |
CGOpenMPRuntimeGPU (CodeGenModule &CGM) | |
bool | isGPU () const override |
Returns true if the current target is a GPU. | |
bool | isDelayedVariableLengthDecl (CodeGenFunction &CGF, const VarDecl *VD) const override |
Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURuntime Targets like AMDGCN and NVPTX. | |
std::pair< llvm::Value *, llvm::Value * > | getKmpcAllocShared (CodeGenFunction &CGF, const VarDecl *VD) override |
Get call to __kmpc_alloc_shared. | |
void | getKmpcFreeShared (CodeGenFunction &CGF, const std::pair< llvm::Value *, llvm::Value * > &AddrSizePair) override |
Get call to __kmpc_free_shared. | |
llvm::Value * | getGPUThreadID (CodeGenFunction &CGF) |
Get the id of the current thread on the GPU. | |
llvm::Value * | getGPUNumThreads (CodeGenFunction &CGF) |
Get the maximum number of threads in a block of the GPU. | |
void | emitProcBindClause (CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override |
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
global_tid, int proc_bind) to generate code for 'proc_bind' clause. | |
void | emitNumThreadsClause (CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override |
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
global_tid, kmp_int32 num_threads) to generate code for 'num_threads' clause. | |
void | emitNumTeamsClause (CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override |
This function ought to emit, in the general case, a call to. | |
llvm::Function * | emitParallelOutlinedFunction (CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override |
Emits inlined function for the specified OpenMP parallel. | |
llvm::Function * | emitTeamsOutlinedFunction (CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override |
Emits inlined function for the specified OpenMP teams. | |
void | emitTeamsCall (CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override |
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stored in CapturedStruct. | |
void | emitParallelCall (CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override |
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which address is stored in CapturedStruct. | |
void | emitBarrierCall (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override |
Emit an implicit/explicit barrier for OpenMP threads. | |
void | emitCriticalRegion (CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override |
Emits a critical region. | |
void | emitReduction (CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override |
Emit a code for reduction clause. | |
const VarDecl * | translateParameter (const FieldDecl *FD, const VarDecl *NativeParam) const override |
Translates the native parameter of outlined function if this is required for target. | |
Address | getParameterAddress (CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override |
Gets the address of the native argument basing on the address of the target-specific parameter. | |
void | emitOutlinedFunctionCall (CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const override |
Emits call of the outlined function with the provided arguments, translating these arguments to correct target-specific arguments. | |
void | emitFunctionProlog (CodeGenFunction &CGF, const Decl *D) override |
Emits OpenMP-specific function prolog. | |
Address | getAddressOfLocalVariable (CodeGenFunction &CGF, const VarDecl *VD) override |
Gets the OpenMP-specific address of the local variable. | |
void | functionFinished (CodeGenFunction &CGF) override |
Cleans up references to the objects in finished function. | |
void | getDefaultDistScheduleAndChunk (CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override |
Choose a default value for the dist_schedule clause. | |
void | getDefaultScheduleAndChunk (CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override |
Choose a default value for the schedule clause. | |
void | adjustTargetSpecificDataForLambdas (CodeGenFunction &CGF, const OMPExecutableDirective &D) const override |
Adjust some parameters for the target-based directives, like addresses of the variables captured by reference in lambdas. | |
void | processRequiresDirective (const OMPRequiresDecl *D) override |
Perform check on requires decl to ensure that target architecture supports unified addressing. | |
bool | hasAllocateAttributeForGlobalVar (const VarDecl *VD, LangAS &AS) override |
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and translates it into the corresponding address space. | |
Public Member Functions inherited from clang::CodeGen::CGOpenMPRuntime | |
llvm::OpenMPIRBuilder & | getOMPBuilder () |
CGOpenMPRuntime (CodeGenModule &CGM) | |
virtual | ~CGOpenMPRuntime () |
virtual void | clear () |
llvm::Value * | emitUpdateLocation (CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false) |
Emits object of ident_t type with info for source location. | |
const Expr * | getNumTeamsExprForTargetDirective (CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal) |
Emit the number of teams for a target directive. | |
llvm::Value * | emitNumTeamsForTargetDirective (CodeGenFunction &CGF, const OMPExecutableDirective &D) |
const Expr * | getNumThreadsExprForTargetDirective (CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr) |
Check for a number of threads upper bound constant value (stored in UpperBound ), or expression (returned). | |
llvm::Value * | emitNumThreadsForTargetDirective (CodeGenFunction &CGF, const OMPExecutableDirective &D) |
Emit an expression that denotes the number of threads a target region shall use. | |
llvm::Value * | emitTargetNumIterationsCall (CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) |
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams distribute parallel for'. | |
virtual bool | isGPU () const |
Returns true if the current target is a GPU. | |
virtual bool | isDelayedVariableLengthDecl (CodeGenFunction &CGF, const VarDecl *VD) const |
Check if the variable length declaration is delayed: | |
virtual std::pair< llvm::Value *, llvm::Value * > | getKmpcAllocShared (CodeGenFunction &CGF, const VarDecl *VD) |
Get call to __kmpc_alloc_shared. | |
virtual void | getKmpcFreeShared (CodeGenFunction &CGF, const std::pair< llvm::Value *, llvm::Value * > &AddrSizePair) |
Get call to __kmpc_free_shared. | |
void | emitIfClause (CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen) |
Emits code for OpenMP 'if' clause using specified CodeGen function. | |
std::string | getName (ArrayRef< StringRef > Parts) const |
Get the platform-specific name separator. | |
virtual void | emitUserDefinedReduction (CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) |
Emit code for the specified user defined reduction construct. | |
virtual std::pair< llvm::Function *, llvm::Function * > | getUserDefinedReduction (const OMPDeclareReductionDecl *D) |
Get combiner/initializer for the specified user-defined reduction, if any. | |
void | emitUserDefinedMapper (const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr) |
Emit the function for the user defined mapper construct. | |
llvm::Function * | getOrCreateUserDefinedMapperFunc (const OMPDeclareMapperDecl *D) |
Get the function for the specified user-defined mapper. | |
virtual llvm::Function * | emitParallelOutlinedFunction (CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) |
Emits outlined function for the specified OpenMP parallel directive D. | |
virtual llvm::Function * | emitTeamsOutlinedFunction (CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) |
Emits outlined function for the specified OpenMP teams directive D. | |
virtual llvm::Function * | emitTaskOutlinedFunction (const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) |
Emits outlined function for the OpenMP task directive D. | |
virtual void | functionFinished (CodeGenFunction &CGF) |
Cleans up references to the objects in finished function. | |
virtual void | emitParallelCall (CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) |
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which address is stored in CapturedStruct. | |
virtual void | emitCriticalRegion (CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) |
Emits a critical region. | |
virtual void | emitMasterRegion (CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) |
Emits a master region. | |
virtual void | emitMaskedRegion (CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) |
Emits a masked region. | |
virtual void | emitTaskyieldCall (CodeGenFunction &CGF, SourceLocation Loc) |
Emits code for a taskyield directive. | |
virtual void | emitErrorCall (CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal) |
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity, const char *message);. | |
virtual void | emitTaskgroupRegion (CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) |
Emit a taskgroup region. | |
virtual void | emitSingleRegion (CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) |
Emits a single region. | |
virtual void | emitOrderedRegion (CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) |
Emit an ordered region. | |
virtual void | emitBarrierCall (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) |
Emit an implicit/explicit barrier for OpenMP threads. | |
virtual bool | isStaticNonchunked (OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const |
Check if the specified ScheduleKind is static non-chunked. | |
virtual bool | isStaticNonchunked (OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const |
Check if the specified ScheduleKind is static non-chunked. | |
virtual bool | isStaticChunked (OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const |
Check if the specified ScheduleKind is static chunked. | |
virtual bool | isStaticChunked (OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const |
Check if the specified ScheduleKind is static non-chunked. | |
virtual bool | isDynamic (OpenMPScheduleClauseKind ScheduleKind) const |
Check if the specified ScheduleKind is dynamic. | |
virtual void | emitForDispatchInit (CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) |
Call the appropriate runtime routine to initialize it before start of loop. | |
virtual void | emitForDispatchDeinit (CodeGenFunction &CGF, SourceLocation Loc) |
This is used for non static scheduled types and when the ordered clause is present on the loop construct. | |
virtual void | emitForStaticInit (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) |
Call the appropriate runtime routine to initialize it before start of loop. | |
virtual void | emitDistributeStaticInit (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) |
virtual void | emitForOrderedIterationEnd (CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) |
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with the dynamic scheduling. | |
virtual void | emitForStaticFinish (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) |
Call the appropriate runtime routine to notify that we finished all the work with current loop. | |
virtual llvm::Value * | emitForNext (CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) |
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride);. | |
virtual void | emitNumThreadsClause (CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) |
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
global_tid, kmp_int32 num_threads) to generate code for 'num_threads' clause. | |
virtual void | emitProcBindClause (CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) |
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
global_tid, int proc_bind) to generate code for 'proc_bind' clause. | |
virtual Address | getAddrOfThreadPrivate (CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) |
Returns address of the threadprivate variable for the current thread. | |
virtual ConstantAddress | getAddrOfDeclareTargetVar (const VarDecl *VD) |
Returns the address of the variable marked as declare target with link clause OR as declare target with to clause and unified memory. | |
virtual llvm::Function * | emitThreadPrivateVarDefinition (const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) |
Emit a code for initialization of threadprivate variable. | |
virtual void | emitDeclareTargetFunction (const FunctionDecl *FD, llvm::GlobalValue *GV) |
Emit code for handling declare target functions in the runtime. | |
virtual Address | getAddrOfArtificialThreadPrivate (CodeGenFunction &CGF, QualType VarType, StringRef Name) |
Creates artificial threadprivate variable with name Name and type VarType . | |
virtual void | emitFlush (CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) |
Emit flush of the variables specified in 'omp flush' directive. | |
virtual void | emitTaskCall (CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) |
Emit task region for the task directive. | |
virtual void | emitTaskLoopCall (CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) |
Emit task region for the taskloop directive. | |
virtual void | emitInlinedDirective (CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false) |
Emit code for the directive that does not require outlining. | |
llvm::Function * | emitReductionFunction (StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps) |
Emits reduction function. | |
void | emitSingleReductionCombiner (CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS) |
Emits single reduction combiner. | |
virtual void | emitReduction (CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) |
Emit a code for reduction clause. | |
virtual llvm::Value * | emitTaskReductionInit (CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) |
Emit a code for initialization of task reduction clause. | |
virtual void | emitTaskReductionFini (CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) |
Emits the following code for reduction clause with task modifier: | |
virtual void | emitTaskReductionFixups (CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) |
Required to resolve existing problems in the runtime. | |
virtual Address | getTaskReductionItem (CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) |
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal . | |
virtual void | emitTaskwaitCall (CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) |
Emit code for 'taskwait' directive. | |
virtual void | emitCancellationPointCall (CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) |
Emit code for 'cancellation point' construct. | |
virtual void | emitCancelCall (CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) |
Emit code for 'cancel' construct. | |
virtual void | emitTargetOutlinedFunction (const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) |
Emit outilined function for 'target' directive. | |
virtual void | emitTargetCall (CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) |
Emit the target offloading code associated with D. | |
virtual bool | emitTargetFunctions (GlobalDecl GD) |
Emit the target regions enclosed in GD function definition or the function itself in case it is a valid device function. | |
virtual bool | emitTargetGlobalVariable (GlobalDecl GD) |
Emit the global variable if it is a valid device global variable. | |
virtual void | registerTargetGlobalVariable (const VarDecl *VD, llvm::Constant *Addr) |
Checks if the provided global decl GD is a declare target variable and registers it when emitting code for the host. | |
virtual bool | emitTargetGlobal (GlobalDecl GD) |
Emit the global GD if it is meaningful for the target. | |
void | createOffloadEntriesAndInfoMetadata () |
Creates all the offload entries in the current compilation unit along with the associated metadata. | |
virtual void | emitTeamsCall (CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) |
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stored in CapturedStruct. | |
virtual void | emitNumTeamsClause (CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) |
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code for num_teams clause. | |
virtual void | emitThreadLimitClause (CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc) |
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32
global_tid, kmp_int32 thread_limit) to generate code for thread_limit clause on target directive. | |
virtual void | emitTargetDataCalls (CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) |
Emit the target data mapping code associated with D. | |
virtual void | emitTargetDataStandAloneCall (CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) |
Emit the data mapping/movement code associated with the directive D that should be of the form 'target [{enter|exit} data | update]'. | |
virtual void | emitDeclareSimdFunction (const FunctionDecl *FD, llvm::Function *Fn) |
Marks function Fn with properly mangled versions of vector functions. | |
virtual void | emitDoacrossInit (CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) |
Emit initialization for doacross loop nesting support. | |
virtual void | emitDoacrossOrdered (CodeGenFunction &CGF, const OMPDependClause *C) |
Emit code for doacross ordered directive with 'depend' clause. | |
virtual void | emitDoacrossOrdered (CodeGenFunction &CGF, const OMPDoacrossClause *C) |
Emit code for doacross ordered directive with 'doacross' clause. | |
virtual const VarDecl * | translateParameter (const FieldDecl *FD, const VarDecl *NativeParam) const |
Translates the native parameter of outlined function if this is required for target. | |
virtual Address | getParameterAddress (CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const |
Gets the address of the native argument basing on the address of the target-specific parameter. | |
virtual void | getDefaultDistScheduleAndChunk (CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const |
Choose default schedule type and chunk value for the dist_schedule clause. | |
virtual void | getDefaultScheduleAndChunk (CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const |
Choose default schedule type and chunk value for the schedule clause. | |
virtual void | emitOutlinedFunctionCall (CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const |
Emits call of the outlined function with the provided arguments, translating these arguments to correct target-specific arguments. | |
virtual void | emitFunctionProlog (CodeGenFunction &CGF, const Decl *D) |
Emits OpenMP-specific function prolog. | |
virtual Address | getAddressOfLocalVariable (CodeGenFunction &CGF, const VarDecl *VD) |
Gets the OpenMP-specific address of the local variable. | |
bool | markAsGlobalTarget (GlobalDecl GD) |
Marks the declaration as already emitted for the device code and returns true, if it was marked already, and false, otherwise. | |
void | emitDeferredTargetDecls () const |
Emit deferred declare target variables marked for deferred emission. | |
virtual void | adjustTargetSpecificDataForLambdas (CodeGenFunction &CGF, const OMPExecutableDirective &D) const |
Adjust some parameters for the target-based directives, like addresses of the variables captured by reference in lambdas. | |
virtual void | processRequiresDirective (const OMPRequiresDecl *D) |
Perform check on requires decl to ensure that target architecture supports unified addressing. | |
llvm::AtomicOrdering | getDefaultMemoryOrdering () const |
Gets default memory ordering as specified in requires directive. | |
virtual bool | hasAllocateAttributeForGlobalVar (const VarDecl *VD, LangAS &AS) |
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and translates it into the corresponding address space. | |
bool | hasRequiresUnifiedSharedMemory () const |
Return whether the unified_shared_memory has been specified. | |
bool | isNontemporalDecl (const ValueDecl *VD) const |
Checks if the VD variable is marked as nontemporal declaration in current context. | |
Address | emitLastprivateConditionalInit (CodeGenFunction &CGF, const VarDecl *VD) |
Create specialized alloca to handle lastprivate conditionals. | |
virtual void | checkAndEmitLastprivateConditional (CodeGenFunction &CGF, const Expr *LHS) |
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the original variable. | |
virtual void | checkAndEmitSharedLastprivateConditional (CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls) |
Checks if the lastprivate conditional was updated in inner region and writes the value. | |
virtual void | emitLastprivateConditionalFinalUpdate (CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc) |
Gets the address of the global copy used for lastprivate conditional update, if any. | |
std::pair< llvm::Value *, Address > | emitDependClause (CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc) |
Emits list of dependecies based on the provided data (array of dependence/expression pairs). | |
Address | emitDepobjDependClause (CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc) |
Emits list of dependecies based on the provided data (array of dependence/expression pairs) for depobj construct. | |
void | emitDestroyClause (CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc) |
Emits the code to destroy the dependency object provided in depobj directive. | |
void | emitUpdateClause (CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc) |
Updates the dependency kind in the specified depobj object. | |
void | emitUsesAllocatorsInit (CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits) |
Initializes user defined allocators specified in the uses_allocators clauses. | |
void | emitUsesAllocatorsFini (CodeGenFunction &CGF, const Expr *Allocator) |
Destroys user defined allocators specified in the uses_allocators clause. | |
bool | isLocalVarInUntiedTask (CodeGenFunction &CGF, const VarDecl *VD) const |
Returns true if the variable is a local variable in untied task. | |
Protected Member Functions | |
bool | isDefaultLocationConstant () const override |
Check if the default location must be constant. | |
Protected Member Functions inherited from clang::CodeGen::CGOpenMPRuntime | |
void | computeMinAndMaxThreadsAndTeams (const OMPExecutableDirective &D, CodeGenFunction &CGF, int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, int32_t &MaxTeamsVal) |
Helper to determine the min/max number of threads/teams for D . | |
virtual void | emitTargetOutlinedFunctionHelper (const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) |
Helper to emit outlined function for 'target' directive. | |
llvm::Type * | getIdentTyPointerTy () |
Returns pointer to ident_t type. | |
llvm::Value * | getThreadID (CodeGenFunction &CGF, SourceLocation Loc) |
Gets thread id value for the current thread. | |
std::string | getOutlinedHelperName (StringRef Name) const |
Get the function name of an outlined region. | |
std::string | getOutlinedHelperName (CodeGenFunction &CGF) const |
std::string | getReductionFuncName (StringRef Name) const |
Get the function name of a reduction function. | |
void | emitCall (CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args=std::nullopt) const |
Emits Callee function call with arguments Args with location Loc . | |
virtual Address | emitThreadIDAddress (CodeGenFunction &CGF, SourceLocation Loc) |
Emits address of the word in a memory where current thread id is stored. | |
void | setLocThreadIdInsertPt (CodeGenFunction &CGF, bool AtCurrentPoint=false) |
void | clearLocThreadIdInsertPt (CodeGenFunction &CGF) |
virtual bool | isDefaultLocationConstant () const |
Check if the default location must be constant. | |
virtual unsigned | getDefaultLocationReserved2Flags () const |
Returns additional flags that can be stored in reserved_2 field of the default location. | |
llvm::ArrayType * | getKmpCriticalNameTy () const |
Get the LLVM type for the critical name. | |
llvm::Value * | getCriticalRegionLock (StringRef CriticalName) |
Returns corresponding lock object for the specified critical region name. | |
void | scanForTargetRegionsFunctions (const Stmt *S, StringRef ParentName) |
Start scanning from statement S and emit all target regions found along the way. | |
void | emitKmpRoutineEntryT (QualType KmpInt32Ty) |
Build type kmp_routine_entry_t (if not built yet). | |
llvm::Type * | getKmpc_MicroPointerTy () |
Returns pointer to kmpc_micro type. | |
llvm::Constant * | getOrCreateThreadPrivateCache (const VarDecl *VD) |
If the specified mangled name is not in the module, create and return threadprivate cache object. | |
void | emitThreadPrivateVarInit (CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) |
Emits initialization code for the threadprivate variables. | |
void | emitUDMapperArrayInitOrDel (CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *BasePtr, llvm::Value *Ptr, llvm::Value *Size, llvm::Value *MapType, llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) |
Emit the array initialization or deletion portion for user-defined mapper code generation. | |
TaskResultTy | emitTaskInit (CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data) |
Emit task region for the task directive. | |
void | emitLastprivateConditionalUpdate (CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc) |
Emit update for lastprivate conditional data. | |
std::pair< llvm::Value *, LValue > | getDepobjElements (CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc) |
Returns the number of the elements and the address of the depobj dependency array. | |
SmallVector< llvm::Value *, 4 > | emitDepobjElementsSizes (CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data) |
void | emitDepobjElements (CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray) |
Additional Inherited Members | |
Static Public Member Functions inherited from clang::CodeGen::CGOpenMPRuntime | |
static const Stmt * | getSingleCompoundChild (ASTContext &Ctx, const Stmt *Body) |
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is not evaluatable at the compile time. | |
Protected Types inherited from clang::CodeGen::CGOpenMPRuntime | |
typedef llvm::DenseMap< SourceLocation, llvm::Value * > | OpenMPDebugLocMapTy |
Map for SourceLocation and OpenMP runtime library debug locations. | |
typedef llvm::DenseMap< llvm::Function *, DebugLocThreadIdTy > | OpenMPLocThreadIDMapTy |
Map of local debug location, ThreadId and functions. | |
typedef llvm::DenseMap< const OMPDeclareReductionDecl *, std::pair< llvm::Function *, llvm::Function * > > | UDRMapTy |
Map of UDRs and corresponding combiner/initializer. | |
typedef llvm::DenseMap< llvm::Function *, SmallVector< const OMPDeclareReductionDecl *, 4 > > | FunctionUDRMapTy |
Map of functions and locally defined UDRs. | |
using | FunctionUDMMapTy = llvm::DenseMap< llvm::Function *, SmallVector< const OMPDeclareMapperDecl *, 4 > > |
Map of functions and their local user-defined mappers. | |
using | NontemporalDeclsSet = llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > |
using | UntiedLocalVarsAddressesMap = llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > |
Static Protected Member Functions inherited from clang::CodeGen::CGOpenMPRuntime | |
static unsigned | getDefaultFlagsForBarriers (OpenMPDirectiveKind Kind) |
Returns default flags for the barriers depending on the directive, for which this barier is going to be emitted. | |
Protected Attributes inherited from clang::CodeGen::CGOpenMPRuntime | |
CodeGenModule & | CGM |
llvm::OpenMPIRBuilder | OMPBuilder |
An OpenMP-IR-Builder instance. | |
OpenMPDebugLocMapTy | OpenMPDebugLocMap |
llvm::FunctionType * | Kmpc_MicroTy = nullptr |
The type for a microtask which gets passed to __kmpc_fork_call(). | |
OpenMPLocThreadIDMapTy | OpenMPLocThreadIDMap |
UDRMapTy | UDRMap |
FunctionUDRMapTy | FunctionUDRMap |
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > | UDMMap |
Map from the user-defined mapper declaration to its corresponding functions. | |
FunctionUDMMapTy | FunctionUDMMap |
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > | LastprivateConditionalToTypes |
Maps local variables marked as lastprivate conditional to their internal types. | |
llvm::DenseMap< llvm::Function *, unsigned > | FunctionToUntiedTaskStackMap |
Maps function to the position of the untied task locals stack. | |
llvm::ArrayType * | KmpCriticalNameTy |
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];. | |
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > | InternalVars |
An ordered map of auto-generated variables to their unique names. | |
llvm::Type * | KmpRoutineEntryPtrTy = nullptr |
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);. | |
QualType | KmpRoutineEntryPtrQTy |
QualType | KmpTaskTQTy |
Type typedef struct kmp_task { void * shareds; /**< pointer to block of pointers to shared vars / kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */ kmp_int32 part_id; /**< part id for the task */ kmp_routine_entry_t destructors; / pointer to function to invoke deconstructors of firstprivate C++ objects */ } kmp_task_t;. | |
QualType | SavedKmpTaskTQTy |
Saved kmp_task_t for task directive. | |
QualType | SavedKmpTaskloopTQTy |
Saved kmp_task_t for taskloop-based directive. | |
QualType | KmpDependInfoTy |
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool out:1; } flags; } kmp_depend_info_t;. | |
QualType | KmpTaskAffinityInfoTy |
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 : 1; bool flag2 : 1; kmp_int32 reserved : 30; } flags; } kmp_task_affinity_info_t;. | |
QualType | KmpDimTy |
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // upper kmp_int64 st; // stride }; | |
bool | ShouldMarkAsGlobal = true |
llvm::DenseSet< CanonicalDeclPtr< const Decl > > | AlreadyEmittedTargetDecls |
List of the emitted declarations. | |
llvm::StringMap< llvm::WeakTrackingVH > | EmittedNonTargetVariables |
List of the global variables with their addresses that should not be emitted for the target. | |
llvm::SmallDenseSet< const VarDecl * > | DeferredGlobalVariables |
List of variables that can become declare target implicitly and, thus, must be emitted. | |
llvm::SmallVector< NontemporalDeclsSet, 4 > | NontemporalDeclsStack |
Stack for list of declarations in current context marked as nontemporal. | |
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > | UntiedLocalVarsStack |
llvm::SmallVector< LastprivateConditionalData, 4 > | LastprivateConditionalStack |
Stack for list of addresses of declarations in current context marked as lastprivate conditional. | |
bool | HasRequiresUnifiedSharedMemory = false |
Flag for keeping track of weather a requires unified_shared_memory directive is present. | |
llvm::AtomicOrdering | RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic |
Atomic ordering from the omp requires directive. | |
bool | HasEmittedTargetRegion = false |
Flag for keeping track of weather a target region has been emitted. | |
bool | HasEmittedDeclareTargetRegion = false |
Flag for keeping track of weather a device routine has been emitted. | |
llvm::StringSet | ThreadPrivateWithDefinition |
Set of threadprivate variables with the generated initializer. | |
llvm::StringSet | DeclareTargetWithDefinition |
Set of declare target variables with the generated initializer. | |
Definition at line 24 of file CGOpenMPRuntimeGPU.h.
Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are actually global threadlocal, and Generic, in which the local variables are placed in global memory if they may escape their declaration context.
Enumerator | |
---|---|
DS_CUDA | CUDA data sharing mode. |
DS_Generic | Generic data-sharing mode. |
Definition at line 40 of file CGOpenMPRuntimeGPU.h.
Defines the execution mode.
Definition at line 27 of file CGOpenMPRuntimeGPU.h.
|
explicit |
Definition at line 865 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CGOpenMPRuntime::CGM, DS_CUDA, clang::CodeGen::CodeGenModule::getLangOpts(), clang::CodeGen::CGOpenMPRuntime::getOMPBuilder(), clang::CodeGen::CGOpenMPRuntime::hasRequiresUnifiedSharedMemory(), isGPU(), clang::CodeGen::CGOpenMPRuntime::OMPBuilder, and clang::LangOptions::OMPHostIRFile.
|
overridevirtual |
Adjust some parameters for the target-based directives, like addresses of the variables captured by reference in lambdas.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 2148 of file CGOpenMPRuntimeGPU.cpp.
References clang::C, clang::CodeGen::CodeGenFunction::CapturedStmtInfo, clang::CapturedStmt::captures(), clang::CapturedStmt::capturesVariable(), clang::CXXThis, D, clang::CodeGen::CodeGenFunction::EmitLoadOfReferenceLValue(), clang::CodeGen::CodeGenFunction::EmitLValueForFieldInitialization(), clang::CodeGen::Address::emitRawPointer(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CodeGen::LValue::getAddress(), clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar(), clang::Type::getAsCXXRecordDecl(), clang::QualType::getCanonicalType(), clang::QualType::getNonReferenceType(), clang::ValueDecl::getType(), clang::CodeGen::CodeGenFunction::CGCapturedStmtInfo::isCXXThisExprCaptured(), clang::isOpenMPTargetExecutionDirective(), clang::Type::isReferenceType(), clang::LCK_ByRef, clang::CodeGen::CodeGenFunction::LoadCXXThis(), and clang::CodeGen::CodeGenFunction::MakeAddrLValue().
|
overridevirtual |
Emit an implicit/explicit barrier for OpenMP threads.
Kind | Directive for which this implicit barrier call must be generated. Must be OMPD_barrier for explicit barrier generation. |
EmitChecks | true if need to emit checks for cancellation barriers. |
ForceSimpleCall | true simple barrier call must be emitted, false if runtime class decides which one to emit (simple or with cancellation checks). |
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1300 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::EmitRuntimeCall(), clang::CodeGen::CGOpenMPRuntime::emitUpdateLocation(), clang::CodeGen::CGOpenMPRuntime::getDefaultFlagsForBarriers(), clang::CodeGen::CodeGenModule::getModule(), clang::CodeGen::CGOpenMPRuntime::getThreadID(), clang::CodeGen::CodeGenFunction::HaveInsertPoint(), Loc, and clang::CodeGen::CGOpenMPRuntime::OMPBuilder.
|
overridevirtual |
Emits a critical region.
CriticalName | Name of the critical region. |
CriticalOpGen | Generator for the statement associated with the given critical region. |
Hint | Value of the 'hint' clause (optional). |
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1317 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::CGM, clang::CodeGen::CodeGenFunction::createBasicBlock(), clang::CodeGen::CodeGenFunction::CreateMemTemp(), clang::CodeGen::CodeGenFunction::EmitBlock(), clang::CodeGen::CodeGenFunction::EmitBranch(), clang::CodeGen::CGOpenMPRuntime::emitCriticalRegion(), clang::CodeGen::CodeGenFunction::EmitLoadOfScalar(), clang::CodeGen::CodeGenFunction::EmitRuntimeCall(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CodeGen::CodeGenFunction::getContext(), clang::ASTContext::getIntTypeForBitwidth(), clang::CodeGen::CodeGenModule::getModule(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), clang::CodeGen::CodeGenTypeCache::Int32Ty, Loc, clang::CodeGen::CodeGenFunction::MakeAddrLValue(), and clang::CodeGen::CGOpenMPRuntime::OMPBuilder.
|
overridevirtual |
Emits OpenMP-specific function prolog.
Required for device constructs.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1978 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::CapturedStmtInfo, clang::CodeGen::CodeGenFunction::CGM, clang::CR_OpenMP, clang::CodeGen::CodeGenFunction::CurFn, D, clang::Data, DS_Generic, clang::CodeGen::CodeGenFunction::EHStack, EM_SPMD, clang::Decl::getBeginLoc(), clang::CodeGen::CodeGenFunction::CGCapturedStmtInfo::getKind(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), clang::Decl::isCanonicalDecl(), and clang::CodeGen::NormalAndEHCleanup.
|
overridevirtual |
This function ought to emit, in the general case, a call to.
NumTeams | An integer expression of teams. |
ThreadLimit | An integer expression of threads. |
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 908 of file CGOpenMPRuntimeGPU.cpp.
|
overridevirtual |
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) to generate code for 'num_threads' clause.
NumThreads | An integer value of threads. |
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 902 of file CGOpenMPRuntimeGPU.cpp.
|
overridevirtual |
Emits call of the outlined function with the provided arguments, translating these arguments to correct target-specific arguments.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1824 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), E, clang::CodeGen::CGOpenMPRuntime::emitOutlinedFunctionCall(), and Loc.
Referenced by emitTeamsCall().
|
overridevirtual |
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which address is stored in CapturedStruct.
OutlinedFn | Outlined function to be run in parallel threads. Type of this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). |
CapturedVars | A pointer to the record with the references to variables used in OutlinedFn function. |
IfCond | Condition in the associated 'if' clause, if it was specified, nullptr otherwise. |
NumThreads | The value corresponding to the num_threads clause, if any, or nullptr. |
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1205 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CGBuilderTy::CreateConstArrayGEP(), clang::CodeGen::CodeGenFunction::CreateDefaultAlignTempAlloca(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::CodeGen::Address::emitRawPointer(), clang::CodeGen::CodeGenFunction::EmitRuntimeCall(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CodeGen::CGOpenMPRuntime::emitUpdateLocation(), clang::CodeGen::CodeGenFunction::EvaluateExprAsBool(), clang::CodeGen::CodeGenFunction::getContext(), clang::CodeGen::CodeGenModule::getModule(), clang::ASTContext::getPointerType(), clang::CodeGen::CGOpenMPRuntime::getThreadID(), clang::CodeGen::CodeGenFunction::HaveInsertPoint(), clang::CodeGen::CodeGenTypeCache::Int32Ty, clang::CodeGen::CodeGenTypeCache::Int8PtrTy, Loc, clang::CodeGen::CGOpenMPRuntime::OMPBuilder, clang::CodeGen::CodeGenTypeCache::SizeTy, V, clang::CodeGen::CodeGenTypeCache::VoidPtrPtrTy, clang::ASTContext::VoidPtrTy, and clang::CodeGen::CodeGenTypeCache::VoidPtrTy.
|
overridevirtual |
Emits inlined function for the specified OpenMP parallel.
D. This outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32 BoundID, struct context_vars).
CGF | Reference to current CodeGenFunction. |
D | OpenMP directive. |
ThreadIDVar | Variable for thread id in the current OpenMP region. |
InnermostKind | Kind of innermost directive (for simple directives it is a directive itself, for combined - its innermost directive). |
CodeGen | Code generation sequence for the D directive. |
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 913 of file CGOpenMPRuntimeGPU.cpp.
References D, EM_SPMD, and clang::CodeGen::CGOpenMPRuntime::emitParallelOutlinedFunction().
|
overridevirtual |
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate code for 'proc_bind' clause.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 896 of file CGOpenMPRuntimeGPU.cpp.
|
overridevirtual |
Emit a code for reduction clause.
Design of OpenMP reductions on the GPU.
Privates | List of private copies for original reduction arguments. |
LHSExprs | List of LHS in ReductionOps reduction operations. |
RHSExprs | List of RHS in ReductionOps reduction operations. |
ReductionOps | List of reduction operations in form 'LHS binop RHS' or 'operator binop(LHS, RHS)'. |
Options | List of options for reduction codegen: WithNowait true if parent directive has also nowait clause, false otherwise. SimpleReduction Emit reduction operation only. Used for omp simd directive on the host. ReductionKind The kind of reduction to perform. |
Consider a typical OpenMP program with one or more reduction clauses:
float foo; double bar; #pragma omp target teams distribute parallel for \ reduction(+:foo) reduction(*:bar) for (int i = 0; i < N; i++) { foo += A[i]; bar *= B[i]; }
where 'foo' and 'bar' are reduced across all OpenMP threads in all teams. In our OpenMP implementation on the NVPTX device an OpenMP team is mapped to a CUDA threadblock and OpenMP threads within a team are mapped to CUDA threads within a threadblock. Our goal is to efficiently aggregate values across all OpenMP threads such that:
Introduction to Decoupling
We would like to decouple the compiler and the runtime so that the latter is ignorant of the reduction variables (number, data types) and the reduction operators. This allows a simpler interface and implementation while still attaining good performance.
Pseudocode for the aforementioned OpenMP program generated by the compiler is as follows:
Call the OpenMP runtime on the GPU to reduce within a team and store the result on the team master:
__kmpc_nvptx_parallel_reduce_nowait_v2(..., reduceData, shuffleReduceFn, interWarpCpyFn)
where: struct ReduceData { double *foo; double *bar; } reduceData reduceData.foo = &foo_private reduceData.bar = &bar_private
'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two auxiliary functions generated by the compiler that operate on variables of type 'ReduceData'. They aid the runtime perform algorithmic steps in a data agnostic manner.
'shuffleReduceFn' is a pointer to a function that reduces data of type 'ReduceData' across two OpenMP threads (lanes) in the same warp. It takes the following arguments as input:
a. variable of type 'ReduceData' on the calling lane, b. its lane_id, c. an offset relative to the current lane_id to generate a remote_lane_id. The remote lane contains the second variable of type 'ReduceData' that is to be reduced. d. an algorithm version parameter determining which reduction algorithm to use.
'shuffleReduceFn' retrieves data from the remote lane using efficient GPU shuffle intrinsics and reduces, using the algorithm specified by the 4th parameter, the two operands element-wise. The result is written to the first operand.
Different reduction algorithms are implemented in different runtime functions, all calling 'shuffleReduceFn' to perform the essential reduction step. Therefore, based on the 4th parameter, this function behaves slightly differently to cooperate with the runtime to ensure correctness under different circumstances.
'InterWarpCpyFn' is a pointer to a function that transfers reduced variables across warps. It tunnels, through CUDA shared memory, the thread-private data of type 'ReduceData' from lane 0 of each warp to a lane in the first warp.
Call the OpenMP runtime on the GPU to reduce across teams. The last team writes the global reduced value to memory.
ret = __kmpc_nvptx_teams_reduce_nowait(..., reduceData, shuffleReduceFn, interWarpCpyFn, scratchpadCopyFn, loadAndReduceFn)
'scratchpadCopyFn' is a helper that stores reduced data from the team master to a scratchpad array in global memory.
'loadAndReduceFn' is a helper that loads data from the scratchpad array and reduces it with the input operand.
These compiler generated functions hide address calculation and alignment information from the runtime.
Warp Reduction Algorithms
On the warp level, we have three algorithms implemented in the OpenMP runtime depending on the number of active lanes:
Full Warp Reduction
The reduce algorithm within a warp where all lanes are active is implemented in the runtime as follows:
full_warp_reduce(void *reduce_data, kmp_ShuffleReductFctPtr ShuffleReduceFn) { for (int offset = WARPSIZE/2; offset > 0; offset /= 2) ShuffleReduceFn(reduce_data, 0, offset, 0); }
The algorithm completes in log(2, WARPSIZE) steps.
'ShuffleReduceFn' is used here with lane_id set to 0 because it is not used therefore we save instructions by not retrieving lane_id from the corresponding special registers. The 4th parameter, which represents the version of the algorithm being used, is set to 0 to signify full warp reduction.
In this version, 'ShuffleReduceFn' behaves, per element, as follows:
#reduce_elem refers to an element in the local lane's data structure #remote_elem is retrieved from a remote lane remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE); reduce_elem = reduce_elem REDUCE_OP remote_elem;
Contiguous Partial Warp Reduction
This reduce algorithm is used within a warp where only the first 'n' (n <= WARPSIZE) lanes are active. It is typically used when the number of OpenMP threads in a parallel region is not a multiple of WARPSIZE. The algorithm is implemented in the runtime as follows:
void contiguous_partial_reduce(void *reduce_data, kmp_ShuffleReductFctPtr ShuffleReduceFn, int size, int lane_id) { int curr_size; int offset; curr_size = size; mask = curr_size/2; while (offset>0) { ShuffleReduceFn(reduce_data, lane_id, offset, 1); curr_size = (curr_size+1)/2; offset = curr_size/2; } }
In this version, 'ShuffleReduceFn' behaves, per element, as follows:
remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE); if (lane_id < offset) reduce_elem = reduce_elem REDUCE_OP remote_elem else reduce_elem = remote_elem
This algorithm assumes that the data to be reduced are located in a contiguous subset of lanes starting from the first. When there is an odd number of active lanes, the data in the last lane is not aggregated with any other lane's dat but is instead copied over.
Dispersed Partial Warp Reduction
This algorithm is used within a warp when any discontiguous subset of lanes are active. It is used to implement the reduction operation across lanes in an OpenMP simd region or in a nested parallel region.
void dispersed_partial_reduce(void *reduce_data, kmp_ShuffleReductFctPtr ShuffleReduceFn) { int size, remote_id; int logical_lane_id = number_of_active_lanes_before_me() * 2; do { remote_id = next_active_lane_id_right_after_me();
size = number_of_active_lanes_in_this_warp(); logical_lane_id /= 2; ShuffleReduceFn(reduce_data, logical_lane_id, remote_id-1-threadIdx.x, 2); } while (logical_lane_id % 2 == 0 && size > 1); }
There is no assumption made about the initial state of the reduction. Any number of lanes (>=1) could be active at any position. The reduction result is returned in the first active lane.
In this version, 'ShuffleReduceFn' behaves, per element, as follows:
remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE); if (lane_id % 2 == 0 && offset > 0) reduce_elem = reduce_elem REDUCE_OP remote_elem else reduce_elem = remote_elem
Intra-Team Reduction
This function, as implemented in the runtime call '__kmpc_nvptx_parallel_reduce_nowait_v2', aggregates data across OpenMP threads in a team. It first reduces within a warp using the aforementioned algorithms. We then proceed to gather all such reduced values at the first warp.
The runtime makes use of the function 'InterWarpCpyFn', which copies data from each of the "warp master" (zeroth lane of each warp, where warp-reduced data is held) to the zeroth warp. This step reduces (in a mathematical sense) the problem of reduction across warp masters in a block to the problem of warp reduction.
Inter-Team Reduction
Once a team has reduced its data to a single value, it is stored in a global scratchpad array. Since each team has a distinct slot, this can be done without locking.
The last team to write to the scratchpad array proceeds to reduce the scratchpad array. One or more workers in the last team use the helper 'loadAndReduceDataFn' to load and reduce values from the array, i.e., the k'th worker reduces every k'th element.
Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait_v2' to reduce across workers and compute a globally reduced value.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1655 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::AllocaInsertPt, clang::CodeGen::CodeGenFunction::Builder, clang::C, clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::ConvertTypeForMem(), clang::CodeGen::CodeGenFunction::CurFn, clang::CodeGen::Address::emitRawPointer(), clang::CodeGen::CGOpenMPRuntime::emitReduction(), clang::CodeGen::CGOpenMPRuntime::emitSingleReductionCombiner(), clang::CodeGen::CGOpenMPRuntime::emitUpdateLocation(), clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar(), clang::CodeGen::CodeGenModule::getContext(), clang::CodeGen::CodeGenFunction::getEvaluationKind(), clang::TargetInfo::getGridValue(), clang::CodeGen::CodeGenFunction::getTarget(), clang::CodeGen::CodeGenFunction::HaveInsertPoint(), clang::isOpenMPDistributeDirective(), clang::isOpenMPParallelDirective(), clang::isOpenMPTeamsDirective(), Loc, clang::CodeGen::CGOpenMPRuntime::OMPBuilder, clang::Private, clang::CodeGen::CodeGenFunction::SourceLocToDebugLoc(), clang::CodeGen::TEK_Aggregate, clang::CodeGen::TEK_Complex, clang::CodeGen::TEK_Scalar, and Variable.
|
overridevirtual |
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stored in CapturedStruct.
OutlinedFn | Outlined function to be run by team masters. Type of this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). |
CapturedVars | A pointer to the record with the references to variables used in OutlinedFn function. |
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1180 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::CreateDefaultAlignTempAlloca(), clang::CodeGen::CGBuilderTy::CreateStore(), D, emitOutlinedFunctionCall(), clang::CodeGen::Address::emitRawPointer(), clang::CodeGen::CGOpenMPRuntime::emitThreadIDAddress(), clang::CodeGen::CodeGenFunction::HaveInsertPoint(), clang::CodeGen::CodeGenTypeCache::Int32Ty, Loc, and clang::CodeGen::CodeGenTypeCache::VoidPtrTy.
|
overridevirtual |
Emits inlined function for the specified OpenMP teams.
D. This outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32 BoundID, struct context_vars).
CGF | Reference to current CodeGenFunction. |
D | OpenMP directive. |
ThreadIDVar | Variable for thread id in the current OpenMP region. |
InnermostKind | Kind of innermost directive (for simple directives it is a directive itself, for combined - its innermost directive). |
CodeGen | Code generation sequence for the D directive. |
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 971 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::CGM, clang::CodeGen::CodeGenFunction::CurFn, D, Data, EM_SPMD, clang::CodeGen::CGOpenMPRuntime::emitTeamsOutlinedFunction(), clang::Decl::getBeginLoc(), clang::CodeGen::CodeGenModule::getContext(), getDistributeLastprivateVars(), clang::TargetInfo::getGridValue(), clang::CodeGen::CodeGenModule::getOpenMPRuntime(), clang::CodeGen::CodeGenModule::getTarget(), getTeamsReductionVars(), Loc, and clang::CodeGen::RegionCodeGenTy::setAction().
|
overridevirtual |
Cleans up references to the objects in finished function.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 2114 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::CurFn, and clang::CodeGen::CGOpenMPRuntime::functionFinished().
|
overridevirtual |
Gets the OpenMP-specific address of the local variable.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 2045 of file CGOpenMPRuntimeGPU.cpp.
References clang::Decl::attr_begin(), clang::Decl::attr_end(), clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::ConvertTypeForMem(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::cuda_constant, clang::cuda_shared, clang::CodeGen::CodeGenFunction::CurFn, clang::Default, DS_Generic, E, clang::QualType::getAddressSpace(), clang::CharUnits::getAsAlign(), clang::Decl::getAttr(), clang::VarDecl::getCanonicalDecl(), clang::CodeGen::CodeGenModule::getContext(), clang::ASTContext::getDeclAlign(), clang::CodeGen::CodeGenModule::getModule(), clang::NamedDecl::getName(), clang::ASTContext::getTargetAddressSpace(), clang::ValueDecl::getType(), clang::Decl::hasAttr(), clang::Decl::hasAttrs(), and clang::CodeGen::Address::invalid().
|
overridevirtual |
Choose a default value for the dist_schedule clause.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 2119 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::CGM, EM_SPMD, clang::CodeGen::CodeGenFunction::EmitScalarConversion(), clang::CodeGen::CodeGenFunction::getContext(), clang::CodeGen::CGOpenMPRuntime::getDefaultDistScheduleAndChunk(), clang::ASTContext::getIntTypeForBitwidth(), and clang::CodeGen::CodeGenModule::getOpenMPRuntime().
|
overridevirtual |
Choose a default value for the schedule clause.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 2136 of file CGOpenMPRuntimeGPU.cpp.
References clang::IntegerLiteral::Create(), clang::CodeGen::CodeGenFunction::getContext(), and clang::ASTContext::getIntTypeForBitwidth().
llvm::Value * CGOpenMPRuntimeGPU::getGPUNumThreads | ( | CodeGenFunction & | CGF | ) |
Get the maximum number of threads in a block of the GPU.
Definition at line 2343 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CodeGenFunction::CGM, clang::CodeGen::CodeGenModule::getModule(), and clang::CodeGen::CodeGenTypeCache::Int32Ty.
llvm::Value * CGOpenMPRuntimeGPU::getGPUThreadID | ( | CodeGenFunction & | CGF | ) |
Get the id of the current thread on the GPU.
Definition at line 2356 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::EmitRuntimeCall(), clang::CodeGen::CodeGenModule::getModule(), and clang::CodeGen::CGOpenMPRuntime::OMPBuilder.
|
overridevirtual |
Get call to __kmpc_alloc_shared.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1117 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::EmitRuntimeCall(), clang::CodeGen::CodeGenModule::getContext(), clang::ASTContext::getDeclAlign(), clang::CodeGen::CodeGenModule::getLLVMContext(), clang::CodeGen::CodeGenModule::getModule(), clang::NamedDecl::getName(), clang::CharUnits::getQuantity(), clang::ValueDecl::getType(), clang::CodeGen::CodeGenFunction::getTypeSize(), clang::CodeGen::CGOpenMPRuntime::OMPBuilder, and clang::CodeGen::CodeGenTypeCache::SizeTy.
|
overridevirtual |
Get call to __kmpc_free_shared.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1143 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CGOpenMPRuntime::CGM, clang::CodeGen::CodeGenFunction::EmitRuntimeCall(), clang::CodeGen::CodeGenModule::getModule(), and clang::CodeGen::CGOpenMPRuntime::OMPBuilder.
|
overridevirtual |
Gets the address of the native argument basing on the address of the target-specific parameter.
NativeParam | Parameter itself. |
TargetParam | Corresponding target-specific parameter. |
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1798 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::Builder, clang::CodeGen::CodeGenFunction::CreateMemTemp(), clang::CodeGen::CGBuilderTy::CreatePointerBitCastOrAddrSpaceCast(), clang::CodeGen::CodeGenFunction::EmitLoadOfScalar(), clang::CodeGen::CodeGenFunction::EmitStoreOfScalar(), clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar(), clang::CodeGen::CodeGenFunction::getLLVMContext(), clang::Type::getPointeeType(), clang::CodeGen::CodeGenTypes::getTargetAddressSpace(), clang::ValueDecl::getType(), clang::CodeGen::CodeGenFunction::getTypes(), clang::Type::isReferenceType(), and clang::QualifierCollector::strip().
|
overridevirtual |
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and translates it into the corresponding address space.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 2202 of file CGOpenMPRuntimeGPU.cpp.
References clang::cuda_constant, clang::cuda_shared, clang::Default, clang::Decl::getAttr(), and clang::Decl::hasAttr().
|
inlineoverrideprotectedvirtual |
Check if the default location must be constant.
Constant for NVPTX for better optimization.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 129 of file CGOpenMPRuntimeGPU.h.
|
overridevirtual |
Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURuntime Targets like AMDGCN and NVPTX.
Check if the variable length declaration is delayed:
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1106 of file CGOpenMPRuntimeGPU.cpp.
References clang::CodeGen::CodeGenFunction::CurFn.
|
inlineoverridevirtual |
Returns true if the current target is a GPU.
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 134 of file CGOpenMPRuntimeGPU.h.
Referenced by CGOpenMPRuntimeGPU().
|
overridevirtual |
Perform check on requires decl to ensure that target architecture supports unified addressing.
Check to see if target architecture supports unified addressing which is a restriction for OpenMP requires clause "unified_shared_memory".
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 2247 of file CGOpenMPRuntimeGPU.cpp.
References clang::AMDGCNSPIRV, clang::CodeGen::CGOpenMPRuntime::CGM, D, clang::CodeGen::CodeGenModule::Error(), clang::Generic, getOffloadArch(), clang::GFX1010, clang::GFX1011, clang::GFX1012, clang::GFX1013, clang::GFX1030, clang::GFX1031, clang::GFX1032, clang::GFX1033, clang::GFX1034, clang::GFX1035, clang::GFX1036, clang::GFX10_1_GENERIC, clang::GFX10_3_GENERIC, clang::GFX1100, clang::GFX1101, clang::GFX1102, clang::GFX1103, clang::GFX1150, clang::GFX1151, clang::GFX1152, clang::GFX11_GENERIC, clang::GFX1200, clang::GFX1201, clang::GFX12_GENERIC, clang::GFX600, clang::GFX601, clang::GFX602, clang::GFX700, clang::GFX701, clang::GFX702, clang::GFX703, clang::GFX704, clang::GFX705, clang::GFX801, clang::GFX802, clang::GFX803, clang::GFX805, clang::GFX810, clang::GFX900, clang::GFX902, clang::GFX904, clang::GFX906, clang::GFX908, clang::GFX909, clang::GFX90a, clang::GFX90c, clang::GFX940, clang::GFX941, clang::GFX942, clang::GFX9_GENERIC, clang::LAST, clang::OffloadArchToString(), clang::CodeGen::CGOpenMPRuntime::processRequiresDirective(), clang::SM_20, clang::SM_21, clang::SM_30, clang::SM_32_, clang::SM_35, clang::SM_37, clang::SM_50, clang::SM_52, clang::SM_53, clang::SM_60, clang::SM_61, clang::SM_62, clang::SM_70, clang::SM_72, clang::SM_75, clang::SM_80, clang::SM_86, clang::SM_87, clang::SM_89, clang::SM_90, clang::SM_90a, clang::UNKNOWN, and clang::UNUSED.
|
overridevirtual |
Translates the native parameter of outlined function if this is required for target.
FD | Field decl from captured record for the parameter. |
NativeParam | Parameter itself. |
Reimplemented from clang::CodeGen::CGOpenMPRuntime.
Definition at line 1766 of file CGOpenMPRuntimeGPU.cpp.
References clang::Qualifiers::addAddressSpace(), clang::Qualifiers::addRestrict(), clang::QualifierCollector::apply(), clang::CodeGen::CGOpenMPRuntime::CGM, clang::ImplicitParamDecl::Create(), clang::ParmVarDecl::Create(), clang::ASTContext::getAddrSpaceQualType(), clang::Decl::getAttr(), clang::DeclaratorDecl::getBeginLoc(), clang::CodeGen::CodeGenModule::getContext(), clang::Decl::getDeclContext(), clang::NamedDecl::getIdentifier(), clang::getLangASFromTargetAS(), clang::Decl::getLocation(), clang::Type::getPointeeType(), clang::ASTContext::getPointerType(), clang::ValueDecl::getType(), clang::Type::isReferenceType(), clang::opencl_global, clang::Other, clang::SC_None, and clang::QualifierCollector::strip().