10#include "TargetInfo.h"
24 static const unsigned MaxNumRegsForArgsRet = 16;
26 unsigned numRegsForType(
QualType Ty)
const;
30 uint64_t Members)
const override;
33 llvm::Type *coerceKernelArgumentType(llvm::Type *Ty,
unsigned FromAS,
34 unsigned ToAS)
const {
36 auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
37 if (PtrTy && PtrTy->getAddressSpace() == FromAS)
38 return llvm::PointerType::get(Ty->getContext(), ToAS);
55bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(
QualType Ty)
const {
59bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
60 const Type *
Base, uint64_t Members)
const {
61 uint32_t NumRegs = (getContext().getTypeSize(
Base) + 31) / 32;
64 return Members * NumRegs <= MaxNumRegsForArgsRet;
68unsigned AMDGPUABIInfo::numRegsForType(
QualType Ty)
const {
74 QualType EltTy = VT->getElementType();
75 unsigned EltSize = getContext().getTypeSize(EltTy);
79 return (VT->getNumElements() + 1) / 2;
81 unsigned EltNumRegs = (EltSize + 31) / 32;
82 return EltNumRegs * VT->getNumElements();
91 NumRegs += numRegsForType(FieldTy);
97 return (getContext().getTypeSize(Ty) + 31) / 32;
106 unsigned NumRegsLeft = MaxNumRegsForArgsRet;
108 if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
109 Arg.info = classifyKernelArgumentType(Arg.type);
118 llvm_unreachable(
"AMDGPU does not support varargs");
149 llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
153 if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
172 llvm::Type *OrigLTy = CGT.ConvertType(Ty);
173 llvm::Type *LTy = OrigLTy;
174 if (getContext().getLangOpts().
HIP) {
175 LTy = coerceKernelArgumentType(
176 OrigLTy, getContext().getTargetAddressSpace(LangAS::Default),
177 getContext().getTargetAddressSpace(LangAS::cuda_device));
186 if (!getContext().getLangOpts().
OpenCL && LTy == OrigLTy &&
189 getContext().getTypeAlignInChars(Ty),
190 getContext().getTargetAddressSpace(LangAS::opencl_constant),
201 unsigned &NumRegsLeft)
const {
202 assert(NumRegsLeft <= MaxNumRegsForArgsRet &&
"register estimate underflow");
231 unsigned NumRegs = (
Size + 31) / 32;
232 NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
241 llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
245 if (NumRegsLeft > 0) {
246 unsigned NumRegs = numRegsForType(Ty);
247 if (NumRegsLeft >= NumRegs) {
248 NumRegsLeft -= NumRegs;
256 getContext().getTypeAlignInChars(Ty),
257 getContext().getTargetAddressSpace(LangAS::opencl_private));
263 unsigned NumRegs = numRegsForType(Ty);
264 NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
275 void setFunctionDeclAttributes(
const FunctionDecl *FD, llvm::Function *F,
285 llvm::PointerType *
T,
QualType QT)
const override;
289 getABIInfo().getDataLayout().getAllocaAddrSpace());
292 const VarDecl *D)
const override;
295 llvm::AtomicOrdering Ordering,
296 llvm::LLVMContext &Ctx)
const override;
298 llvm::Function *BlockInvokeFunc,
299 llvm::Type *BlockTy)
const override;
307 llvm::GlobalValue *GV) {
308 if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
311 return !D->
hasAttr<OMPDeclareTargetDeclAttr>() &&
312 (D->
hasAttr<OpenCLKernelAttr>() ||
313 (isa<FunctionDecl>(D) && D->
hasAttr<CUDAGlobalAttr>()) ||
315 (D->
hasAttr<CUDADeviceAttr>() || D->
hasAttr<CUDAConstantAttr>() ||
316 cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
317 cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
320void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
322 const auto *ReqdWGS =
324 const bool IsOpenCLKernel =
328 const auto *FlatWGS = FD->
getAttr<AMDGPUFlatWorkGroupSizeAttr>();
329 if (ReqdWGS || FlatWGS) {
331 }
else if (IsOpenCLKernel || IsHIPKernel) {
334 const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
335 const unsigned DefaultMaxWorkGroupSize =
336 IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
338 std::string AttrVal =
339 std::string(
"1,") + llvm::utostr(DefaultMaxWorkGroupSize);
340 F->addFnAttr(
"amdgpu-flat-work-group-size", AttrVal);
343 if (
const auto *
Attr = FD->
getAttr<AMDGPUWavesPerEUAttr>())
346 if (
const auto *
Attr = FD->
getAttr<AMDGPUNumSGPRAttr>()) {
347 unsigned NumSGPR =
Attr->getNumSGPR();
350 F->addFnAttr(
"amdgpu-num-sgpr", llvm::utostr(NumSGPR));
353 if (
const auto *
Attr = FD->
getAttr<AMDGPUNumVGPRAttr>()) {
354 uint32_t NumVGPR =
Attr->getNumVGPR();
357 F->addFnAttr(
"amdgpu-num-vgpr", llvm::utostr(NumVGPR));
360 if (
const auto *
Attr = FD->
getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
361 uint32_t
X =
Attr->getMaxNumWorkGroupsX()
365 uint32_t Y =
Attr->getMaxNumWorkGroupsY()
366 ?
Attr->getMaxNumWorkGroupsY()
370 uint32_t Z =
Attr->getMaxNumWorkGroupsZ()
371 ?
Attr->getMaxNumWorkGroupsZ()
377 llvm::raw_svector_ostream OS(AttrVal);
378 OS <<
X <<
',' << Y <<
',' << Z;
380 F->addFnAttr(
"amdgpu-max-num-workgroups", AttrVal.str());
386void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
388 StringRef Name =
"__oclc_ABI_version";
389 llvm::GlobalVariable *OriginalGV = CGM.
getModule().getNamedGlobal(Name);
390 if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
394 llvm::CodeObjectVersionKind::COV_None)
397 auto *
Type = llvm::IntegerType::getIntNTy(CGM.
getModule().getContext(), 32);
398 llvm::Constant *COV = llvm::ConstantInt::get(
403 auto *GV =
new llvm::GlobalVariable(
404 CGM.
getModule(),
Type,
true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
405 nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
407 GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
408 GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
412 OriginalGV->replaceAllUsesWith(GV);
413 GV->takeName(OriginalGV);
414 OriginalGV->eraseFromParent();
418void AMDGPUTargetCodeGenInfo::setTargetAttributes(
421 GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
422 GV->setDSOLocal(
true);
425 if (GV->isDeclaration())
428 llvm::Function *F = dyn_cast<llvm::Function>(GV);
432 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
434 setFunctionDeclAttributes(FD, F, M);
437 F->addFnAttr(
"amdgpu-unsafe-fp-atomics",
"true");
439 if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
440 F->addFnAttr(
"amdgpu-ieee",
"false");
443unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv()
const {
444 return llvm::CallingConv::AMDGPU_KERNEL;
452llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
456 return llvm::ConstantPointerNull::get(PT);
459 auto NPT = llvm::PointerType::get(
460 PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
461 return llvm::ConstantExpr::getAddrSpaceCast(
462 llvm::ConstantPointerNull::get(NPT), PT);
466AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(
CodeGenModule &CGM,
470 "Address space agnostic languages only");
474 return DefaultGlobalAS;
477 if (AddrSpace != LangAS::Default)
486 return DefaultGlobalAS;
490AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(
const LangOptions &LangOpts,
492 llvm::AtomicOrdering Ordering,
493 llvm::LLVMContext &Ctx)
const {
496 case SyncScope::HIPSingleThread:
497 case SyncScope::SingleScope:
498 Name =
"singlethread";
500 case SyncScope::HIPWavefront:
501 case SyncScope::OpenCLSubGroup:
502 case SyncScope::WavefrontScope:
505 case SyncScope::HIPWorkgroup:
506 case SyncScope::OpenCLWorkGroup:
507 case SyncScope::WorkgroupScope:
510 case SyncScope::HIPAgent:
511 case SyncScope::OpenCLDevice:
512 case SyncScope::DeviceScope:
515 case SyncScope::SystemScope:
516 case SyncScope::HIPSystem:
517 case SyncScope::OpenCLAllSVMDevices:
522 if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
524 Name = Twine(Twine(Name) + Twine(
"-")).str();
526 Name = Twine(Twine(Name) + Twine(
"one-as")).str();
529 return Ctx.getOrInsertSyncScopeID(Name);
532bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases()
const {
536bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators()
const {
540void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
542 FT = getABIInfo().getContext().adjustFunctionType(
554llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
555 CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy)
const {
559 auto *InvokeFT = Invoke->getFunctionType();
568 ArgTys.push_back(BlockTy);
569 ArgTypeNames.push_back(llvm::MDString::get(
C,
"__block_literal"));
570 AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
571 ArgBaseTypeNames.push_back(llvm::MDString::get(
C,
"__block_literal"));
572 ArgTypeQuals.push_back(llvm::MDString::get(
C,
""));
573 AccessQuals.push_back(llvm::MDString::get(
C,
"none"));
574 ArgNames.push_back(llvm::MDString::get(
C,
"block_literal"));
575 for (
unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
576 ArgTys.push_back(InvokeFT->getParamType(I));
577 ArgTypeNames.push_back(llvm::MDString::get(
C,
"void*"));
578 AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
579 AccessQuals.push_back(llvm::MDString::get(
C,
"none"));
580 ArgBaseTypeNames.push_back(llvm::MDString::get(
C,
"void*"));
581 ArgTypeQuals.push_back(llvm::MDString::get(
C,
""));
583 llvm::MDString::get(
C, (Twine(
"local_arg") + Twine(I)).str()));
585 std::string Name = Invoke->getName().str() +
"_kernel";
586 auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(
C), ArgTys,
false);
587 auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
589 F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
591 llvm::AttrBuilder KernelAttrs(
C);
595 KernelAttrs.addAttribute(
"enqueued-block");
596 F->addFnAttrs(KernelAttrs);
598 auto IP = CGF.
Builder.saveIP();
599 auto *BB = llvm::BasicBlock::Create(
C,
"entry", F);
600 Builder.SetInsertPoint(BB);
602 auto *BlockPtr = Builder.CreateAlloca(BlockTy,
nullptr);
603 BlockPtr->setAlignment(BlockAlign);
604 Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
605 auto *
Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
607 Args.push_back(Cast);
608 for (llvm::Argument &A : llvm::drop_begin(F->args()))
610 llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
611 call->setCallingConv(Invoke->getCallingConv());
612 Builder.CreateRetVoid();
613 Builder.restoreIP(IP);
615 F->setMetadata(
"kernel_arg_addr_space", llvm::MDNode::get(
C, AddressQuals));
616 F->setMetadata(
"kernel_arg_access_qual", llvm::MDNode::get(
C, AccessQuals));
617 F->setMetadata(
"kernel_arg_type", llvm::MDNode::get(
C, ArgTypeNames));
618 F->setMetadata(
"kernel_arg_base_type",
619 llvm::MDNode::get(
C, ArgBaseTypeNames));
620 F->setMetadata(
"kernel_arg_type_qual", llvm::MDNode::get(
C, ArgTypeQuals));
622 F->setMetadata(
"kernel_arg_name", llvm::MDNode::get(
C, ArgNames));
628 llvm::Function *F,
const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
629 const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal,
630 int32_t *MaxThreadsVal) {
634 Min = FlatWGS->getMin()->EvaluateKnownConstInt(
getContext()).getExtValue();
635 Max = FlatWGS->getMax()->EvaluateKnownConstInt(
getContext()).getExtValue();
637 if (ReqdWGS &&
Min == 0 &&
Max == 0)
638 Min =
Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
641 assert(
Min <=
Max &&
"Min must be less than or equal Max");
644 *MinThreadsVal =
Min;
646 *MaxThreadsVal =
Max;
647 std::string AttrVal = llvm::utostr(
Min) +
"," + llvm::utostr(
Max);
649 F->addFnAttr(
"amdgpu-flat-work-group-size", AttrVal);
651 assert(
Max == 0 &&
"Max must be zero");
655 llvm::Function *F,
const AMDGPUWavesPerEUAttr *
Attr) {
657 Attr->getMin()->EvaluateKnownConstInt(
getContext()).getExtValue();
660 ?
Attr->getMax()->EvaluateKnownConstInt(
getContext()).getExtValue()
664 assert((
Max == 0 ||
Min <=
Max) &&
"Min must be less than or equal Max");
666 std::string AttrVal = llvm::utostr(
Min);
668 AttrVal = AttrVal +
"," + llvm::utostr(
Max);
669 F->addFnAttr(
"amdgpu-waves-per-eu", AttrVal);
671 assert(
Max == 0 &&
"Max must be zero");
674std::unique_ptr<TargetCodeGenInfo>
676 return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.
getTypes());
static bool requiresAMDGPUProtectedVisibility(const Decl *D, llvm::GlobalValue *GV)
Defines the clang::TargetOptions class.
uint64_t getTargetNullPointerValue(QualType QT) const
Get target-dependent integer value for null pointer which is used for constant folding.
const TargetInfo & getTargetInfo() const
unsigned getTargetAddressSpace(LangAS AS) const
Attr - This represents one attribute.
ABIArgInfo - Helper class to encapsulate information about how a specific C type should be passed to ...
static ABIArgInfo getIgnore()
static ABIArgInfo getDirect(llvm::Type *T=nullptr, unsigned Offset=0, llvm::Type *Padding=nullptr, bool CanBeFlattened=true, unsigned Align=0)
static ABIArgInfo getIndirectAliased(CharUnits Alignment, unsigned AddrSpace, bool Realign=false, llvm::Type *Padding=nullptr)
Pass this in memory using the IR byref attribute.
virtual bool isHomogeneousAggregateBaseType(QualType Ty) const
virtual bool isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
@ RAA_DirectInMemory
Pass it on the stack using its defined layout.
CGFunctionInfo - Class to encapsulate the information about a function definition.
ABIArgInfo & getReturnInfo()
unsigned getCallingConvention() const
getCallingConvention - Return the user specified calling convention, which has been translated into a...
CanQualType getReturnType() const
MutableArrayRef< ArgInfo > arguments()
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::LLVMContext & getLLVMContext()
This class organizes the cross-function state that is used while generating LLVM code.
llvm::Module & getModule() const
void handleAMDGPUWavesPerEUAttr(llvm::Function *F, const AMDGPUWavesPerEUAttr *A)
Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to F.
const LangOptions & getLangOpts() const
CodeGenTypes & getTypes()
const TargetInfo & getTarget() const
void handleAMDGPUFlatWorkGroupSizeAttr(llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, const ReqdWorkGroupSizeAttr *ReqdWGS=nullptr, int32_t *MinThreadsVal=nullptr, int32_t *MaxThreadsVal=nullptr)
Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute to F.
const llvm::DataLayout & getDataLayout() const
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
void addDefaultFunctionDefinitionAttributes(llvm::AttrBuilder &attrs)
Like the overload taking a Function &, but intended specifically for frontends that want to build on ...
This class organizes the cross-module state that is used while lowering AST types to LLVM types.
DefaultABIInfo - The default implementation for ABI specific details.
ABIArgInfo classifyArgumentType(QualType RetTy) const
ABIArgInfo classifyReturnType(QualType RetTy) const
void computeInfo(CGFunctionInfo &FI) const override
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override
EmitVAArg - Emit the target dependent code to load a value of.
TargetCodeGenInfo - This class organizes various target-specific codegeneration issues,...
virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const
virtual llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, SyncScope Scope, llvm::AtomicOrdering Ordering, llvm::LLVMContext &Ctx) const
Get the syncscope used in LLVM IR.
const T & getABIInfo() const
virtual unsigned getOpenCLKernelCallingConv() const
Get LLVM calling convention for OpenCL kernel.
virtual LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const
Get target favored AST address space of a global variable for languages other than OpenCL and CUDA.
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const
setTargetAttributes - Provides a convenient hook to handle extra target-specific attributes for the g...
virtual bool shouldEmitDWARFBitFieldSeparators() const
virtual llvm::Constant * getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const
Get target specific null pointer.
virtual LangAS getASTAllocaAddressSpace() const
Get the AST address space for alloca.
virtual llvm::Value * createEnqueuedBlockKernel(CodeGenFunction &CGF, llvm::Function *BlockInvokeFunc, llvm::Type *BlockTy) const
Create an OpenCL kernel for an enqueued block.
virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const
Provides a convenient hook to handle extra target-specific globals.
virtual bool shouldEmitStaticExternCAliases() const
Decl - This represents one declaration (or definition), e.g.
Represents a member of a struct/union/class.
Represents a function declaration or definition.
ExtInfo withCallingConv(CallingConv cc) const
FunctionType - C99 6.7.5.3 - Function Declarators.
ExtInfo getExtInfo() const
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
A (possibly-)qualified type.
LangAS getAddressSpace() const
Return the address space of this type.
bool isConstantStorage(const ASTContext &Ctx, bool ExcludeCtor, bool ExcludeDtor)
Represents a struct/union/class.
bool hasFlexibleArrayMember() const
field_range fields() const
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of structs/unions/cl...
Scope - A scope is a transient data structure that is used while parsing the program.
TargetOptions & getTargetOpts() const
Retrieve the target options.
virtual std::optional< LangAS > getConstantAddressSpace() const
Return an AST address space which can be used opportunistically for constant global memory.
bool allowAMDGPUUnsafeFPAtomics() const
Returns whether or not the AMDGPU unsafe floating point atomics are allowed.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
The base class of the type hierarchy.
const T * getAs() const
Member-template getAs<specific type>'.
Represents a variable declaration or definition.
bool hasConstantInitialization() const
Determine whether this variable has constant initialization.
Represents a GCC generic vector type.
ABIArgInfo classifyArgumentType(CodeGenModule &CGM, CanQualType type)
Classify the rules for how to pass a particular type.
CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI)
bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, const ABIInfo &Info)
std::unique_ptr< TargetCodeGenInfo > createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM)
bool isAggregateTypeForABI(QualType T)
const Type * isSingleElementStruct(QualType T, ASTContext &Context)
isSingleElementStruct - Determine if a structure is a "single element struct", i.e.
QualType useFirstFieldIfTransparentUnion(QualType Ty)
Pass transparent unions as if they were the type of the first element.
bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, bool AsIfNoUniqueAddr=false)
isEmptyRecord - Return true iff a structure contains only empty fields.
bool Cast(InterpState &S, CodePtr OpPC)
The JSON file list parser is used to communicate input to InstallAPI.
LangAS
Defines the address space values used by the address space qualifier of QualType.
const FunctionProtoType * T
SyncScope
Defines synch scope values used internally by clang.
LangAS getLangASFromTargetAS(unsigned TargetAS)