#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

using namespace clang;
using namespace clang::CodeGen;
namespace {

class AMDGPUABIInfo final : public DefaultABIInfo {
private:
  static const unsigned MaxNumRegsForArgsRet = 16;

  uint64_t numRegsForType(QualType Ty) const;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
  bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                         uint64_t Members) const override;

  // Coerce HIP scalar pointer arguments from generic pointers to global ones.
  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
                                       unsigned ToAS) const {
    auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
    if (PtrTy && PtrTy->getAddressSpace() == FromAS)
      return llvm::PointerType::get(Ty->getContext(), ToAS);
    return Ty;
  }

public:
  explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
  ABIArgInfo classifyArgumentType(QualType Ty, bool Variadic,
                                  unsigned &NumRegsLeft) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;

  llvm::FixedVectorType *
  getOptimalVectorMemoryType(llvm::FixedVectorType *T,
                             const LangOptions &Opt) const override {
    // <3 x i32> has a 96-bit in-memory size; since AMDGPU has legal 96-bit
    // loads and stores, it does not need to be widened to <4 x i32>.
    if (T->getNumElements() == 3 && getDataLayout().getTypeSizeInBits(T) == 96)
      return T;
    return DefaultABIInfo::getOptimalVectorMemoryType(T, Opt);
  }
};
bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  return true;
}

bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
    const Type *Base, uint64_t Members) const {
  uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;

  // Homogeneous aggregates may occupy at most 16 registers.
  return Members * NumRegs <= MaxNumRegsForArgsRet;
}
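
// For example, with a 32-bit base type such as float, NumRegs is 1 and a
// homogeneous aggregate may have up to 16 members; with a 64-bit base such as
// double, NumRegs is 2 and the limit drops to 8 members.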
// Check whether a record consists solely of integer-typed fields, possibly
// through nested records or constant arrays of them, so that it can safely be
// packed into integer registers.
static bool containsOnlyPackableIntegerTypes(const RecordDecl *RD,
                                             const ASTContext &Context) {
  for (const FieldDecl *Field : RD->fields()) {
    QualType FieldTy = Field->getType();
    if (Field->isBitField()) {
      // ... (bit-field handling elided)
    }
    // Recurse into nested records.
    if (const RecordDecl *NestedRD = FieldTy->getAsRecordDecl()) {
      if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
        return false;
    } else if (const auto *AT = Context.getAsConstantArrayType(FieldTy)) {
      QualType EltTy = AT->getElementType();
      // Recurse into records held in constant arrays.
      if (const RecordDecl *NestedRD = EltTy->getAsRecordDecl())
        if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
          return false;
      // ... (non-record element types must be integers)
    }
    // ... (remaining field types must be integers)
  }
  return true;
}
/// Estimate the number of 32-bit registers a value of this type occupies when
/// passed in registers.
uint64_t AMDGPUABIInfo::numRegsForType(QualType Ty) const {
  uint64_t NumRegs = 0;

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // Compute from the number of elements. The reported size is based on the
    // in-memory size, which includes the padding 4th element for 3-vectors.
    QualType EltTy = VT->getElementType();
    uint64_t EltSize = getContext().getTypeSize(EltTy);

    // 16-bit element vectors should be passed as packed.
    if (EltSize == 16)
      return (VT->getNumElements() + 1) / 2;

    uint64_t EltNumRegs = (EltSize + 31) / 32;
    return EltNumRegs * VT->getNumElements();
  }

  if (const RecordType *RT = Ty->getAs<RecordType>()) {
    const RecordDecl *RD = RT->getDecl();
    assert(!RD->hasFlexibleArrayMember());

    for (const FieldDecl *Field : RD->fields()) {
      QualType FieldTy = Field->getType();
      NumRegs += numRegsForType(FieldTy);
    }

    return NumRegs;
  }

  return (getContext().getTypeSize(Ty) + 31) / 32;
}
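
// Worked examples of the rules above: <3 x float> costs 3 registers (the
// padded 4th element of the in-memory size is not counted), <4 x half> packs
// two elements per register for (4 + 1) / 2 = 2 registers, and a struct of an
// i32 and a double sums its fields to 1 + 2 = 3 registers.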
void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
  llvm::CallingConv::ID CC = FI.getCallingConvention();

  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  unsigned ArgumentIndex = 0;
  const unsigned numFixedArguments = FI.getNumRequiredArgs();

  unsigned NumRegsLeft = MaxNumRegsForArgsRet;
  for (auto &Arg : FI.arguments()) {
    if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
      Arg.info = classifyKernelArgumentType(Arg.type);
    } else {
      bool FixedArgument = ArgumentIndex++ < numFixedArguments;
      Arg.info = classifyArgumentType(Arg.type, !FixedArgument, NumRegsLeft);
    }
  }
}
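
// NumRegsLeft is deliberately shared across all non-kernel arguments of a
// call: once the 16-register budget is spent, classifyArgumentType demotes
// the remaining aggregate arguments to pass-by-reference.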
RValue AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                QualType Ty, AggValueSlot Slot) const {
  const bool IsIndirect = false;
  const bool AllowHigherAlign = false;
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(4), AllowHigherAlign, Slot);
}
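
// Variadic arguments thus occupy 4-byte slots with alignment capped at 4
// (AllowHigherAlign is false), so for instance a double is loaded from a
// 4-byte-aligned slot rather than an 8-byte-aligned one.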
ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
  // ... (non-aggregate returns, empty records, single-element structs and
  // flexible array members are handled before this point)

  const RecordDecl *RD = RetTy->getAsRecordDecl();
  bool ShouldPackToInt =
      RD && containsOnlyPackableIntegerTypes(RD, getContext());

  // Pack small integer-only aggregates into a plain integer return value.
  if (ShouldPackToInt) {
    uint64_t Size = getContext().getTypeSize(RetTy);
    if (Size <= 16)
      return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
    if (Size <= 32)
      return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
    if (Size <= 64) {
      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
    }
  }

  if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
    return ABIArgInfo::getDirect();

  // Otherwise just do the default thing.
  return DefaultABIInfo::classifyReturnType(RetTy);
}
/// For kernels all parameters are really passed in a special buffer. It
/// doesn't make sense to pass anything byval, so everything must be direct.
ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  // Lower single-element structs to just pass a regular value.
  if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
    Ty = QualType(SeltTy, 0);

  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
  llvm::Type *LTy = OrigLTy;
  if (getContext().getLangOpts().HIP) {
    LTy = coerceKernelArgumentType(
        OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
        /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
  }

  // Aggregates that were not coerced are passed byref in the constant address
  // space instead of byval.
  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
      isAggregateTypeForABI(Ty)) {
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_constant),
        false /*Realign*/, nullptr /*Padding*/);
  }

  // CanBeFlattened is deliberately false so CodeGen does not expand the
  // struct into its individual elements.
  return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
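
// For a HIP kernel such as __global__ void k(int *p), the generic pointer
// type produced by the frontend is rewritten here to ptr addrspace(1), since
// pointer arguments of kernels always refer to global memory on AMDGPU.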
ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
                                               unsigned &NumRegsLeft) const {
  assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");

  Ty = useFirstFieldIfTransparentUnion(Ty);

  if (Variadic)
    return ABIArgInfo::getDirect(/*T=*/nullptr, /*Offset=*/0,
                                 /*Padding=*/nullptr,
                                 /*CanBeFlattened=*/false);

  if (isAggregateTypeForABI(Ty)) {
    // Records with non-trivial destructors/copy-constructors should not be
    // passed by value.
    if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
      return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                     RAA == CGCXXABI::RAA_DirectInMemory);
    // ... (empty records, single-element structs and flexible array members
    // are handled here, as in classifyReturnType)

    uint64_t Size = getContext().getTypeSize(Ty);
    const RecordDecl *RD = Ty->getAsRecordDecl();
    bool ShouldPackToInt =
        RD && containsOnlyPackableIntegerTypes(RD, getContext());

    // Pack small integer-only aggregates into a single integer register or a
    // pair of i32s.
    if (ShouldPackToInt && Size <= 64) {
      unsigned NumRegs = (Size + 31) / 32;
      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);

      if (Size <= 16)
        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
      if (Size <= 32)
        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
    }

    if (NumRegsLeft > 0) {
      uint64_t NumRegs = numRegsForType(Ty);
      if (NumRegsLeft >= NumRegs) {
        NumRegsLeft -= NumRegs;
        return ABIArgInfo::getDirect();
      }
    }

    // Use pass-by-reference instead of pass-by-value for struct arguments in
    // the function ABI.
    return ABIArgInfo::getIndirectAliased(
        getContext().getTypeAlignInChars(Ty),
        getContext().getTargetAddressSpace(LangAS::opencl_private));
  }

  // Otherwise just do the default thing, but keep the register bookkeeping.
  ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
  if (!ArgInfo.isIndirect()) {
    uint64_t NumRegs = numRegsForType(Ty);
    NumRegsLeft -= std::min(NumRegs, uint64_t{NumRegsLeft});
  }
  return ArgInfo;
}
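
// Example of the packing rule above: struct { int a; short b; } occupies 64
// bits and contains only integer fields, so it is passed directly as
// [2 x i32]; a same-sized struct containing a float is not packed and instead
// consumes numRegsForType(Ty) registers while the budget lasts.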
class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}

  bool supportsLibCall() const override { return false; }
  void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                 CodeGenModule &CGM) const;

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  unsigned getDeviceKernelCallingConv() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  LangAS getASTAllocaAddressSpace() const override {
    return getLangASFromTargetAS(
        getABIInfo().getDataLayout().getAllocaAddrSpace());
  }

  LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const override;

  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
                                  const VarDecl *D) const override;
  StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope,
                                llvm::AtomicOrdering Ordering) const override;
  void setTargetAtomicMetadata(CodeGenFunction &CGF,
                               llvm::Instruction &AtomicInst,
                               const AtomicExpr *Expr = nullptr) const override;
  llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
                                         llvm::Function *BlockInvokeFunc,
                                         llvm::Type *BlockTy) const override;
  bool shouldEmitStaticExternCAliases() const override;
  bool shouldEmitDWARFBitFieldSeparators() const override;
  void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
} // namespace
static bool requiresAMDGPUProtectedVisibility(const Decl *D,
                                              llvm::GlobalValue *GV) {
  if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
    return false;

  return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
         (D->hasAttr<DeviceKernelAttr>() ||
          (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
          (isa<VarDecl>(D) &&
           (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
}
void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
    const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
  const auto *ReqdWGS =
      M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
  const bool IsOpenCLKernel =
      M.getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>();
  const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();

  const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
  if (ReqdWGS || FlatWGS) {
    M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS);
  } else if (IsOpenCLKernel || IsHIPKernel) {
    // By default, restrict the maximum size to a value specified by
    // --gpu-max-threads-per-block=n or its default value for HIP.
    const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
    const unsigned DefaultMaxWorkGroupSize =
        IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
                       : M.getLangOpts().GPUMaxThreadsPerBlock;
    std::string AttrVal =
        std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
    F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
  }

  if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
    M.handleAMDGPUWavesPerEUAttr(F, Attr);

  if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
    unsigned NumSGPR = Attr->getNumSGPR();
    if (NumSGPR != 0)
      F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
  }

  if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
    uint32_t NumVGPR = Attr->getNumVGPR();
    if (NumVGPR != 0)
      F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
  }

  if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
    uint32_t X = Attr->getMaxNumWorkGroupsX()
                     ->EvaluateKnownConstInt(M.getContext())
                     .getExtValue();
    // Y and Z dimensions default to 1 if not specified.
    uint32_t Y = Attr->getMaxNumWorkGroupsY()
                     ? Attr->getMaxNumWorkGroupsY()
                           ->EvaluateKnownConstInt(M.getContext())
                           .getExtValue()
                     : 1;
    uint32_t Z = Attr->getMaxNumWorkGroupsZ()
                     ? Attr->getMaxNumWorkGroupsZ()
                           ->EvaluateKnownConstInt(M.getContext())
                           .getExtValue()
                     : 1;

    llvm::SmallString<32> AttrVal;
    llvm::raw_svector_ostream OS(AttrVal);
    OS << X << ',' << Y << ',' << Z;

    F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
  }

  if (auto *Attr = FD->getAttr<CUDAClusterDimsAttr>()) {
    auto GetExprVal = [&](const auto &E) {
      return E ? E->EvaluateKnownConstInt(M.getContext()).getExtValue() : 1;
    };
    unsigned X = GetExprVal(Attr->getX());
    unsigned Y = GetExprVal(Attr->getY());
    unsigned Z = GetExprVal(Attr->getZ());
    llvm::SmallString<32> AttrVal;
    llvm::raw_svector_ostream OS(AttrVal);
    OS << X << ',' << Y << ',' << Z;
    F->addFnAttr("amdgpu-cluster-dims", AttrVal.str());
  }

  // OpenCL does not expose clusters, so explicitly disable them for OpenCL
  // kernels on cluster-capable targets and for functions annotated with the
  // no-cluster attribute.
  if ((IsOpenCLKernel /* && ...cluster-support feature check elided... */) ||
      FD->hasAttr<CUDANoClusterAttr>())
    F->addFnAttr("amdgpu-cluster-dims", "0,0,0");
}
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (requiresAMDGPUProtectedVisibility(D, GV)) {
    GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
    GV->setDSOLocal(true);
  }

  if (GV->isDeclaration())
    return;

  llvm::Function *F = dyn_cast<llvm::Function>(GV);
  if (!F)
    return;

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (FD)
    setFunctionDeclAttributes(FD, F, M);

  if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
    F->addFnAttr("amdgpu-ieee", "false");
  if (getABIInfo().getCodeGenOpts().AMDGPUExpandWaitcntProfiling)
    F->addFnAttr("amdgpu-expand-waitcnt-profiling");
}
unsigned AMDGPUTargetCodeGenInfo::getDeviceKernelCallingConv() const {
  return llvm::CallingConv::AMDGPU_KERNEL;
}
llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
    const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
    QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
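
// This matters because AMDGPU uses a nonzero bit pattern for null in the
// local address space; emitting an addrspacecast of the generic null lets
// constant folding materialize the correct target-specific null value
// (see ASTContext::getTargetNullPointerValue).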
LangAS
AMDGPUTargetCodeGenInfo::getSRetAddrSpace(const CXXRecordDecl *RD) const {
  // Indirect return slots normally live in the alloca (private) address
  // space, but records that cannot be passed in registers must use the
  // default address space.
  if (RD && !RD->canPassInRegisters())
    return LangAS::Default;
  return getASTAllocaAddressSpace();
}
LangAS
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
                                                  const VarDecl *D) const {
  assert(!CGM.getLangOpts().OpenCL &&
         !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
         "Address space agnostic languages only");
  LangAS DefaultGlobalAS = getLangASFromTargetAS(
      CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
  if (!D)
    return DefaultGlobalAS;

  LangAS AddrSpace = D->getType().getAddressSpace();
  if (AddrSpace != LangAS::Default)
    return AddrSpace;

  // Promote constant-initialized globals to the constant address space when
  // the target provides one.
  if (D->getType().isConstantStorage(CGM.getContext(), false, false) &&
      D->hasConstantInitialization()) {
    if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
      return *ConstAS;
  }
  return DefaultGlobalAS;
}
StringRef AMDGPUTargetCodeGenInfo::getLLVMSyncScopeStr(
    const LangOptions &LangOpts, SyncScope Scope,
    llvm::AtomicOrdering Ordering) const {
  // OpenCL atomics weaker than seq_cst only order a single address space and
  // map to the "-one-as" variants of the target sync scopes.
  bool IsOneAs = (Scope >= SyncScope::OpenCLWorkGroup &&
                  Scope <= SyncScope::OpenCLSubGroup &&
                  Ordering != llvm::AtomicOrdering::SequentiallyConsistent);

  switch (Scope) {
  case SyncScope::HIPSingleThread:
  case SyncScope::SingleScope:
    return IsOneAs ? "singlethread-one-as" : "singlethread";
  case SyncScope::HIPWavefront:
  case SyncScope::OpenCLSubGroup:
  case SyncScope::WavefrontScope:
    return IsOneAs ? "wavefront-one-as" : "wavefront";
  case SyncScope::HIPCluster:
  case SyncScope::ClusterScope:
    assert(!IsOneAs && "OpenCL does not have cluster scope");
    return "cluster";
  case SyncScope::HIPWorkgroup:
  case SyncScope::OpenCLWorkGroup:
  case SyncScope::WorkgroupScope:
    return IsOneAs ? "workgroup-one-as" : "workgroup";
  case SyncScope::HIPAgent:
  case SyncScope::OpenCLDevice:
  case SyncScope::DeviceScope:
    return IsOneAs ? "agent-one-as" : "agent";
  case SyncScope::SystemScope:
  case SyncScope::HIPSystem:
  case SyncScope::OpenCLAllSVMDevices:
    return IsOneAs ? "one-as" : "";
  }
  llvm_unreachable("Unknown SyncScope enum");
}
void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
    CodeGenFunction &CGF, llvm::Instruction &AtomicInst,
    const AtomicExpr *AE) const {
  auto *RMW = dyn_cast<llvm::AtomicRMWInst>(&AtomicInst);
  auto *CmpX = dyn_cast<llvm::AtomicCmpXchgInst>(&AtomicInst);

  // OpenCL and old style HIP atomics consider atomics targeting thread
  // private memory to be undefined, so flat atomics may be annotated as not
  // aliasing the private address space.
  if (((RMW && RMW->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS) ||
       (CmpX &&
        CmpX->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS)) &&
      AE && AE->threadPrivateMemoryAtomicsAreUndefined()) {
    llvm::MDBuilder MDHelper(CGF.getLLVMContext());
    llvm::MDNode *ASRange = MDHelper.createRange(
        llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS),
        llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS + 1));
    AtomicInst.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
  }

  if (!RMW)
    return;

  AtomicOptions AO = CGF.CGM.getAtomicOpts();
  llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
  if (!AO.getOption(clang::AtomicOptionKind::FineGrainedMemory))
    RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);
  if (!AO.getOption(clang::AtomicOptionKind::RemoteMemory))
    RMW->setMetadata("amdgpu.no.remote.memory", Empty);
  if (AO.getOption(clang::AtomicOptionKind::IgnoreDenormalMode) &&
      RMW->getOperation() == llvm::AtomicRMWInst::FAdd &&
      RMW->getType()->isFloatTy())
    RMW->setMetadata("amdgpu.ignore.denormal.mode", Empty);
}
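
// Illustrative result: a HIP atomic fetch-add on a float compiled with the
// default atomic options can end up as
//   %old = atomicrmw fadd ptr %p, float %v seq_cst,
//            !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
// where !0 is the empty metadata node created above.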
bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
  return true;
}
void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
    const FunctionType *&FT) const {
  FT = getABIInfo().getContext().adjustFunctionType(
      FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel));
}
/// Return IR struct type for rtinfo struct in rocm-device-libs used for
/// device enqueue.
static llvm::StructType *
getAMDGPURuntimeHandleType(llvm::LLVMContext &C,
                           llvm::Type *KernelDescriptorPtrTy) {
  llvm::Type *Int32 = llvm::Type::getInt32Ty(C);
  return llvm::StructType::create(C, {KernelDescriptorPtrTy, Int32, Int32},
                                  "block.runtime.handle.t");
}
/// Create an OpenCL kernel for an enqueued block.
///
/// The type of the first argument (the block literal) is the struct type of
/// the block literal instead of a pointer type. The first argument is passed
/// directly by value to the kernel, which allocates the same struct type on
/// its stack, stores the argument to it and passes its address to the block
/// invoke function.
llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
    CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Type *BlockTy) const {
  auto &Builder = CGF.Builder;
  auto &C = CGF.getLLVMContext();

  auto *InvokeFT = Invoke->getFunctionType();
  llvm::SmallVector<llvm::Type *, 2> ArgTys;
  llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
  llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
  llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
  llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
  llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
  llvm::SmallVector<llvm::Metadata *, 8> ArgNames;

  ArgTys.push_back(BlockTy);
  ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
  AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
  ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
  ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
  AccessQuals.push_back(llvm::MDString::get(C, "none"));
  ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
  for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
    ArgTys.push_back(InvokeFT->getParamType(I));
    ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
    AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
    AccessQuals.push_back(llvm::MDString::get(C, "none"));
    ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
    ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
    ArgNames.push_back(
        llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
  }

  llvm::Module &Mod = CGF.CGM.getModule();
  const llvm::DataLayout &DL = Mod.getDataLayout();

  llvm::Twine Name = Invoke->getName() + "_kernel";
  auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);

  // The kernel itself can be internal; the runtime does not directly access
  // the kernel address, only the kernel descriptor.
  auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
                                   &Mod);
  F->setCallingConv(getDeviceKernelCallingConv());

  llvm::AttrBuilder KernelAttrs(C);
  CGF.CGM.addDefaultFunctionDefinitionAttributes(KernelAttrs);
  F->addFnAttrs(KernelAttrs);

  auto IP = CGF.Builder.saveIP();
  auto *BB = llvm::BasicBlock::Create(C, "entry", F);
  Builder.SetInsertPoint(BB);
  const auto BlockAlign = DL.getPrefTypeAlign(BlockTy);
  auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
  BlockPtr->setAlignment(BlockAlign);
  Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
  auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
  llvm::SmallVector<llvm::Value *, 2> Args;
  Args.push_back(Cast);
  for (llvm::Argument &A : llvm::drop_begin(F->args()))
    Args.push_back(&A);
  llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
  call->setCallingConv(Invoke->getCallingConv());
  Builder.CreateRetVoid();
  Builder.restoreIP(IP);

  F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
  F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
  F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
  F->setMetadata("kernel_arg_base_type",
                 llvm::MDNode::get(C, ArgBaseTypeNames));
  F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
  if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
    F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));

  llvm::StructType *HandleTy = getAMDGPURuntimeHandleType(
      C, llvm::PointerType::get(C, DL.getDefaultGlobalsAddressSpace()));
  llvm::Constant *RuntimeHandleInitializer =
      llvm::ConstantAggregateZero::get(HandleTy);

  llvm::Twine RuntimeHandleName = F->getName() + ".runtime.handle";

  // The runtime needs access to the runtime handle as an external symbol. The
  // runtime handle will need to be made external later, in
  // AMDGPUExportOpenCLEnqueuedBlocks. The kernel itself has a hidden reference
  // inside the runtime handle, and is not directly referenced.
  auto *RuntimeHandle = new llvm::GlobalVariable(
      Mod, HandleTy,
      /*isConstant=*/true, llvm::GlobalValue::InternalLinkage,
      /*Initializer=*/RuntimeHandleInitializer, RuntimeHandleName,
      /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
      DL.getDefaultGlobalsAddressSpace(),
      /*isExternallyInitialized=*/true);

  llvm::MDNode *HandleAsMD =
      llvm::MDNode::get(C, llvm::ValueAsMetadata::get(RuntimeHandle));
  F->setMetadata(llvm::LLVMContext::MD_associated, HandleAsMD);

  RuntimeHandle->setSection(".amdgpu.kernel.runtime.handle");

  CGF.CGM.addUsedGlobal(F);
  CGF.CGM.addUsedGlobal(RuntimeHandle);
  return RuntimeHandle;
}
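
// Net effect: each enqueued block produces one internal AMDGPU_KERNEL
// function plus an externally initialized ".runtime.handle" global in the
// ".amdgpu.kernel.runtime.handle" section; the kernel itself is only
// reachable through the !associated metadata on that handle.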
void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr(
    llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
    const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal,
    int32_t *MaxThreadsVal) {
  unsigned Min = 0;
  unsigned Max = 0;
  auto Eval = [&](Expr *E) {
    return E->EvaluateKnownConstInt(getContext()).getExtValue();
  };
  if (FlatWGS) {
    Min = Eval(FlatWGS->getMin());
    Max = Eval(FlatWGS->getMax());
  }
  if (ReqdWGS && Min == 0 && Max == 0)
    Min = Max = Eval(ReqdWGS->getXDim()) * Eval(ReqdWGS->getYDim()) *
                Eval(ReqdWGS->getZDim());

  if (Min != 0) {
    assert(Min <= Max && "Min must be less than or equal Max");

    if (MinThreadsVal)
      *MinThreadsVal = Min;
    if (MaxThreadsVal)
      *MaxThreadsVal = Max;
    std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
    if (F)
      F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
  } else
    assert(Max == 0 && "Max must be zero");
}
void CodeGenModule::handleAMDGPUWavesPerEUAttr(
    llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) {
  unsigned Min =
      Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
  unsigned Max =
      Attr->getMax()
          ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue()
          : 0;

  if (Min != 0) {
    assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");

    std::string AttrVal = llvm::utostr(Min);
    if (Max != 0)
      AttrVal = AttrVal + "," + llvm::utostr(Max);
    F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
  } else
    assert(Max == 0 && "Max must be zero");
}
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes());
}