10#include "TargetInfo.h"
12#include "llvm/ADT/STLExtras.h"
13#include "llvm/ADT/StringExtras.h"
14#include "llvm/IR/CallingConv.h"
15#include "llvm/IR/IntrinsicsNVPTX.h"
16#include "llvm/Support/NVVMAttributes.h"
27class NVPTXTargetCodeGenInfo;
29class NVPTXABIInfo :
public ABIInfo {
30 NVPTXTargetCodeGenInfo &CGInfo;
33 NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
34 : ABIInfo(CGT), CGInfo(Info) {}
39 void computeInfo(CGFunctionInfo &FI)
const override;
40 RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
41 AggValueSlot Slot)
const override;
42 bool isUnsupportedType(QualType T)
const;
43 ABIArgInfo coerceToIntArrayWithLimit(QualType Ty,
unsigned MaxSize)
const;
48 NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
49 : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
51 void setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV,
52 CodeGen::CodeGenModule &M)
const override;
53 bool shouldEmitStaticExternCAliases()
const override;
55 StringRef getLLVMSyncScopeStr(
const LangOptions &LangOpts,
SyncScope Scope,
56 llvm::AtomicOrdering Ordering)
const override;
58 llvm::Constant *getNullPointer(
const CodeGen::CodeGenModule &CGM,
60 QualType QT)
const override;
62 llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType()
const override {
65 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
68 llvm::Type *getCUDADeviceBuiltinTextureDeviceType()
const override {
71 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
74 bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
75 LValue Src)
const override {
76 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
80 bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
81 LValue Src)
const override {
82 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
86 unsigned getDeviceKernelCallingConv()
const override {
87 return llvm::CallingConv::PTX_Kernel;
92 static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
96 static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
98 llvm::Value *Handle =
nullptr;
100 llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
102 if (
auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(
C))
103 C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
104 if (
auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(
C)) {
110 {GV},
"texsurf_handle");
118bool NVPTXABIInfo::isUnsupportedType(
QualType T)
const {
119 ASTContext &Context = getContext();
126 if (
const auto *EIT = T->
getAs<BitIntType>())
127 return EIT->getNumBits() >
133 return isUnsupportedType(AT->getElementType());
139 if (
const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
140 for (
const CXXBaseSpecifier &I : CXXRD->bases())
141 if (isUnsupportedType(I.getType()))
144 for (
const FieldDecl *I : RD->fields())
145 if (isUnsupportedType(I->getType()))
151ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
152 unsigned MaxSize)
const {
155 const uint64_t Alignment = getContext().getTypeAlign(Ty);
156 const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
157 llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
162ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy)
const {
166 if (getContext().getLangOpts().OpenMP &&
167 getContext().getLangOpts().OpenMPIsTargetDevice &&
168 isUnsupportedType(RetTy))
169 return coerceToIntArrayWithLimit(RetTy, 64);
177 RetTy = ED->getIntegerType();
183ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty)
const {
186 Ty = ED->getIntegerType();
192 if (getContext().getLangOpts().CUDAIsDevice) {
195 CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
198 CGInfo.getCUDADeviceBuiltinTextureDeviceType());
200 return getNaturalAlignIndirect(
201 Ty, getDataLayout().getAllocaAddrSpace(),
205 if (
const auto *EIT = Ty->
getAs<BitIntType>()) {
206 if ((EIT->getNumBits() > 128) ||
207 (!getContext().getTargetInfo().hasInt128Type() &&
208 EIT->getNumBits() > 64))
209 return getNaturalAlignIndirect(
210 Ty, getDataLayout().getAllocaAddrSpace(),
218void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI)
const {
222 for (
auto &&[ArgumentsCount, I] : llvm::enumerate(FI.
arguments()))
225 : ABIArgInfo::getDirect();
234RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
235 QualType Ty, AggValueSlot Slot)
const {
237 getContext().getTypeInfoInChars(Ty),
242void NVPTXTargetCodeGenInfo::setTargetAttributes(
243 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M)
const {
244 if (GV->isDeclaration())
246 const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
250 addNVVMMetadata(GV,
"surface", 1);
252 addNVVMMetadata(GV,
"texture", 1);
257 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
267 if (FD->
hasAttr<DeviceKernelAttr>() || FD->
hasAttr<CUDAGlobalAttr>()) {
270 F->addFnAttr(llvm::Attribute::NoInline);
271 if (FD->
hasAttr<CUDAGlobalAttr>()) {
272 F->setCallingConv(getDeviceKernelCallingConv());
274 for (
auto IV : llvm::enumerate(FD->
parameters()))
275 if (IV.value()->hasAttr<CUDAGridConstantAttr>())
276 F->addParamAttr(IV.index(),
277 llvm::Attribute::get(F->getContext(),
278 llvm::NVVMAttr::GridConstant));
280 if (CUDALaunchBoundsAttr *Attr = FD->
getAttr<CUDALaunchBoundsAttr>())
286void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
287 StringRef Name,
int Operand) {
288 llvm::Module *M = GV->getParent();
292 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(
"nvvm.annotations");
294 SmallVector<llvm::Metadata *, 5> MDVals = {
295 llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
296 llvm::ConstantAsMetadata::get(
297 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
300 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
303bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases()
const {
307StringRef NVPTXTargetCodeGenInfo::getLLVMSyncScopeStr(
308 const LangOptions &LangOpts,
SyncScope Scope,
309 llvm::AtomicOrdering Ordering)
const {
311 case SyncScope::HIPSingleThread:
312 case SyncScope::SingleScope:
313 return "singlethread";
314 case SyncScope::HIPWavefront:
315 case SyncScope::OpenCLSubGroup:
316 case SyncScope::WavefrontScope:
317 case SyncScope::HIPWorkgroup:
318 case SyncScope::OpenCLWorkGroup:
319 case SyncScope::WorkgroupScope:
321 case SyncScope::HIPCluster:
322 case SyncScope::ClusterScope:
324 case SyncScope::HIPAgent:
325 case SyncScope::OpenCLDevice:
326 case SyncScope::DeviceScope:
328 case SyncScope::SystemScope:
329 case SyncScope::HIPSystem:
330 case SyncScope::OpenCLAllSVMDevices:
333 llvm_unreachable(
"Unknown SyncScope enum");
337NVPTXTargetCodeGenInfo::getNullPointer(
const CodeGen::CodeGenModule &CGM,
338 llvm::PointerType *PT,
341 if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
342 return llvm::ConstantPointerNull::get(PT);
344 auto NPT = llvm::PointerType::get(
345 PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
346 return llvm::ConstantExpr::getAddrSpaceCast(
347 llvm::ConstantPointerNull::get(NPT), PT);
352 const CUDALaunchBoundsAttr *
Attr,
353 int32_t *MaxThreadsVal,
354 int32_t *MinBlocksVal,
355 int32_t *MaxClusterRankVal) {
356 llvm::APSInt MaxThreads(32);
357 MaxThreads =
Attr->getMaxThreads()->EvaluateKnownConstInt(
getContext());
358 if (MaxThreads > 0) {
360 *MaxThreadsVal = MaxThreads.getExtValue();
362 F->addFnAttr(llvm::NVVMAttr::MaxNTID,
363 llvm::utostr(MaxThreads.getExtValue()));
369 if (
Attr->getMinBlocks()) {
370 llvm::APSInt MinBlocks(32);
371 MinBlocks =
Attr->getMinBlocks()->EvaluateKnownConstInt(
getContext());
374 *MinBlocksVal = MinBlocks.getExtValue();
376 F->addFnAttr(llvm::NVVMAttr::MinCTASm,
377 llvm::utostr(MinBlocks.getExtValue()));
380 if (
Attr->getMaxBlocks()) {
381 llvm::APSInt MaxBlocks(32);
382 MaxBlocks =
Attr->getMaxBlocks()->EvaluateKnownConstInt(
getContext());
384 if (MaxClusterRankVal)
385 *MaxClusterRankVal = MaxBlocks.getExtValue();
387 F->addFnAttr(llvm::NVVMAttr::MaxClusterRank,
388 llvm::utostr(MaxBlocks.getExtValue()));
393std::unique_ptr<TargetCodeGenInfo>
395 return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.
getTypes());
Provides definitions for the atomic synchronization scopes.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
const TargetInfo & getTargetInfo() const
Attr - This represents one attribute.
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
static ABIArgInfo getIgnore()
static ABIArgInfo getDirect(llvm::Type *T=nullptr, unsigned Offset=0, llvm::Type *Padding=nullptr, bool CanBeFlattened=true, unsigned Align=0)
static ABIArgInfo getExtend(QualType Ty, llvm::Type *T=nullptr)
ABIInfo - Target specific hooks for defining how a type should be passed or returned from functions.
ABIArgInfo & getReturnInfo()
unsigned getCallingConvention() const
getCallingConvention - Return the user specified calling convention, which has been translated into a...
CanQualType getReturnType() const
MutableArrayRef< ArgInfo > arguments()
void setEffectiveCallingConvention(unsigned Value)
unsigned getNumRequiredArgs() const
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
const LangOptions & getLangOpts() const
CodeGenTypes & getTypes()
ASTContext & getContext() const
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
TargetCodeGenInfo - This class organizes various target-specific code generation issues,...
ArrayRef< ParmVarDecl * > parameters() const
A (possibly-)qualified type.
virtual bool hasInt128Type() const
Determine whether the __int128 type is supported on this target.
virtual bool hasFloat16Type() const
Determine whether the _Float16 type is supported on this target.
virtual bool hasFloat128Type() const
Determine whether the __float128 type is supported on this target.
bool isFloat16Type() const
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
bool isScalarType() const
bool isFloat128Type() const
bool isCUDADeviceBuiltinSurfaceType() const
Check if the type is the CUDA device builtin surface type.
bool isCUDADeviceBuiltinTextureType() const
Check if the type is the CUDA device builtin texture type.
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
EnumDecl * getAsEnumDecl() const
Retrieves the EnumDecl this type refers to.
bool isRealFloatingType() const
Floating point categories.
const T * getAs() const
Member template getAs<specific type>.
ABIArgInfo classifyArgumentType(CodeGenModule &CGM, CanQualType type)
Classify the rules for how to pass a particular type.
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, const ABIInfo &Info)
std::unique_ptr< TargetCodeGenInfo > createNVPTXTargetCodeGenInfo(CodeGenModule &CGM)
RValue emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType ValueTy, bool IsIndirect, TypeInfoChars ValueInfo, CharUnits SlotSizeAndAlign, bool AllowHigherAlign, AggValueSlot Slot, bool ForceRightAdjust=false)
Emit va_arg for a platform using the common void* representation, where arguments are simply emitted ...
bool isAggregateTypeForABI(QualType T)
bool Div(InterpState &S, CodePtr OpPC)
1) Pops the RHS from the stack.
The JSON file list parser is used to communicate input to InstallAPI.
SyncScope
Defines sync scope values used internally by clang.
U cast(CodeGen::Address addr)