clang 19.0.0git
NVPTX.cpp
Go to the documentation of this file.
1//===- NVPTX.cpp ----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ABIInfoImpl.h"
10#include "TargetInfo.h"
11#include "llvm/IR/IntrinsicsNVPTX.h"
12
13using namespace clang;
14using namespace clang::CodeGen;
15
16//===----------------------------------------------------------------------===//
17// NVPTX ABI Implementation
18//===----------------------------------------------------------------------===//
19
20namespace {
21
22class NVPTXTargetCodeGenInfo;
23
24class NVPTXABIInfo : public ABIInfo {
25 NVPTXTargetCodeGenInfo &CGInfo;
26
27public:
28 NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
29 : ABIInfo(CGT), CGInfo(Info) {}
30
33
34 void computeInfo(CGFunctionInfo &FI) const override;
36 QualType Ty) const override;
37 bool isUnsupportedType(QualType T) const;
38 ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
39};
40
41class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
42public:
43 NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
44 : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
45
46 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
47 CodeGen::CodeGenModule &M) const override;
48 bool shouldEmitStaticExternCAliases() const override;
49
50 llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
51 llvm::PointerType *T,
52 QualType QT) const override;
53
54 llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
55 // On the device side, surface reference is represented as an object handle
56 // in 64-bit integer.
57 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
58 }
59
60 llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
61 // On the device side, texture reference is represented as an object handle
62 // in 64-bit integer.
63 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
64 }
65
67 LValue Src) const override {
68 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
69 return true;
70 }
71
73 LValue Src) const override {
74 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
75 return true;
76 }
77
78 // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
79 // resulting MDNode to the nvvm.annotations MDNode.
80 static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
81 int Operand);
82
83private:
84 static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
85 LValue Src) {
86 llvm::Value *Handle = nullptr;
87 llvm::Constant *C =
88 llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).emitRawPointer(CGF));
89 // Lookup `addrspacecast` through the constant pointer if any.
90 if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
91 C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
92 if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
93 // Load the handle from the specific global variable using
94 // `nvvm.texsurf.handle.internal` intrinsic.
95 Handle = CGF.EmitRuntimeCall(
96 CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
97 {GV->getType()}),
98 {GV}, "texsurf_handle");
99 } else
100 Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
101 CGF.EmitStoreOfScalar(Handle, Dst);
102 }
103};
104
105/// Checks if the type is unsupported directly by the current target.
106bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
107 ASTContext &Context = getContext();
108 if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
109 return true;
110 if (!Context.getTargetInfo().hasFloat128Type() &&
111 (T->isFloat128Type() ||
112 (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
113 return true;
114 if (const auto *EIT = T->getAs<BitIntType>())
115 return EIT->getNumBits() >
116 (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
117 if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
118 Context.getTypeSize(T) > 64U)
119 return true;
120 if (const auto *AT = T->getAsArrayTypeUnsafe())
121 return isUnsupportedType(AT->getElementType());
122 const auto *RT = T->getAs<RecordType>();
123 if (!RT)
124 return false;
125 const RecordDecl *RD = RT->getDecl();
126
127 // If this is a C++ record, check the bases first.
128 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
129 for (const CXXBaseSpecifier &I : CXXRD->bases())
130 if (isUnsupportedType(I.getType()))
131 return true;
132
133 for (const FieldDecl *I : RD->fields())
134 if (isUnsupportedType(I->getType()))
135 return true;
136 return false;
137}
138
139/// Coerce the given type into an array with maximum allowed size of elements.
140ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
141 unsigned MaxSize) const {
142 // Alignment and Size are measured in bits.
143 const uint64_t Size = getContext().getTypeSize(Ty);
144 const uint64_t Alignment = getContext().getTypeAlign(Ty);
145 const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
146 llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
147 const uint64_t NumElements = (Size + Div - 1) / Div;
148 return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
149}
150
151ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
152 if (RetTy->isVoidType())
153 return ABIArgInfo::getIgnore();
154
155 if (getContext().getLangOpts().OpenMP &&
156 getContext().getLangOpts().OpenMPIsTargetDevice &&
157 isUnsupportedType(RetTy))
158 return coerceToIntArrayWithLimit(RetTy, 64);
159
160 // note: this is different from default ABI
161 if (!RetTy->isScalarType())
162 return ABIArgInfo::getDirect();
163
164 // Treat an enum type as its underlying type.
165 if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
166 RetTy = EnumTy->getDecl()->getIntegerType();
167
168 return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
170}
171
172ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
173 // Treat an enum type as its underlying type.
174 if (const EnumType *EnumTy = Ty->getAs<EnumType>())
175 Ty = EnumTy->getDecl()->getIntegerType();
176
177 // Return aggregates type as indirect by value
178 if (isAggregateTypeForABI(Ty)) {
179 // Under CUDA device compilation, tex/surf builtin types are replaced with
180 // object types and passed directly.
181 if (getContext().getLangOpts().CUDAIsDevice) {
184 CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
187 CGInfo.getCUDADeviceBuiltinTextureDeviceType());
188 }
189 return getNaturalAlignIndirect(Ty, /* byval */ true);
190 }
191
192 if (const auto *EIT = Ty->getAs<BitIntType>()) {
193 if ((EIT->getNumBits() > 128) ||
194 (!getContext().getTargetInfo().hasInt128Type() &&
195 EIT->getNumBits() > 64))
196 return getNaturalAlignIndirect(Ty, /* byval */ true);
197 }
198
199 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
201}
202
203void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
204 if (!getCXXABI().classifyReturnType(FI))
206 for (auto &I : FI.arguments())
207 I.info = classifyArgumentType(I.type);
208
209 // Always honor user-specified calling convention.
210 if (FI.getCallingConvention() != llvm::CallingConv::C)
211 return;
212
213 FI.setEffectiveCallingConvention(getRuntimeCC());
214}
215
/// Required override of the pure-virtual ABIInfo::EmitVAArg hook. Nothing is
/// emitted: the body only marks this path as unreachable, with a message
/// stating that NVPTX does not support varargs.
216Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
217 QualType Ty) const {
218 llvm_unreachable("NVPTX does not support varargs");
219}
220
221void NVPTXTargetCodeGenInfo::setTargetAttributes(
222 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
223 if (GV->isDeclaration())
224 return;
225 const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
226 if (VD) {
227 if (M.getLangOpts().CUDA) {
229 addNVVMMetadata(GV, "surface", 1);
230 else if (VD->getType()->isCUDADeviceBuiltinTextureType())
231 addNVVMMetadata(GV, "texture", 1);
232 return;
233 }
234 }
235
236 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
237 if (!FD) return;
238
239 llvm::Function *F = cast<llvm::Function>(GV);
240
241 // Perform special handling in OpenCL mode
242 if (M.getLangOpts().OpenCL) {
243 // Use OpenCL function attributes to check for kernel functions
244 // By default, all functions are device functions
245 if (FD->hasAttr<OpenCLKernelAttr>()) {
246 // OpenCL __kernel functions get kernel metadata
247 // Create !{<func-ref>, metadata !"kernel", i32 1} node
248 addNVVMMetadata(F, "kernel", 1);
249 // And kernel functions are not subject to inlining
250 F->addFnAttr(llvm::Attribute::NoInline);
251 }
252 }
253
254 // Perform special handling in CUDA mode.
255 if (M.getLangOpts().CUDA) {
256 // CUDA __global__ functions get a kernel metadata entry. Since
257 // __global__ functions cannot be called from the device, we do not
258 // need to set the noinline attribute.
259 if (FD->hasAttr<CUDAGlobalAttr>()) {
260 // Create !{<func-ref>, metadata !"kernel", i32 1} node
261 addNVVMMetadata(F, "kernel", 1);
262 }
263 if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
265 }
266
267 // Attach kernel metadata directly if compiling for NVPTX.
268 if (FD->hasAttr<NVPTXKernelAttr>()) {
269 addNVVMMetadata(F, "kernel", 1);
270 }
271}
272
273void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
274 StringRef Name, int Operand) {
275 llvm::Module *M = GV->getParent();
276 llvm::LLVMContext &Ctx = M->getContext();
277
278 // Get "nvvm.annotations" metadata node
279 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
280
281 llvm::Metadata *MDVals[] = {
282 llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
283 llvm::ConstantAsMetadata::get(
284 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
285 // Append metadata to nvvm.annotations
286 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
287}
288
/// Override of the TargetCodeGenInfo hook of the same name; for NVPTX the
/// answer is unconditionally false.
289bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
290 return false;
291}
292
293llvm::Constant *
294NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
295 llvm::PointerType *PT,
296 QualType QT) const {
297 auto &Ctx = CGM.getContext();
298 if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
299 return llvm::ConstantPointerNull::get(PT);
300
301 auto NPT = llvm::PointerType::get(
302 PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
303 return llvm::ConstantExpr::getAddrSpaceCast(
304 llvm::ConstantPointerNull::get(NPT), PT);
305}
306}
307
309 const CUDALaunchBoundsAttr *Attr,
310 int32_t *MaxThreadsVal,
311 int32_t *MinBlocksVal,
312 int32_t *MaxClusterRankVal) {
313 // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
314 llvm::APSInt MaxThreads(32);
315 MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
316 if (MaxThreads > 0) {
317 if (MaxThreadsVal)
318 *MaxThreadsVal = MaxThreads.getExtValue();
319 if (F) {
320 // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
321 NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
322 MaxThreads.getExtValue());
323 }
324 }
325
326 // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
327 // was not specified in __launch_bounds__ or if the user specified a 0 value,
328 // we don't have to add a PTX directive.
329 if (Attr->getMinBlocks()) {
330 llvm::APSInt MinBlocks(32);
331 MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
332 if (MinBlocks > 0) {
333 if (MinBlocksVal)
334 *MinBlocksVal = MinBlocks.getExtValue();
335 if (F) {
336 // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
337 NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
338 MinBlocks.getExtValue());
339 }
340 }
341 }
342 if (Attr->getMaxBlocks()) {
343 llvm::APSInt MaxBlocks(32);
344 MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
345 if (MaxBlocks > 0) {
346 if (MaxClusterRankVal)
347 *MaxClusterRankVal = MaxBlocks.getExtValue();
348 if (F) {
349 // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
350 NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
351 MaxBlocks.getExtValue());
352 }
353 }
354 }
355}
356
357std::unique_ptr<TargetCodeGenInfo>
359 return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
360}
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2329
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:757
Attr - This represents one attribute.
Definition: Attr.h:42
A fixed int type of a specified bitwidth.
Definition: Type.h:7032
Represents a base class of a C++ class.
Definition: DeclCXX.h:146
Represents a C++ struct/union/class.
Definition: DeclCXX.h:258
ABIArgInfo - Helper class to encapsulate information about how a specific C type should be passed to ...
static ABIArgInfo getIgnore()
static ABIArgInfo getDirect(llvm::Type *T=nullptr, unsigned Offset=0, llvm::Type *Padding=nullptr, bool CanBeFlattened=true, unsigned Align=0)
static ABIArgInfo getExtend(QualType Ty, llvm::Type *T=nullptr)
ABIInfo - Target specific hooks for defining how a type should be passed or returned from functions.
Definition: ABIInfo.h:45
virtual CodeGen::Address EmitVAArg(CodeGen::CodeGenFunction &CGF, CodeGen::Address VAListAddr, QualType Ty) const =0
EmitVAArg - Emit the target dependent code to load a value of type Ty from the va_list pointed to by VAListAddr.
virtual void computeInfo(CodeGen::CGFunctionInfo &FI) const =0
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:111
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:220
CGFunctionInfo - Class to encapsulate the information about a function definition.
unsigned getCallingConvention() const
getCallingConvention - Return the user specified calling convention, which has been translated into a...
CanQualType getReturnType() const
MutableArrayRef< ArgInfo > arguments()
void setEffectiveCallingConvention(unsigned Value)
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition: NVPTX.cpp:308
const LangOptions & getLangOpts() const
ASTContext & getContext() const
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=std::nullopt)
This class organizes the cross-module state that is used while lowering AST types to LLVM types.
Definition: CodeGenTypes.h:54
LValue - This represents an lvalue reference.
Definition: CGValue.h:181
Address getAddress(CodeGenFunction &CGF) const
Definition: CGValue.h:370
TargetCodeGenInfo - This class organizes various target-specific codegeneration issues,...
Definition: TargetInfo.h:46
virtual bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst, LValue Src) const
Emit the device-side copy of the builtin surface type.
Definition: TargetInfo.h:396
virtual bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst, LValue Src) const
Emit the device-side copy of the builtin texture type.
Definition: TargetInfo.h:403
virtual llvm::Type * getCUDADeviceBuiltinSurfaceDeviceType() const
Return the device-side type for the CUDA device builtin surface type.
Definition: TargetInfo.h:379
const T & getABIInfo() const
Definition: TargetInfo.h:56
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const
setTargetAttributes - Provides a convenient hook to handle extra target-specific attributes for the g...
Definition: TargetInfo.h:75
virtual llvm::Type * getCUDADeviceBuiltinTextureDeviceType() const
Return the device-side type for the CUDA device builtin texture type.
Definition: TargetInfo.h:384
virtual llvm::Constant * getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const
Get target specific null pointer.
Definition: TargetInfo.cpp:119
virtual bool shouldEmitStaticExternCAliases() const
Definition: TargetInfo.h:369
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:85
T * getAttr() const
Definition: DeclBase.h:580
bool hasAttr() const
Definition: DeclBase.h:584
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of enums.
Definition: Type.h:5365
Represents a member of a struct/union/class.
Definition: Decl.h:3058
Represents a function declaration or definition.
Definition: Decl.h:1971
A (possibly-)qualified type.
Definition: Type.h:738
Represents a struct/union/class.
Definition: Decl.h:4169
field_range fields() const
Definition: Decl.h:4375
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of structs/unions/cl...
Definition: Type.h:5339
Encodes a location in the source.
virtual bool hasInt128Type() const
Determine whether the __int128 type is supported on this target.
Definition: TargetInfo.h:634
virtual bool hasFloat16Type() const
Determine whether the _Float16 type is supported on this target.
Definition: TargetInfo.h:675
virtual bool hasFloat128Type() const
Determine whether the __float128 type is supported on this target.
Definition: TargetInfo.h:672
bool isVoidType() const
Definition: Type.h:7695
bool isFloat16Type() const
Definition: Type.h:7704
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:7735
bool isScalarType() const
Definition: Type.h:7794
bool isFloat128Type() const
Definition: Type.h:7720
bool isCUDADeviceBuiltinSurfaceType() const
Check if the type is the CUDA device builtin surface type.
Definition: Type.cpp:4906
bool isCUDADeviceBuiltinTextureType() const
Check if the type is the CUDA device builtin texture type.
Definition: Type.cpp:4913
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition: Type.h:7966
bool isRealFloatingType() const
Floating point categories.
Definition: Type.cpp:2254
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:7913
QualType getType() const
Definition: Decl.h:717
Represents a variable declaration or definition.
Definition: Decl.h:918
ABIArgInfo classifyArgumentType(CodeGenModule &CGM, CanQualType type)
Classify the rules for how to pass a particular type.
bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, const ABIInfo &Info)
std::unique_ptr< TargetCodeGenInfo > createNVPTXTargetCodeGenInfo(CodeGenModule &CGM)
Definition: NVPTX.cpp:358
bool isAggregateTypeForABI(QualType T)
bool Div(InterpState &S, CodePtr OpPC)
1) Pops the RHS from the stack.
Definition: Interp.h:439
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
unsigned long uint64_t
Definition: Format.h:5394