clang 23.0.0git
NVPTX.cpp
Go to the documentation of this file.
1//===- NVPTX.cpp ----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ABIInfoImpl.h"
10#include "TargetInfo.h"
12#include "llvm/ADT/STLExtras.h"
13#include "llvm/ADT/StringExtras.h"
14#include "llvm/IR/CallingConv.h"
15#include "llvm/IR/IntrinsicsNVPTX.h"
16#include "llvm/Support/NVVMAttributes.h"
17
18using namespace clang;
19using namespace clang::CodeGen;
20
21//===----------------------------------------------------------------------===//
22// NVPTX ABI Implementation
23//===----------------------------------------------------------------------===//
24
25namespace {
26
27class NVPTXTargetCodeGenInfo;
28
29class NVPTXABIInfo : public ABIInfo {
30 NVPTXTargetCodeGenInfo &CGInfo;
31
32public:
33 NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
34 : ABIInfo(CGT), CGInfo(Info) {}
35
36 ABIArgInfo classifyReturnType(QualType RetTy) const;
37 ABIArgInfo classifyArgumentType(QualType Ty) const;
38
39 void computeInfo(CGFunctionInfo &FI) const override;
40 RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
41 AggValueSlot Slot) const override;
42 bool isUnsupportedType(QualType T) const;
43 ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
44};
45
46class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
47public:
48 NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
49 : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
50
51 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
52 CodeGen::CodeGenModule &M) const override;
53 bool shouldEmitStaticExternCAliases() const override;
54
55 StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope,
56 llvm::AtomicOrdering Ordering) const override;
57
58 llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
59 llvm::PointerType *T,
60 QualType QT) const override;
61
62 llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
63 // On the device side, surface reference is represented as an object handle
64 // in 64-bit integer.
65 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
66 }
67
68 llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
69 // On the device side, texture reference is represented as an object handle
70 // in 64-bit integer.
71 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
72 }
73
74 bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
75 LValue Src) const override {
76 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
77 return true;
78 }
79
80 bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
81 LValue Src) const override {
82 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
83 return true;
84 }
85
86 unsigned getDeviceKernelCallingConv() const override {
87 return llvm::CallingConv::PTX_Kernel;
88 }
89
90 // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
91 // resulting MDNode to the nvvm.annotations MDNode.
92 static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
93 int Operand);
94
95private:
96 static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
97 LValue Src) {
98 llvm::Value *Handle = nullptr;
99 llvm::Constant *C =
100 llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
101 // Lookup `addrspacecast` through the constant pointer if any.
102 if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
103 C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
104 if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
105 // Load the handle from the specific global variable using
106 // `nvvm.texsurf.handle.internal` intrinsic.
107 Handle = CGF.EmitRuntimeCall(
108 CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
109 {GV->getType()}),
110 {GV}, "texsurf_handle");
111 } else
112 Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
113 CGF.EmitStoreOfScalar(Handle, Dst);
114 }
115};
116
117/// Checks if the type is unsupported directly by the current target.
118bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
119 ASTContext &Context = getContext();
120 if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
121 return true;
122 if (!Context.getTargetInfo().hasFloat128Type() &&
123 (T->isFloat128Type() ||
124 (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
125 return true;
126 if (const auto *EIT = T->getAs<BitIntType>())
127 return EIT->getNumBits() >
128 (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
129 if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
130 Context.getTypeSize(T) > 64U)
131 return true;
132 if (const auto *AT = T->getAsArrayTypeUnsafe())
133 return isUnsupportedType(AT->getElementType());
134 const auto *RD = T->getAsRecordDecl();
135 if (!RD)
136 return false;
137
138 // If this is a C++ record, check the bases first.
139 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
140 for (const CXXBaseSpecifier &I : CXXRD->bases())
141 if (isUnsupportedType(I.getType()))
142 return true;
143
144 for (const FieldDecl *I : RD->fields())
145 if (isUnsupportedType(I->getType()))
146 return true;
147 return false;
148}
149
150/// Coerce the given type into an array with maximum allowed size of elements.
151ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
152 unsigned MaxSize) const {
153 // Alignment and Size are measured in bits.
154 const uint64_t Size = getContext().getTypeSize(Ty);
155 const uint64_t Alignment = getContext().getTypeAlign(Ty);
156 const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
157 llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
158 const uint64_t NumElements = (Size + Div - 1) / Div;
159 return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
160}
161
162ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
163 if (RetTy->isVoidType())
164 return ABIArgInfo::getIgnore();
165
166 if (getContext().getLangOpts().OpenMP &&
167 getContext().getLangOpts().OpenMPIsTargetDevice &&
168 isUnsupportedType(RetTy))
169 return coerceToIntArrayWithLimit(RetTy, 64);
170
171 // note: this is different from default ABI
172 if (!RetTy->isScalarType())
173 return ABIArgInfo::getDirect();
174
175 // Treat an enum type as its underlying type.
176 if (const auto *ED = RetTy->getAsEnumDecl())
177 RetTy = ED->getIntegerType();
178
179 return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
181}
182
183ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
184 // Treat an enum type as its underlying type.
185 if (const auto *ED = Ty->getAsEnumDecl())
186 Ty = ED->getIntegerType();
187
188 // Return aggregates type as indirect by value
189 if (isAggregateTypeForABI(Ty)) {
190 // Under CUDA device compilation, tex/surf builtin types are replaced with
191 // object types and passed directly.
192 if (getContext().getLangOpts().CUDAIsDevice) {
195 CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
198 CGInfo.getCUDADeviceBuiltinTextureDeviceType());
199 }
200 return getNaturalAlignIndirect(
201 Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
202 /* byval */ true);
203 }
204
205 if (const auto *EIT = Ty->getAs<BitIntType>()) {
206 if ((EIT->getNumBits() > 128) ||
207 (!getContext().getTargetInfo().hasInt128Type() &&
208 EIT->getNumBits() > 64))
209 return getNaturalAlignIndirect(
210 Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
211 /* byval */ true);
212 }
213
214 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
216}
217
218void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
219 if (!getCXXABI().classifyReturnType(FI))
221
222 for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
223 I.info = ArgumentsCount < FI.getNumRequiredArgs()
224 ? classifyArgumentType(I.type)
225 : ABIArgInfo::getDirect();
226
227 // Always honor user-specified calling convention.
228 if (FI.getCallingConvention() != llvm::CallingConv::C)
229 return;
230
231 FI.setEffectiveCallingConvention(getRuntimeCC());
232}
233
234RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
235 QualType Ty, AggValueSlot Slot) const {
236 return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
237 getContext().getTypeInfoInChars(Ty),
239 /*AllowHigherAlign=*/true, Slot);
240}
241
242void NVPTXTargetCodeGenInfo::setTargetAttributes(
243 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
244 if (GV->isDeclaration())
245 return;
246 const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
247 if (VD) {
248 if (M.getLangOpts().CUDA) {
250 addNVVMMetadata(GV, "surface", 1);
251 else if (VD->getType()->isCUDADeviceBuiltinTextureType())
252 addNVVMMetadata(GV, "texture", 1);
253 return;
254 }
255 }
256
257 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
258 if (!FD)
259 return;
260
261 llvm::Function *F = cast<llvm::Function>(GV);
262
263 // Perform special handling in OpenCL/CUDA mode
264 if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
265 // Use function attributes to check for kernel functions
266 // By default, all functions are device functions
267 if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
268 // OpenCL/CUDA kernel functions get kernel metadata
269 // And kernel functions are not subject to inlining
270 F->addFnAttr(llvm::Attribute::NoInline);
271 if (FD->hasAttr<CUDAGlobalAttr>()) {
272 F->setCallingConv(getDeviceKernelCallingConv());
273
274 for (auto IV : llvm::enumerate(FD->parameters()))
275 if (IV.value()->hasAttr<CUDAGridConstantAttr>())
276 F->addParamAttr(IV.index(),
277 llvm::Attribute::get(F->getContext(),
278 llvm::NVVMAttr::GridConstant));
279 }
280 if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
282 }
283 }
284}
285
286void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
287 StringRef Name, int Operand) {
288 llvm::Module *M = GV->getParent();
289 llvm::LLVMContext &Ctx = M->getContext();
290
291 // Get "nvvm.annotations" metadata node
292 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
293
294 SmallVector<llvm::Metadata *, 5> MDVals = {
295 llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
296 llvm::ConstantAsMetadata::get(
297 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
298
299 // Append metadata to nvvm.annotations
300 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
301}
302
303bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
304 return false;
305}
306
307StringRef NVPTXTargetCodeGenInfo::getLLVMSyncScopeStr(
308 const LangOptions &LangOpts, SyncScope Scope,
309 llvm::AtomicOrdering Ordering) const {
310 switch (Scope) {
311 case SyncScope::HIPSingleThread:
312 case SyncScope::SingleScope:
313 return "singlethread";
314 case SyncScope::HIPWavefront:
315 case SyncScope::OpenCLSubGroup:
316 case SyncScope::WavefrontScope:
317 case SyncScope::HIPWorkgroup:
318 case SyncScope::OpenCLWorkGroup:
319 case SyncScope::WorkgroupScope:
320 return "block";
321 case SyncScope::HIPCluster:
322 case SyncScope::ClusterScope:
323 return "cluster";
324 case SyncScope::HIPAgent:
325 case SyncScope::OpenCLDevice:
326 case SyncScope::DeviceScope:
327 return "device";
328 case SyncScope::SystemScope:
329 case SyncScope::HIPSystem:
330 case SyncScope::OpenCLAllSVMDevices:
331 return "";
332 }
333 llvm_unreachable("Unknown SyncScope enum");
334}
335
336llvm::Constant *
337NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
338 llvm::PointerType *PT,
339 QualType QT) const {
340 auto &Ctx = CGM.getContext();
341 if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
342 return llvm::ConstantPointerNull::get(PT);
343
344 auto NPT = llvm::PointerType::get(
345 PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
346 return llvm::ConstantExpr::getAddrSpaceCast(
347 llvm::ConstantPointerNull::get(NPT), PT);
348}
349} // namespace
350
352 const CUDALaunchBoundsAttr *Attr,
353 int32_t *MaxThreadsVal,
354 int32_t *MinBlocksVal,
355 int32_t *MaxClusterRankVal) {
356 llvm::APSInt MaxThreads(32);
357 MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
358 if (MaxThreads > 0) {
359 if (MaxThreadsVal)
360 *MaxThreadsVal = MaxThreads.getExtValue();
361 if (F)
362 F->addFnAttr(llvm::NVVMAttr::MaxNTID,
363 llvm::utostr(MaxThreads.getExtValue()));
364 }
365
366 // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
367 // was not specified in __launch_bounds__ or if the user specified a 0 value,
368 // we don't have to add a PTX directive.
369 if (Attr->getMinBlocks()) {
370 llvm::APSInt MinBlocks(32);
371 MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
372 if (MinBlocks > 0) {
373 if (MinBlocksVal)
374 *MinBlocksVal = MinBlocks.getExtValue();
375 if (F)
376 F->addFnAttr(llvm::NVVMAttr::MinCTASm,
377 llvm::utostr(MinBlocks.getExtValue()));
378 }
379 }
380 if (Attr->getMaxBlocks()) {
381 llvm::APSInt MaxBlocks(32);
382 MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
383 if (MaxBlocks > 0) {
384 if (MaxClusterRankVal)
385 *MaxClusterRankVal = MaxBlocks.getExtValue();
386 if (F)
387 F->addFnAttr(llvm::NVVMAttr::MaxClusterRank,
388 llvm::utostr(MaxBlocks.getExtValue()));
389 }
390 }
391}
392
393std::unique_ptr<TargetCodeGenInfo>
395 return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
396}
Provides definitions for the atomic synchronization scopes.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
const TargetInfo & getTargetInfo() const
Definition ASTContext.h:917
Attr - This represents one attribute.
Definition Attr.h:46
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
static ABIArgInfo getIgnore()
static ABIArgInfo getDirect(llvm::Type *T=nullptr, unsigned Offset=0, llvm::Type *Padding=nullptr, bool CanBeFlattened=true, unsigned Align=0)
static ABIArgInfo getExtend(QualType Ty, llvm::Type *T=nullptr)
ABIInfo - Target specific hooks for defining how a type should be passed or returned from functions.
Definition ABIInfo.h:48
unsigned getCallingConvention() const
getCallingConvention - Return the user specified calling convention, which has been translated into a...
CanQualType getReturnType() const
MutableArrayRef< ArgInfo > arguments()
void setEffectiveCallingConvention(unsigned Value)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition NVPTX.cpp:351
const LangOptions & getLangOpts() const
ASTContext & getContext() const
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
TargetCodeGenInfo - This class organizes various target-specific codegeneration issues,...
Definition TargetInfo.h:49
T * getAttr() const
Definition DeclBase.h:573
bool hasAttr() const
Definition DeclBase.h:577
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2789
A (possibly-)qualified type.
Definition TypeBase.h:937
virtual bool hasInt128Type() const
Determine whether the __int128 type is supported on this target.
Definition TargetInfo.h:679
virtual bool hasFloat16Type() const
Determine whether the _Float16 type is supported on this target.
Definition TargetInfo.h:721
virtual bool hasFloat128Type() const
Determine whether the __float128 type is supported on this target.
Definition TargetInfo.h:718
bool isVoidType() const
Definition TypeBase.h:9034
bool isFloat16Type() const
Definition TypeBase.h:9043
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:9078
bool isScalarType() const
Definition TypeBase.h:9140
bool isFloat128Type() const
Definition TypeBase.h:9063
bool isCUDADeviceBuiltinSurfaceType() const
Check if the type is the CUDA device builtin surface type.
Definition Type.cpp:5411
bool isCUDADeviceBuiltinTextureType() const
Check if the type is the CUDA device builtin texture type.
Definition Type.cpp:5420
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9314
EnumDecl * getAsEnumDecl() const
Retrieves the EnumDecl this type refers to.
Definition Type.h:53
bool isRealFloatingType() const
Floating point categories.
Definition Type.cpp:2358
const T * getAs() const
Member-template getAs<specific type>'.
Definition TypeBase.h:9261
QualType getType() const
Definition Decl.h:723
ABIArgInfo classifyArgumentType(CodeGenModule &CGM, CanQualType type)
Classify the rules for how to pass a particular type.
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:146
bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, const ABIInfo &Info)
std::unique_ptr< TargetCodeGenInfo > createNVPTXTargetCodeGenInfo(CodeGenModule &CGM)
Definition NVPTX.cpp:394
RValue emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType ValueTy, bool IsIndirect, TypeInfoChars ValueInfo, CharUnits SlotSizeAndAlign, bool AllowHigherAlign, AggValueSlot Slot, bool ForceRightAdjust=false)
Emit va_arg for a platform using the common void* representation, where arguments are simply emitted ...
bool isAggregateTypeForABI(QualType T)
bool Div(InterpState &S, CodePtr OpPC)
1) Pops the RHS from the stack.
Definition Interp.h:648
The JSON file list parser is used to communicate input to InstallAPI.
SyncScope
Defines sync scope values used internally by clang.
Definition SyncScope.h:42
U cast(CodeGen::Address addr)
Definition Address.h:327
unsigned long uint64_t