20#include "llvm/ADT/StringExtras.h"
21#include "llvm/Support/raw_ostream.h"
27 const Decl *d, cir::VisibilityKind visibility) {
28 if (visibility != cir::VisibilityKind::Hidden)
31 return !d->
hasAttr<OMPDeclareTargetDeclAttr>() &&
32 (d->
hasAttr<DeviceKernelAttr>() ||
36 cast<VarDecl>(d)->getType()->isCUDADeviceBuiltinSurfaceType() ||
37 cast<VarDecl>(d)->getType()->isCUDADeviceBuiltinTextureType())));
44handleAMDGPUFlatWorkGroupSizeAttr(
const FunctionDecl *fd, cir::FuncOp func,
46 bool isOpenCLKernel,
bool isHIPKernel) {
47 const auto *flatWGS = fd->
getAttr<AMDGPUFlatWorkGroupSizeAttr>();
51 if (flatWGS || reqdWGS) {
54 min = flatWGS->getMin()
57 max = flatWGS->getMax()
61 if (reqdWGS &&
min == 0 &&
max == 0) {
62 min =
max = reqdWGS->getXDim()
73 assert(
min <=
max &&
"Min must be less than or equal Max");
74 std::string attrVal = llvm::utostr(
min) +
"," + llvm::utostr(
max);
75 func->setAttr(
"cir.amdgpu-flat-work-group-size",
76 builder.getStringAttr(attrVal));
78 assert(
max == 0 &&
"Max must be zero");
80 }
else if (isOpenCLKernel || isHIPKernel) {
83 const unsigned openCLDefaultMaxWorkGroupSize = 256;
84 const unsigned defaultMaxWorkGroupSize =
85 isOpenCLKernel ? openCLDefaultMaxWorkGroupSize
88 std::string(
"1,") + llvm::utostr(defaultMaxWorkGroupSize);
89 func->setAttr(
"cir.amdgpu-flat-work-group-size",
90 builder.getStringAttr(attrVal));
95static void handleAMDGPUWavesPerEUAttr(
const FunctionDecl *fd, cir::FuncOp func,
98 const auto *
attr = fd->
getAttr<AMDGPUWavesPerEUAttr>();
110 assert((
max == 0 ||
min <=
max) &&
"Min must be less than or equal Max");
111 std::string attrVal = llvm::utostr(
min);
113 attrVal = attrVal +
"," + llvm::utostr(
max);
114 func->setAttr(
"cir.amdgpu-waves-per-eu", builder.getStringAttr(attrVal));
116 assert(
max == 0 &&
"Max must be zero");
121static void handleAMDGPUNumSGPRAttr(
const FunctionDecl *fd, cir::FuncOp func,
124 const auto *
attr = fd->
getAttr<AMDGPUNumSGPRAttr>();
128 uint32_t numSGPR =
attr->getNumSGPR();
130 func->setAttr(
"cir.amdgpu-num-sgpr",
131 builder.getStringAttr(llvm::utostr(numSGPR)));
136static void handleAMDGPUNumVGPRAttr(
const FunctionDecl *fd, cir::FuncOp func,
139 const auto *
attr = fd->
getAttr<AMDGPUNumVGPRAttr>();
143 uint32_t numVGPR =
attr->getNumVGPR();
145 func->setAttr(
"cir.amdgpu-num-vgpr",
146 builder.getStringAttr(llvm::utostr(numVGPR)));
151static void handleAMDGPUMaxNumWorkGroupsAttr(
const FunctionDecl *fd,
155 const auto *
attr = fd->
getAttr<AMDGPUMaxNumWorkGroupsAttr>();
158 uint32_t x =
attr->getMaxNumWorkGroupsX()
161 uint32_t y =
attr->getMaxNumWorkGroupsY()
162 ?
attr->getMaxNumWorkGroupsY()
166 uint32_t z =
attr->getMaxNumWorkGroupsZ()
167 ?
attr->getMaxNumWorkGroupsZ()
173 llvm::raw_svector_ostream os(attrVal);
174 os << x <<
',' << y <<
',' << z;
175 func->setAttr(
"cir.amdgpu-max-num-workgroups",
176 builder.getStringAttr(attrVal.str()));
180static void handleAMDGPUClusterDimsAttr(
const FunctionDecl *fd,
183 bool isOpenCLKernel) {
185 if (
const auto *
attr = fd->
getAttr<CUDAClusterDimsAttr>()) {
186 auto getExprVal = [&](
const Expr *e) {
187 return e ? e->EvaluateKnownConstInt(cgm.
getASTContext()).getExtValue()
190 unsigned x = getExprVal(
attr->getX());
191 unsigned y = getExprVal(
attr->getY());
192 unsigned z = getExprVal(
attr->getZ());
195 llvm::raw_svector_ostream os(attrVal);
196 os << x <<
',' << y <<
',' << z;
197 func->setAttr(
"cir.amdgpu-cluster-dims",
198 builder.getStringAttr(attrVal.str()));
202 if ((isOpenCLKernel &&
205 fd->
hasAttr<CUDANoClusterAttr>()) {
206 func->setAttr(
"cir.amdgpu-cluster-dims", builder.getStringAttr(
"0,0,0"));
211static void handleAMDGPUIEEEAttr(cir::FuncOp func,
CIRGenModule &cgm,
214 func->setAttr(
"cir.amdgpu-ieee", builder.getStringAttr(
"false"));
218static void handleAMDGPUExpandWaitcntProfilingAttr(cir::FuncOp func,
222 func->setAttr(
"cir.amdgpu-expand-waitcnt-profiling",
223 builder.getStringAttr(
""));
231 if (func.isDeclaration())
236 const auto *fd = dyn_cast_or_null<FunctionDecl>(
decl);
238 const bool isOpenCLKernel =
240 const bool isHIPKernel =
245 handleAMDGPUFlatWorkGroupSizeAttr(fd, func, cgm, builder, isOpenCLKernel,
247 handleAMDGPUWavesPerEUAttr(fd, func, cgm, builder);
248 handleAMDGPUNumSGPRAttr(fd, func, cgm, builder);
249 handleAMDGPUNumVGPRAttr(fd, func, cgm, builder);
250 handleAMDGPUMaxNumWorkGroupsAttr(fd, func, cgm, builder);
251 handleAMDGPUClusterDimsAttr(fd, func, cgm, builder, isOpenCLKernel);
253 handleAMDGPUIEEEAttr(func, cgm, builder);
254 handleAMDGPUExpandWaitcntProfilingAttr(func, cgm, builder);
__DEVICE__ int min(int __a, int __b)
__DEVICE__ int max(int __a, int __b)
const TargetInfo & getTargetInfo() const
This class organizes the cross-function state that is used while generating CIR code.
clang::ASTContext & getASTContext() const
CIRGenBuilderTy & getBuilder()
const clang::CodeGenOptions & getCodeGenOpts() const
const clang::LangOptions & getLangOpts() const
This represents one expression.
Represents a function declaration or definition.
Exposes information about the current target.
TargetOptions & getTargetOpts() const
Retrieve the target options.
virtual bool hasFeatureEnabled(const llvm::StringMap< bool > &Features, StringRef Name) const
Check if target has a given feature enabled.
llvm::StringMap< bool > FeatureMap
The map of which features have been enabled or disabled based on the command line.
Defines the clang::TargetInfo interface.
void setAMDGPUTargetFunctionAttributes(const clang::Decl *decl, cir::FuncOp func, CIRGenModule &cgm)
Set AMDGPU-specific function attributes for HIP kernels.
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
bool requiresAMDGPUProtectedVisibility(const clang::Decl *d, cir::VisibilityKind visibility)
Check if AMDGPU protected visibility is required.
const internal::VariadicAllOfMatcher< Decl > decl
Matches declarations.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
nullptr
This class represents a compute construct, representing a 'Kind' of 'parallel', 'serial',...
U cast(CodeGen::Address addr)