clang 23.0.0git
AMDGPU.cpp
Go to the documentation of this file.
1//===---- AMDGPU.cpp - AMDGPU-specific CIR CodeGen ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides AMDGPU-specific CIR CodeGen logic for function attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "../CIRGenModule.h"
14#include "../TargetInfo.h"
15
16#include "clang/AST/Attr.h"
17#include "clang/AST/Decl.h"
20#include "llvm/ADT/StringExtras.h"
21#include "llvm/Support/raw_ostream.h"
22
23using namespace clang;
24using namespace clang::CIRGen;
25
27 const Decl *d, cir::VisibilityKind visibility) {
28 if (visibility != cir::VisibilityKind::Hidden)
29 return false;
30
31 return !d->hasAttr<OMPDeclareTargetDeclAttr>() &&
32 (d->hasAttr<DeviceKernelAttr>() ||
33 (isa<FunctionDecl>(d) && d->hasAttr<CUDAGlobalAttr>()) ||
34 (isa<VarDecl>(d) &&
35 (d->hasAttr<CUDADeviceAttr>() || d->hasAttr<CUDAConstantAttr>() ||
36 cast<VarDecl>(d)->getType()->isCUDADeviceBuiltinSurfaceType() ||
37 cast<VarDecl>(d)->getType()->isCUDADeviceBuiltinTextureType())));
38}
39
40namespace {
41
42/// Handle amdgpu-flat-work-group-size attribute.
43static void
44handleAMDGPUFlatWorkGroupSizeAttr(const FunctionDecl *fd, cir::FuncOp func,
45 CIRGenModule &cgm, CIRGenBuilderTy &builder,
46 bool isOpenCLKernel, bool isHIPKernel) {
47 const auto *flatWGS = fd->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
48 const auto *reqdWGS =
49 cgm.getLangOpts().OpenCL ? fd->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
50
51 if (flatWGS || reqdWGS) {
52 unsigned min = 0, max = 0;
53 if (flatWGS) {
54 min = flatWGS->getMin()
55 ->EvaluateKnownConstInt(cgm.getASTContext())
56 .getExtValue();
57 max = flatWGS->getMax()
58 ->EvaluateKnownConstInt(cgm.getASTContext())
59 .getExtValue();
60 }
61 if (reqdWGS && min == 0 && max == 0) {
62 min = max = reqdWGS->getXDim()
63 ->EvaluateKnownConstInt(cgm.getASTContext())
64 .getExtValue() *
65 reqdWGS->getYDim()
66 ->EvaluateKnownConstInt(cgm.getASTContext())
67 .getExtValue() *
68 reqdWGS->getZDim()
69 ->EvaluateKnownConstInt(cgm.getASTContext())
70 .getExtValue();
71 }
72 if (min != 0) {
73 assert(min <= max && "Min must be less than or equal Max");
74 std::string attrVal = llvm::utostr(min) + "," + llvm::utostr(max);
75 func->setAttr("cir.amdgpu-flat-work-group-size",
76 builder.getStringAttr(attrVal));
77 } else {
78 assert(max == 0 && "Max must be zero");
79 }
80 } else if (isOpenCLKernel || isHIPKernel) {
81 // By default, restrict the maximum size to a value specified by
82 // --gpu-max-threads-per-block=n or its default value for HIP.
83 const unsigned openCLDefaultMaxWorkGroupSize = 256;
84 const unsigned defaultMaxWorkGroupSize =
85 isOpenCLKernel ? openCLDefaultMaxWorkGroupSize
86 : cgm.getLangOpts().GPUMaxThreadsPerBlock;
87 std::string attrVal =
88 std::string("1,") + llvm::utostr(defaultMaxWorkGroupSize);
89 func->setAttr("cir.amdgpu-flat-work-group-size",
90 builder.getStringAttr(attrVal));
91 }
92}
93
94/// Handle amdgpu-waves-per-eu attribute.
95static void handleAMDGPUWavesPerEUAttr(const FunctionDecl *fd, cir::FuncOp func,
96 CIRGenModule &cgm,
97 CIRGenBuilderTy &builder) {
98 const auto *attr = fd->getAttr<AMDGPUWavesPerEUAttr>();
99 if (!attr)
100 return;
101 unsigned min =
102 attr->getMin()->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue();
103 unsigned max = attr->getMax()
104 ? attr->getMax()
105 ->EvaluateKnownConstInt(cgm.getASTContext())
106 .getExtValue()
107 : 0;
108
109 if (min != 0) {
110 assert((max == 0 || min <= max) && "Min must be less than or equal Max");
111 std::string attrVal = llvm::utostr(min);
112 if (max != 0)
113 attrVal = attrVal + "," + llvm::utostr(max);
114 func->setAttr("cir.amdgpu-waves-per-eu", builder.getStringAttr(attrVal));
115 } else {
116 assert(max == 0 && "Max must be zero");
117 }
118}
119
120/// Handle amdgpu-num-sgpr attribute.
121static void handleAMDGPUNumSGPRAttr(const FunctionDecl *fd, cir::FuncOp func,
122 CIRGenModule &cgm,
123 CIRGenBuilderTy &builder) {
124 const auto *attr = fd->getAttr<AMDGPUNumSGPRAttr>();
125 if (!attr)
126 return;
127
128 uint32_t numSGPR = attr->getNumSGPR();
129 if (numSGPR != 0) {
130 func->setAttr("cir.amdgpu-num-sgpr",
131 builder.getStringAttr(llvm::utostr(numSGPR)));
132 }
133}
134
135/// Handle amdgpu-num-vgpr attribute.
136static void handleAMDGPUNumVGPRAttr(const FunctionDecl *fd, cir::FuncOp func,
137 CIRGenModule &cgm,
138 CIRGenBuilderTy &builder) {
139 const auto *attr = fd->getAttr<AMDGPUNumVGPRAttr>();
140 if (!attr)
141 return;
142
143 uint32_t numVGPR = attr->getNumVGPR();
144 if (numVGPR != 0) {
145 func->setAttr("cir.amdgpu-num-vgpr",
146 builder.getStringAttr(llvm::utostr(numVGPR)));
147 }
148}
149
150/// Handle amdgpu-max-num-workgroups attribute.
151static void handleAMDGPUMaxNumWorkGroupsAttr(const FunctionDecl *fd,
152 cir::FuncOp func,
153 CIRGenModule &cgm,
154 CIRGenBuilderTy &builder) {
155 const auto *attr = fd->getAttr<AMDGPUMaxNumWorkGroupsAttr>();
156 if (!attr)
157 return;
158 uint32_t x = attr->getMaxNumWorkGroupsX()
159 ->EvaluateKnownConstInt(cgm.getASTContext())
160 .getExtValue();
161 uint32_t y = attr->getMaxNumWorkGroupsY()
162 ? attr->getMaxNumWorkGroupsY()
163 ->EvaluateKnownConstInt(cgm.getASTContext())
164 .getExtValue()
165 : 1;
166 uint32_t z = attr->getMaxNumWorkGroupsZ()
167 ? attr->getMaxNumWorkGroupsZ()
168 ->EvaluateKnownConstInt(cgm.getASTContext())
169 .getExtValue()
170 : 1;
171
172 llvm::SmallString<32> attrVal;
173 llvm::raw_svector_ostream os(attrVal);
174 os << x << ',' << y << ',' << z;
175 func->setAttr("cir.amdgpu-max-num-workgroups",
176 builder.getStringAttr(attrVal.str()));
177}
178
179/// Handle amdgpu-cluster-dims attribute.
180static void handleAMDGPUClusterDimsAttr(const FunctionDecl *fd,
181 cir::FuncOp func, CIRGenModule &cgm,
182 CIRGenBuilderTy &builder,
183 bool isOpenCLKernel) {
184
185 if (const auto *attr = fd->getAttr<CUDAClusterDimsAttr>()) {
186 auto getExprVal = [&](const Expr *e) {
187 return e ? e->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue()
188 : 1;
189 };
190 unsigned x = getExprVal(attr->getX());
191 unsigned y = getExprVal(attr->getY());
192 unsigned z = getExprVal(attr->getZ());
193
194 llvm::SmallString<32> attrVal;
195 llvm::raw_svector_ostream os(attrVal);
196 os << x << ',' << y << ',' << z;
197 func->setAttr("cir.amdgpu-cluster-dims",
198 builder.getStringAttr(attrVal.str()));
199 }
200
201 const TargetInfo &targetInfo = cgm.getASTContext().getTargetInfo();
202 if ((isOpenCLKernel &&
203 targetInfo.hasFeatureEnabled(targetInfo.getTargetOpts().FeatureMap,
204 "clusters")) ||
205 fd->hasAttr<CUDANoClusterAttr>()) {
206 func->setAttr("cir.amdgpu-cluster-dims", builder.getStringAttr("0,0,0"));
207 }
208}
209
210/// Handle amdgpu-ieee attribute.
211static void handleAMDGPUIEEEAttr(cir::FuncOp func, CIRGenModule &cgm,
212 CIRGenBuilderTy &builder) {
213 if (!cgm.getCodeGenOpts().EmitIEEENaNCompliantInsts)
214 func->setAttr("cir.amdgpu-ieee", builder.getStringAttr("false"));
215}
216
217/// Handle amdgpu-expand-waitcnt-profiling attribute.
218static void handleAMDGPUExpandWaitcntProfilingAttr(cir::FuncOp func,
219 CIRGenModule &cgm,
220 CIRGenBuilderTy &builder) {
221 if (cgm.getCodeGenOpts().AMDGPUExpandWaitcntProfiling)
222 func->setAttr("cir.amdgpu-expand-waitcnt-profiling",
223 builder.getStringAttr(""));
224}
225
226} // namespace
227
229 cir::FuncOp func,
230 CIRGenModule &cgm) {
231 if (func.isDeclaration())
232 return;
233
234 CIRGenBuilderTy &builder = cgm.getBuilder();
235
236 const auto *fd = dyn_cast_or_null<FunctionDecl>(decl);
237 if (fd) {
238 const bool isOpenCLKernel =
239 cgm.getLangOpts().OpenCL && fd->hasAttr<DeviceKernelAttr>();
240 const bool isHIPKernel =
241 cgm.getLangOpts().HIP && fd->hasAttr<CUDAGlobalAttr>();
242
243 // TODO(CIR): Set amdgpu_kernel calling convention for HIP kernels.
244
245 handleAMDGPUFlatWorkGroupSizeAttr(fd, func, cgm, builder, isOpenCLKernel,
246 isHIPKernel);
247 handleAMDGPUWavesPerEUAttr(fd, func, cgm, builder);
248 handleAMDGPUNumSGPRAttr(fd, func, cgm, builder);
249 handleAMDGPUNumVGPRAttr(fd, func, cgm, builder);
250 handleAMDGPUMaxNumWorkGroupsAttr(fd, func, cgm, builder);
251 handleAMDGPUClusterDimsAttr(fd, func, cgm, builder, isOpenCLKernel);
252 }
253 handleAMDGPUIEEEAttr(func, cgm, builder);
254 handleAMDGPUExpandWaitcntProfilingAttr(func, cgm, builder);
255}
__DEVICE__ int min(int __a, int __b)
__DEVICE__ int max(int __a, int __b)
const TargetInfo & getTargetInfo() const
Definition ASTContext.h:917
This class organizes the cross-function state that is used while generating CIR code.
clang::ASTContext & getASTContext() const
CIRGenBuilderTy & getBuilder()
const clang::CodeGenOptions & getCodeGenOpts() const
const clang::LangOptions & getLangOpts() const
T * getAttr() const
Definition DeclBase.h:573
bool hasAttr() const
Definition DeclBase.h:577
This represents one expression.
Definition Expr.h:112
Represents a function declaration or definition.
Definition Decl.h:2015
Exposes information about the current target.
Definition TargetInfo.h:227
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition TargetInfo.h:327
virtual bool hasFeatureEnabled(const llvm::StringMap< bool > &Features, StringRef Name) const
Check if target has a given feature enabled.
llvm::StringMap< bool > FeatureMap
The map of which features have been enabled or disabled based on the command line.
Defines the clang::TargetInfo interface.
void setAMDGPUTargetFunctionAttributes(const clang::Decl *decl, cir::FuncOp func, CIRGenModule &cgm)
Set AMDGPU-specific function attributes for HIP kernels.
Definition AMDGPU.cpp:228
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
bool requiresAMDGPUProtectedVisibility(const clang::Decl *d, cir::VisibilityKind visibility)
Check if AMDGPU protected visibility is required.
Definition AMDGPU.cpp:26
const internal::VariadicAllOfMatcher< Decl > decl
Matches declarations.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
U cast(CodeGen::Address addr)
Definition Address.h:327