clang 23.0.0git
AMDGPU.cpp
Go to the documentation of this file.
1//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements AMDGPU TargetInfo objects.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/TargetParser/AMDGPUTargetParser.h"
21using namespace clang;
22using namespace clang::targets;
23
24namespace clang {
25namespace targets {
26
27// If you edit the description strings, make sure you update
28// getPointerWidthV().
29
// Table translating Clang language address spaces into AMDGPU target address
// spaces (a LangASMap). The trailing comment on each entry names the language
// address space that slot stands for; the slot order presumably mirrors the
// LangAS enumeration -- TODO confirm against its declaration before adding or
// reordering entries.
// The AMDGPUTargetInfo constructor installs this table as AddrSpaceMap.
30const LangASMap AMDGPUTargetInfo::AMDGPUAddrSpaceMap = {
31 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
32 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
33 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
34 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
35 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
36 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
37 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
38 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
39 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
40 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
41 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
42 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global
43 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device
44 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host
45 llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local
46 llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private
47 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
48 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
49 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
50 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
51 llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
52 // FIXME(pr/122103): hlsl_private -> PRIVATE is wrong, but at least this
53 // will break loudly.
54 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
55 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_device
56 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input
57 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_output
58 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_push_constant
59};
60
61} // namespace targets
62} // namespace clang
63
64static constexpr int NumBuiltins =
66
// Expand the GET_BUILTIN_STR_TABLE section of BuiltinsAMDGPU.inc (presumably
// the builtin string table -- verify against the .inc file). The selector
// macro is undefined again right after the include so it cannot leak into
// the second inclusion below.
67#define GET_BUILTIN_STR_TABLE
68#include "clang/Basic/BuiltinsAMDGPU.inc"
69#undef GET_BUILTIN_STR_TABLE
70
// Builtin::Info records for the AMDGPU builtins, expanded from the
// GET_BUILTIN_INFOS section of the same .inc file.
71static constexpr Builtin::Info BuiltinInfos[] = {
72#define GET_BUILTIN_INFOS
73#include "clang/Basic/BuiltinsAMDGPU.inc"
74#undef GET_BUILTIN_INFOS
75};
// Compile-time check that the table holds exactly NumBuiltins entries.
76static_assert(std::size(BuiltinInfos) == NumBuiltins);
77
// Table of register names recognised for this target (presumably what
// getGCCRegNames() exposes -- its body is not shown here, so confirm).
// Entries are grouped, in order, as:
//   v0-v255, s0-s127,
//   the special registers exec, vcc, scc, m0, flat_scratch and the
//   _lo/_hi halves of exec, vcc and flat_scratch,
//   a0-a255.
78const char *const AMDGPUTargetInfo::GCCRegNames[] = {
79 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
80 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
81 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
82 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
83 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
84 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
85 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
86 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
87 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
88 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
89 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
90 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
91 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
92 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
93 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
94 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
95 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
96 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
97 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
98 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
99 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
100 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
101 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
102 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
103 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
104 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
105 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
106 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
107 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
108 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
109 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
110 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
111 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
112 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
113 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
114 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
115 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
116 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
117 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
118 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
119 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
120 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
121 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
122 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
123 "flat_scratch_lo", "flat_scratch_hi",
124 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
125 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
126 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
127 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
128 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
129 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
130 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
131 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
132 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
133 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
134 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
135 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
136 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
137 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
138 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
139 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
140 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
141 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
142 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
143 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
144 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
145 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
146 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
147 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
148 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
149 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
150 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
151 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
152 "a252", "a253", "a254", "a255"
153};
154
158
160 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
161 const std::vector<std::string> &FeatureVec) const {
162
    // Fills Features for CPU in two steps: the generic TargetInfo defaults,
    // then the AMDGPU-specific features. Returns true on success and false
    // if either step rejects the request.
163 using namespace llvm::AMDGPU;
164
    // Base-class pass; on failure return false without reporting anything
    // from here.
165 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
166 return false;
167
    // HasError is a pair: .first is the status code switched on below and
    // .second is streamed into the diagnostic (presumably the offending
    // feature text -- confirm against fillAMDGPUFeatureMap).
168 auto HasError = fillAMDGPUFeatureMap(CPU, getTriple(), Features);
169 switch (HasError.first) {
    // Any status other than the two handled below is treated as success.
170 default:
171 break;
172 case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
173 Diags.Report(diag::err_invalid_feature_combination) << HasError.second;
174 return false;
175 case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
176 Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second;
177 return false;
178 }
179
180 return true;
181}
182
184 SmallVectorImpl<StringRef> &Values) const {
    // Fill Values with the valid architecture names for this triple: the
    // AMDGCN list for AMDGCN triples, otherwise the R600 list.
185 if (getTriple().isAMDGCN())
186 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
187 else
188 llvm::AMDGPU::fillValidArchListR600(Values);
189}
190
// Builds the AMDGPU target description from the triple and target options.
// GPUKind is parsed from Opts.CPU with the AMDGCN or the R600 parser,
// depending on the triple, and GPUFeatures holds the architecture attribute
// bits looked up for that kind.
// NOTE(review): the listing numbers embedded in this body skip several
// values (198, 201, 205, 216, 219-221, 224), so some statements may be
// missing from this copy -- compare with the upstream file before editing.
191AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
192 const TargetOptions &Opts)
193 : TargetInfo(Triple),
194 GPUKind(Triple.isAMDGCN() ? llvm::AMDGPU::parseArchAMDGCN(Opts.CPU)
195 : llvm::AMDGPU::parseArchR600(Opts.CPU)),
196 GPUFeatures(Triple.isAMDGCN() ? llvm::AMDGPU::getArchAttrAMDGCN(GPUKind)
197 : llvm::AMDGPU::getArchAttrR600(GPUKind)) {
199
    // Use the AMDGPU language-to-target address space table.
200 AddrSpaceMap = &AMDGPUAddrSpaceMap;
202
203 if (Triple.isAMDGCN()) {
204 // __bf16 is always available as a load/store only type on AMDGCN.
206 BFloat16Format = &llvm::APFloat::BFloat();
207 }
208
209 // TODO: This is not really true for targets without half support, but also
210 // should just be assumed true for the dummy target.
211 HasFastHalfType = true;
212 HasFloat16 = true;
    // 32 when the architecture attributes include FEATURE_WAVE32, else 64.
213 WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
214
215 // Set pointer width and alignment for the generic address space.
    // When the maximum pointer width is 64, make `long` 64 bits wide and
    // 64-bit aligned.
217 if (getMaxPointerWidth() == 64) {
218 LongWidth = LongAlign = 64;
222 }
223
    // CUMode is set exactly when the architecture attributes lack FEATURE_WGP.
225 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
226
    // Record these three feature names in ReadOnlyFeatures, but only when a
    // concrete GPU was selected (GPUKind is not GK_NONE).
227 for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"}) {
228 if (GPUKind != llvm::AMDGPU::GK_NONE)
229 ReadOnlyFeatures.insert(F);
230 }
231 HalfArgsAndReturns = true;
232}
233
235 const TargetInfo *Aux) {
    // Delegate to the base-class implementation.
    // NOTE(review): listing line 237 is absent from this copy; check the
    // upstream file for an AMDGPU-specific statement after this call.
236 TargetInfo::adjust(Diags, Opts, Aux);
238}
239
244
246 MacroBuilder &Builder) const {
    // Vendor/target identification macros, defined unconditionally.
247 Builder.defineMacro("__AMD__");
248 Builder.defineMacro("__AMDGPU__");
249
    // Exactly one of __AMDGCN__ / __R600__ is defined, chosen by the triple.
250 if (getTriple().isAMDGCN())
251 Builder.defineMacro("__AMDGCN__");
252 else
253 Builder.defineMacro("__R600__");
254
255 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
256 // removed in the near future.
257 if (hasFMAF())
258 Builder.defineMacro("__HAS_FMAF__");
259 if (hasFastFMAF())
260 Builder.defineMacro("FP_FAST_FMAF");
261 if (hasLDEXPF())
262 Builder.defineMacro("__HAS_LDEXPF__");
263 if (hasFP64())
264 Builder.defineMacro("__HAS_FP64__");
265 if (hasFastFMA())
266 Builder.defineMacro("FP_FAST_FMA");
267 if (HasFastHalfType)
268 Builder.defineMacro("FP_FAST_FMA_HALF");
269
    // The macro's value is CUMode rendered as text.
270 Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
271
272 // Legacy HIP host code relies on these default attributes to be defined.
273 bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
    // Without a concrete GPU there is no processor-specific macro to emit,
    // except for HIP host compilation, which continues below.
274 if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
275 return;
276
    // Canonical processor name for GPUKind, from the AMDGCN or R600 table.
277 llvm::SmallString<16> CanonName =
278 (getTriple().isAMDGCN() ? getArchNameAMDGCN(GPUKind)
279 : getArchNameR600(GPUKind));
280
281 // Sanitize the name of generic targets.
282 // e.g. gfx10-1-generic -> gfx10_1_generic
283 if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
284 GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
285 llvm::replace(CanonName, '-', '_');
286 }
287
    // Per-processor macro: __<CanonName>__.
288 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
289 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10__
290 if (getTriple().isAMDGCN() && !IsHIPHost) {
291 assert(StringRef(CanonName).starts_with("gfx") &&
292 "Invalid amdgcn canonical name");
293 StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
294 Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
295 Twine("__"));
    // Both macros below expand to quoted string literals.
296 Builder.defineMacro("__amdgcn_processor__",
297 Twine("\"") + Twine(CanonName) + Twine("\""));
    // getTargetID() is dereferenced without a check.
    // NOTE(review): presumably always engaged here because GPUKind cannot be
    // GK_NONE on this path (the early return above) -- confirm.
298 Builder.defineMacro("__amdgcn_target_id__",
299 Twine("\"") + Twine(*getTargetID()) + Twine("\""));
    // For every feature that may appear in a target ID for this processor and
    // has an entry in OffloadArchFeatures, define __amdgcn_feature_<name>__
    // as 1 or 0 from the stored flag, with '-' in the name replaced by '_'.
300 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
301 auto Loc = OffloadArchFeatures.find(F);
302 if (Loc != OffloadArchFeatures.end()) {
303 std::string NewF = F.str();
304 llvm::replace(NewF, '-', '_');
305 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
306 Twine("__"),
307 Loc->second ? "1" : "0");
308 }
309 }
310 }
311
    // NOTE(review): listing line 312 is absent from this copy; check the
    // upstream file for a condition guarding the define below.
313 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
314}
315
// This target and the auxiliary target must already agree on the half,
// float and double formats.
317 assert(HalfFormat == Aux->HalfFormat);
318 assert(FloatFormat == Aux->FloatFormat);
319 assert(DoubleFormat == Aux->DoubleFormat);
320
321 // On x86_64 long double is 80-bit extended precision format, which is
322 // not supported by AMDGPU. 128-bit floating point format is also not
323 // supported by AMDGPU. Therefore keep its own format for these two types.
    // Save the four values that copyAuxTarget() is expected to overwrite,
    // then restore them after the copy.
324 auto SaveLongDoubleFormat = LongDoubleFormat;
325 auto SaveFloat128Format = Float128Format;
326 auto SaveLongDoubleWidth = LongDoubleWidth;
327 auto SaveLongDoubleAlign = LongDoubleAlign;
328 copyAuxTarget(Aux);
329 LongDoubleFormat = SaveLongDoubleFormat;
330 Float128Format = SaveFloat128Format;
331 LongDoubleWidth = SaveLongDoubleWidth;
332 LongDoubleAlign = SaveLongDoubleAlign;
333 // For certain builtin types supported on the host target, claim they are
334 // supported so that host code compiles during the device-side
335 // compilation.
336 // FIXME: As a side effect, we also accept `__float128` uses in the device
337 // code. To reject these builtin types supported in the host target but not in
338 // the device target, one approach would support `device_builtin` attribute
339 // so that we could tell the device builtin types from the host ones. This
340 // also solves the different representations of the same builtin type, such
341 // as `size_t` in the MSVC environment.
342 if (Aux->hasFloat128Type()) {
343 HasFloat128 = true;
    // NOTE(review): listing line 344 is absent from this copy; check the
    // upstream file for a further statement in this branch.
345 }
346}
Defines the Diagnostic-related interfaces.
static constexpr llvm::StringTable BuiltinStrings
Definition ARM.cpp:1115
static constexpr Builtin::Info BuiltinInfos[]
Definition Builtins.cpp:38
static constexpr unsigned NumBuiltins
Definition Builtins.cpp:32
Defines enum values for all the target-independent builtin functions.
Defines the clang::LangOptions interface.
Defines the clang::MacroBuilder utility class.
Enumerates target-specific builtins in their own namespaces within namespace clang.
Concrete class used by the front-end to report problems and issues.
Definition Diagnostic.h:233
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
void copyAuxTarget(const TargetInfo *Aux)
Copy type and layout related info.
TargetInfo(const llvm::Triple &T)
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
const LangASMap * AddrSpaceMap
Definition TargetInfo.h:260
AtomicOptions AtomicOpts
Definition TargetInfo.h:313
virtual void adjust(DiagnosticsEngine &Diags, LangOptions &Opts, const TargetInfo *Aux)
Set forced language options.
unsigned char MaxAtomicPromoteWidth
Definition TargetInfo.h:253
bool UseAddrSpaceMapMangling
Specify if mangling based on address space map should be used or not for language specific address sp...
Definition TargetInfo.h:386
void resetDataLayout()
Set the data layout based on current triple and ABI.
llvm::StringSet ReadOnlyFeatures
Definition TargetInfo.h:310
virtual bool hasFloat128Type() const
Determine whether the __float128 type is supported on this target.
Definition TargetInfo.h:718
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU this should include all legal ...
unsigned char MaxAtomicInlineWidth
Definition TargetInfo.h:253
Options for controlling the target.
void setAuxTarget(const TargetInfo *Aux) override
Definition AMDGPU.cpp:316
ArrayRef< const char * > getGCCRegNames() const override
Definition AMDGPU.cpp:155
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
Definition AMDGPU.cpp:191
uint64_t getPointerWidthV(LangAS AS) const override
Definition AMDGPU.h:102
void fillValidCPUList(SmallVectorImpl< StringRef > &Values) const override
Fill a SmallVectorImpl with the valid values to setCPU.
Definition AMDGPU.cpp:183
void adjust(DiagnosticsEngine &Diags, LangOptions &Opts, const TargetInfo *Aux) override
Set forced language options.
Definition AMDGPU.cpp:234
bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const override
Initialize the map with the default set of target features for the CPU this should include all legal ...
Definition AMDGPU.cpp:159
std::optional< std::string > getTargetID() const override
Returns the target ID if supported.
Definition AMDGPU.h:488
void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override
===---- Other target property query methods --------------------------===//
Definition AMDGPU.cpp:245
llvm::SmallVector< Builtin::InfosShard > getTargetBuiltins() const override
Return information about target-specific builtins for the current primary target, and info about whic...
Definition AMDGPU.cpp:241
uint64_t getMaxPointerWidth() const override
Return the maximum width of pointers on this target.
Definition AMDGPU.h:131
AMDGPU builtins.
The JSON file list parser is used to communicate input to InstallAPI.
llvm::SmallVector< llvm::StringRef, 4 > getAllPossibleTargetIDFeatures(const llvm::Triple &T, llvm::StringRef Processor)
Get all feature strings that can be used in target ID for Processor.
Definition TargetID.cpp:41
unsigned[(unsigned) LangAS::FirstTargetAddressSpace] LangASMap
The type of a lookup table which maps from language-specific address spaces to target-specific ones.
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
The info used to represent each builtin.
Definition Builtins.h:80
const llvm::fltSemantics * DoubleFormat
Definition TargetInfo.h:144
const llvm::fltSemantics * LongDoubleFormat
Definition TargetInfo.h:144
const llvm::fltSemantics * Float128Format
Definition TargetInfo.h:144
const llvm::fltSemantics * FloatFormat
Definition TargetInfo.h:143
const llvm::fltSemantics * HalfFormat
Definition TargetInfo.h:143
const llvm::fltSemantics * BFloat16Format
Definition TargetInfo.h:143