clang 17.0.0git
AMDGPU.cpp
Go to the documentation of this file.
1//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements AMDGPU TargetInfo objects.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
20using namespace clang;
21using namespace clang::targets;
22
23namespace clang {
24namespace targets {
25
26// If you edit the description strings, make sure you update
27// getPointerWidthV().
28
// LLVM data layout description string for the R600 flavour of this target
// (judging by the name; the use site is not visible in this listing).
// Per the LLVM data layout syntax the "p:32:32" entry describes the default
// pointer, which is why getPointerWidthV() has to be kept in agreement with
// this string whenever it is edited.
29static const char *const DataLayoutStringR600 =
30 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
31 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
32
// LLVM data layout description string for the AMDGCN flavour of this target
// (judging by the name; the use site is not visible in this listing).
// The "p", "p1" ... "p8" entries give one pointer specification per numbered
// address space, so getPointerWidthV() has to be updated together with any
// change made here. The string is assembled from adjacent literals; the
// trailing "-ni:7:8" piece is part of the same string.
33static const char *const DataLayoutStringAMDGCN =
34 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
35 "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
36 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
37 "-ni:7:8";
38
// Language-address-space to target-address-space table in which the
// language's Default address space maps to Generic. setAddressSpaceMap()
// installs this table when its DefaultIsPrivate argument is false.
// The initializer is positional: the trailing comment on each row names the
// language address space that row stands for, so rows must not be reordered.
39const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
40 Generic, // Default
41 Global, // opencl_global
42 Local, // opencl_local
43 Constant, // opencl_constant
44 Private, // opencl_private
45 Generic, // opencl_generic
46 Global, // opencl_global_device
47 Global, // opencl_global_host
48 Global, // cuda_device
49 Constant, // cuda_constant
50 Local, // cuda_shared
51 Global, // sycl_global
52 Global, // sycl_global_device
53 Global, // sycl_global_host
54 Local, // sycl_local
55 Private, // sycl_private
56 Generic, // ptr32_sptr
57 Generic, // ptr32_uptr
58 Generic, // ptr64
59 Generic, // hlsl_groupshared
60};
61
// Language-address-space to target-address-space table in which the
// language's Default address space maps to Private. setAddressSpaceMap()
// installs this table when its DefaultIsPrivate argument is true.
// The initializer is positional: the trailing comment on each row names the
// language address space that row stands for, so rows must not be reordered.
62const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
63 Private, // Default
64 Global, // opencl_global
65 Local, // opencl_local
66 Constant, // opencl_constant
67 Private, // opencl_private
68 Generic, // opencl_generic
69 Global, // opencl_global_device
70 Global, // opencl_global_host
71 Global, // cuda_device
72 Constant, // cuda_constant
73 Local, // cuda_shared
74 // The SYCL address space values in this map are dummy placeholders.
75 Generic, // sycl_global
76 Generic, // sycl_global_device
77 Generic, // sycl_global_host
78 Generic, // sycl_local
79 Generic, // sycl_private
80 Generic, // ptr32_sptr
81 Generic, // ptr32_uptr
82 Generic, // ptr64
83 Generic, // hlsl_groupshared
84
85};
86} // namespace targets
87} // namespace clang
88
// Table of builtin descriptors for this target, produced by expanding the
// BUILTIN / TARGET_BUILTIN macros over every entry of BuiltinsAMDGPU.def.
// Both macros emit one Builtin::Info initializer with the stringized ID, the
// type string and the attribute string; they differ only in the fourth field,
// where BUILTIN passes nullptr and TARGET_BUILTIN passes its FEATURE argument.
// The macros must be defined before the .def file is included.
89static constexpr Builtin::Info BuiltinInfo[] = {
90#define BUILTIN(ID, TYPE, ATTRS) \
91 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
92#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
93 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
94#include "clang/Basic/BuiltinsAMDGPU.def"
95};
96
// Register names exposed through getGCCRegNames(). The table lists, in order:
// "v0".."v255", "s0".."s127", the named registers (exec, vcc, scc, m0,
// flat_scratch and the _lo/_hi halves of exec, vcc and flat_scratch), and
// finally "a0".."a255".
97const char *const AMDGPUTargetInfo::GCCRegNames[] = {
98 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
99 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
100 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
101 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
102 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
103 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
104 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
105 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
106 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
107 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
108 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
109 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
110 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
111 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
112 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
113 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
114 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
115 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
116 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
117 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
118 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
119 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
120 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
121 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
122 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
123 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
124 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
125 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
126 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
127 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
128 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
129 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
130 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
131 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
132 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
133 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
134 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
135 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
136 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
137 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
138 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
139 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
140 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
141 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
142 "flat_scratch_lo", "flat_scratch_hi",
143 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
144 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
145 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
146 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
147 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
148 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
149 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
150 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
151 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
152 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
153 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
154 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
155 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
156 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
157 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
158 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
159 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
160 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
161 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
162 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
163 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
164 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
165 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
166 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
167 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
168 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
169 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
170 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
171 "a252", "a253", "a254", "a255"
172};
173
// NOTE(review): the signature line of this function (embedded line 174) is
// absent from this listing; only the body is shown.
// Returns a view over the whole GCCRegNames table.
175 return llvm::ArrayRef(GCCRegNames);
176}
177
179 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
180 const std::vector<std::string> &FeatureVec) const {
    // NOTE(review): the opening line of this signature (embedded line 178) is
    // absent from this listing.
    //
    // Fills Features for CPU. Returns false when the base-class
    // TargetInfo::initFeatureMap() fails, or when insertWaveSizeFeature()
    // fails (in which case its message is reported through Diags as
    // err_invalid_feature_combination). Returns true otherwise.
181
182 using namespace llvm::AMDGPU;
    // The AMDGPU-specific fill runs before the generic TargetInfo handling.
183 fillAMDGPUFeatureMap(CPU, getTriple(), Features);
184 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
185 return false;
186
187 // TODO: Should move this logic into TargetParser
188 std::string ErrorMsg;
189 if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) {
190 Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
191 return false;
192 }
193
194 return true;
195}
196
198 SmallVectorImpl<StringRef> &Values) const {
    // NOTE(review): the opening line of this signature (embedded line 197) is
    // absent from this listing.
    //
    // Fills Values via the AMDGCN or the R600 arch-list helper, chosen by
    // whether the target triple is AMDGCN.
199 if (isAMDGCN(getTriple()))
200 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
201 else
202 llvm::AMDGPU::fillValidArchListR600(Values);
203}
204
205void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
206 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
207}
208
// Constructs the AMDGPU target description for Triple.
// GPUKind is parsed from Opts.CPU with the AMDGCN or the R600 parser,
// depending on the triple; GPUFeatures is then looked up from that kind with
// the matching attribute helper.
// NOTE(review): the embedded line numbers skip 218-219, 223, 227, 234, 237,
// 240-242 and 245, so some statements of the original body are not shown in
// this listing.
209AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
210 const TargetOptions &Opts)
211 : TargetInfo(Triple),
212 GPUKind(isAMDGCN(Triple) ?
213 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
214 llvm::AMDGPU::parseArchR600(Opts.CPU)),
215 GPUFeatures(isAMDGCN(Triple) ?
216 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
217 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
220
    // Use the Private-default address space map for the Mesa3D OS and for
    // every non-AMDGCN triple; all other cases get the Generic-default map.
221 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
222 !isAMDGCN(Triple));
224
225 if (isAMDGCN(Triple)) {
226 // __bf16 is always available as a load/store only type on AMDGCN.
228 BFloat16Format = &llvm::APFloat::BFloat();
229 }
230
231 HasLegalHalfType = true;
232 HasFloat16 = true;
    // Wavefront size is 32 when the FEATURE_WAVE32 bit is set, 64 otherwise.
233 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
235
236 // Set pointer width and alignment for the generic address space.
238 if (getMaxPointerWidth() == 64) {
239 LongWidth = LongAlign = 64;
243 }
244
    // CUMode is true exactly when the FEATURE_WGP bit is not set.
246 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
247}
248
// NOTE(review): the signature line of this function (embedded line 249) is
// absent from this listing; only the body is shown.
// Runs the base-class adjustment, then re-selects the address space map:
// the Private-default map is used for OpenCL or for non-AMDGCN triples.
250 TargetInfo::adjust(Diags, Opts);
251 // TODO: There are still a few places that use the default address space as
252 // the private address space in OpenCL. Once those are cleaned up,
253 // Opts.OpenCL can be removed from the following line.
254 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
255 !isAMDGCN(getTriple()));
256}
257
261}
262
264 MacroBuilder &Builder) const {
    // NOTE(review): the opening line of this signature (embedded line 263) is
    // absent from this listing.
    //
    // Emits the predefined macros for this target into Builder: the vendor and
    // architecture macros, per-processor macros when a GPU kind is known, the
    // floating-point capability macros, and the wavefront-size / CU-mode
    // macros.
265 Builder.defineMacro("__AMD__");
266 Builder.defineMacro("__AMDGPU__");
267
268 if (isAMDGCN(getTriple()))
269 Builder.defineMacro("__AMDGCN__");
270 else
271 Builder.defineMacro("__R600__");
272
273 if (GPUKind != llvm::AMDGPU::GK_NONE) {
274 StringRef CanonName = isAMDGCN(getTriple()) ?
275 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
    // Processor macro: the canonical name wrapped in double underscores.
276 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
277 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10__
278 if (isAMDGCN(getTriple())) {
279 assert(CanonName.startswith("gfx") && "Invalid amdgcn canonical name");
    // The family name is the canonical name minus its last two characters,
    // upper-cased.
280 Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
281 Twine("__"));
282 }
283 if (isAMDGCN(getTriple())) {
    // These two macros expand to quoted string literals.
284 Builder.defineMacro("__amdgcn_processor__",
285 Twine("\"") + Twine(CanonName) + Twine("\""));
    // NOTE(review): getTargetID() returns std::optional and is dereferenced
    // without a check here; presumably it always holds a value on this path.
286 Builder.defineMacro("__amdgcn_target_id__",
287 Twine("\"") + Twine(*getTargetID()) + Twine("\""));
    // For every target-ID feature of this processor that is present in
    // OffloadArchFeatures, define __amdgcn_feature_<name>__ as 1 or 0
    // according to the stored flag; '-' in the name becomes '_'.
288 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
289 auto Loc = OffloadArchFeatures.find(F);
290 if (Loc != OffloadArchFeatures.end()) {
291 std::string NewF = F.str();
292 std::replace(NewF.begin(), NewF.end(), '-', '_');
293 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
294 Twine("__"),
295 Loc->second ? "1" : "0");
296 }
297 }
298 }
299 }
300
    // NOTE(review): embedded line 301 is absent from this listing, so the
    // condition that presumably guards the next definition is not shown.
302 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
303
304 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
305 // removed in the near future.
306 if (hasFMAF())
307 Builder.defineMacro("__HAS_FMAF__");
308 if (hasFastFMAF())
309 Builder.defineMacro("FP_FAST_FMAF");
310 if (hasLDEXPF())
311 Builder.defineMacro("__HAS_LDEXPF__");
312 if (hasFP64())
313 Builder.defineMacro("__HAS_FP64__");
314 if (hasFastFMA())
315 Builder.defineMacro("FP_FAST_FMA");
316
317 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
318 Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
319}
320
// NOTE(review): the signature line of this function (embedded line 321) is
// absent from this listing; only the body is shown.
// Copies type and layout information from the auxiliary target Aux while
// keeping this target's own long double and __float128 settings.
// The half, float and double formats are asserted to match between the two
// targets.
322 assert(HalfFormat == Aux->HalfFormat);
323 assert(FloatFormat == Aux->FloatFormat);
324 assert(DoubleFormat == Aux->DoubleFormat);
325
326 // On x86_64 long double is 80-bit extended precision format, which is
327 // not supported by AMDGPU. 128-bit floating point format is also not
328 // supported by AMDGPU. Therefore keep its own format for these two types.
    // Save the four fields, let copyAuxTarget() overwrite them, then restore.
329 auto SaveLongDoubleFormat = LongDoubleFormat;
330 auto SaveFloat128Format = Float128Format;
331 auto SaveLongDoubleWidth = LongDoubleWidth;
332 auto SaveLongDoubleAlign = LongDoubleAlign;
333 copyAuxTarget(Aux);
334 LongDoubleFormat = SaveLongDoubleFormat;
335 Float128Format = SaveFloat128Format;
336 LongDoubleWidth = SaveLongDoubleWidth;
337 LongDoubleAlign = SaveLongDoubleAlign;
338 // For certain builtin types supported on the host target, claim they are
339 // supported so that the host code compiles during the device-side
340 // compilation.
341 // FIXME: As a side effect, we also accept `__float128` uses in the device
342 // code. To reject these builtin types supported in the host target but not in
343 // the device target, one approach would be to support a `device_builtin`
344 // attribute so that we could tell the device builtin types from the host
345 // ones. This also solves the different representations of the same builtin
346 // type, such as `size_t` in the MSVC environment.
347 if (Aux->hasFloat128Type()) {
348 HasFloat128 = true;
    // NOTE(review): embedded line 349 is absent from this listing.
350 }
351}
Defines the Diagnostic-related interfaces.
static constexpr Builtin::Info BuiltinInfo[]
Definition: AMDGPU.cpp:89
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
Defines enum values for all the target-independent builtin functions.
Defines the clang::LangOptions interface.
Defines the clang::MacroBuilder utility class.
Enumerates target-specific builtins in their own namespaces within namespace clang.
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:192
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1542
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:82
Exposes information about the current target.
Definition: TargetInfo.h:206
void copyAuxTarget(const TargetInfo *Aux)
Copy type and layout related info.
Definition: TargetInfo.cpp:988
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1197
const LangASMap * AddrSpaceMap
Definition: TargetInfo.h:236
void resetDataLayout(StringRef DL, const char *UserLabelPrefix="")
Definition: TargetInfo.cpp:189
unsigned char MaxAtomicPromoteWidth
Definition: TargetInfo.h:230
bool UseAddrSpaceMapMangling
Specify if mangling based on address space map should be used or not for language specific address sp...
Definition: TargetInfo.h:339
virtual void adjust(DiagnosticsEngine &Diags, LangOptions &Opts)
Set forced language options.
Definition: TargetInfo.cpp:391
virtual bool hasFloat128Type() const
Determine whether the __float128 type is supported on this target.
Definition: TargetInfo.h:648
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU this should include all legal ...
Definition: TargetInfo.cpp:521
unsigned char MaxAtomicInlineWidth
Definition: TargetInfo.h:230
unsigned AllowAMDGPUUnsafeFPAtomics
Definition: TargetInfo.h:253
Options for controlling the target.
Definition: TargetOptions.h:26
bool AllowAMDGPUUnsafeFPAtomics
If enabled, allow AMDGPU unsafe floating point atomics.
Definition: TargetOptions.h:79
void setAuxTarget(const TargetInfo *Aux) override
Definition: AMDGPU.cpp:321
ArrayRef< const char * > getGCCRegNames() const override
Definition: AMDGPU.cpp:174
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
Definition: AMDGPU.cpp:209
uint64_t getPointerWidthV(LangAS AS) const override
Definition: AMDGPU.h:101
void fillValidCPUList(SmallVectorImpl< StringRef > &Values) const override
Fill a SmallVectorImpl with the valid values to setCPU.
Definition: AMDGPU.cpp:197
bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const override
Initialize the map with the default set of target features for the CPU this should include all legal ...
Definition: AMDGPU.cpp:178
void setAddressSpaceMap(bool DefaultIsPrivate)
Definition: AMDGPU.cpp:205
void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override
Set forced language options.
Definition: AMDGPU.cpp:249
std::optional< std::string > getTargetID() const override
Returns the target ID if supported.
Definition: AMDGPU.h:462
void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override
===---- Other target property query methods ----------------------------===//
Definition: AMDGPU.cpp:263
uint64_t getMaxPointerWidth() const override
Return the maximum width of pointers on this target.
Definition: AMDGPU.h:116
ArrayRef< Builtin::Info > getTargetBuiltins() const override
Return information about target-specific builtins for the current primary target, and info about whic...
Definition: AMDGPU.cpp:258
static const char *const DataLayoutStringR600
Definition: AMDGPU.cpp:29
static const char *const DataLayoutStringAMDGCN
Definition: AMDGPU.cpp:33
unsigned[(unsigned) LangAS::FirstTargetAddressSpace] LangASMap
The type of a lookup table which maps from language-specific address spaces to target-specific ones.
Definition: AddressSpaces.h:73
llvm::SmallVector< llvm::StringRef, 4 > getAllPossibleTargetIDFeatures(const llvm::Triple &T, llvm::StringRef Processor)
Get all feature strings that can be used in target ID for Processor.
Definition: TargetID.cpp:38
YAML serialization mapping.
Definition: Dominators.h:30
const llvm::fltSemantics * DoubleFormat
Definition: TargetInfo.h:132
const llvm::fltSemantics * LongDoubleFormat
Definition: TargetInfo.h:132
const llvm::fltSemantics * Float128Format
Definition: TargetInfo.h:132
const llvm::fltSemantics * FloatFormat
Definition: TargetInfo.h:131
const llvm::fltSemantics * HalfFormat
Definition: TargetInfo.h:131
const llvm::fltSemantics * BFloat16Format
Definition: TargetInfo.h:131