clang  9.0.0svn
AMDGPU.cpp
Go to the documentation of this file.
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
// Data layout description string for R600-family targets. Its default pointer
// entry is "p:32:32" and it ends with the "A5" specifier; see LLVM's
// data-layout string syntax for the meaning of the individual fields.
30 static const char *const DataLayoutStringR600 =
31  "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
32  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
33 
// Data layout description string for AMDGCN targets. The default pointer entry
// is "p:64:64", followed by explicit per-address-space pointer entries p1..p6;
// the vector/integer/stack fields match the R600 string, and a trailing
// "-ni:7" is appended. See LLVM's data-layout string syntax for field meanings.
34 static const char *const DataLayoutStringAMDGCN =
35  "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
36  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
37  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
38  "-ni:7";
39 
// Language-to-target address space table in which the Default language address
// space maps to Generic. setAddressSpaceMap() installs this table when its
// DefaultIsPrivate argument is false. Entries are positional: the trailing
// comment on each row names the language address space that row stands for,
// so the order must not be changed.
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41  Generic, // Default
42  Global, // opencl_global
43  Local, // opencl_local
44  Constant, // opencl_constant
45  Private, // opencl_private
46  Generic, // opencl_generic
47  Global, // cuda_device
48  Constant, // cuda_constant
49  Local // cuda_shared
50 };
51 
// Language-to-target address space table in which the Default language address
// space maps to Private. setAddressSpaceMap() installs this table when its
// DefaultIsPrivate argument is true. Apart from the first (Default) row, the
// rows are identical to AMDGPUDefIsGenMap. Entries are positional: the
// trailing comment on each row names the language address space it stands for.
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53  Private, // Default
54  Global, // opencl_global
55  Local, // opencl_local
56  Constant, // opencl_constant
57  Private, // opencl_private
58  Generic, // opencl_generic
59  Global, // cuda_device
60  Constant, // cuda_constant
61  Local // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
// Table of builtin descriptors, produced by defining BUILTIN and
// TARGET_BUILTIN and then including BuiltinsAMDGPU.def so that every entry in
// that file expands to one initializer row. Both macros stringize ID as the
// first field and pass TYPE and ATTRS through; they differ only in the last
// field, which is nullptr for BUILTIN and FEATURE for TARGET_BUILTIN.
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS) \
68  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
70  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
// Register name table: "v0".."v255", then "s0".."s127", then the named
// registers exec, vcc, scc, m0 and flat_scratch, followed by the _lo/_hi
// halves of exec, vcc and flat_scratch. The table is wrapped with
// llvm::makeArrayRef(GCCRegNames) further down in this file.
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119  "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
123  return llvm::makeArrayRef(GCCRegNames);
124 }
125 
127  llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128  const std::vector<std::string> &FeatureVec) const {
129 
130  using namespace llvm::AMDGPU;
131 
132  // XXX - What does the member GPU mean if device name string passed here?
133  if (isAMDGCN(getTriple())) {
134  if (CPU.empty())
135  CPU = "gfx600";
136 
137  switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138  case GK_GFX906:
139  Features["dl-insts"] = true;
140  Features["dot1-insts"] = true;
141  Features["dot2-insts"] = true;
142  LLVM_FALLTHROUGH;
143  case GK_GFX909:
144  case GK_GFX904:
145  case GK_GFX902:
146  case GK_GFX900:
147  Features["gfx9-insts"] = true;
148  LLVM_FALLTHROUGH;
149  case GK_GFX810:
150  case GK_GFX803:
151  case GK_GFX802:
152  case GK_GFX801:
153  Features["vi-insts"] = true;
154  Features["16-bit-insts"] = true;
155  Features["dpp"] = true;
156  Features["s-memrealtime"] = true;
157  LLVM_FALLTHROUGH;
158  case GK_GFX704:
159  case GK_GFX703:
160  case GK_GFX702:
161  case GK_GFX701:
162  case GK_GFX700:
163  Features["ci-insts"] = true;
164  LLVM_FALLTHROUGH;
165  case GK_GFX601:
166  case GK_GFX600:
167  break;
168  case GK_NONE:
169  return false;
170  default:
171  llvm_unreachable("Unhandled GPU!");
172  }
173  } else {
174  if (CPU.empty())
175  CPU = "r600";
176 
177  switch (llvm::AMDGPU::parseArchR600(CPU)) {
178  case GK_CAYMAN:
179  case GK_CYPRESS:
180  case GK_RV770:
181  case GK_RV670:
182  // TODO: Add fp64 when implemented.
183  break;
184  case GK_TURKS:
185  case GK_CAICOS:
186  case GK_BARTS:
187  case GK_SUMO:
188  case GK_REDWOOD:
189  case GK_JUNIPER:
190  case GK_CEDAR:
191  case GK_RV730:
192  case GK_RV710:
193  case GK_RS880:
194  case GK_R630:
195  case GK_R600:
196  break;
197  default:
198  llvm_unreachable("Unhandled GPU!");
199  }
200  }
201 
202  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
203 }
204 
206  TargetOptions &TargetOpts) const {
207  bool hasFP32Denormals = false;
208  bool hasFP64Denormals = false;
209 
210  for (auto &I : TargetOpts.FeaturesAsWritten) {
211  if (I == "+fp32-denormals" || I == "-fp32-denormals")
212  hasFP32Denormals = true;
213  if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
214  hasFP64Denormals = true;
215  }
216  if (!hasFP32Denormals)
217  TargetOpts.Features.push_back(
218  (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
219  ? '+' : '-') + Twine("fp32-denormals"))
220  .str());
221  // Always do not flush fp64 or fp16 denorms.
222  if (!hasFP64Denormals && hasFP64())
223  TargetOpts.Features.push_back("+fp64-fp16-denormals");
224 }
225 
227  SmallVectorImpl<StringRef> &Values) const {
228  if (isAMDGCN(getTriple()))
229  llvm::AMDGPU::fillValidArchListAMDGCN(Values);
230  else
231  llvm::AMDGPU::fillValidArchListR600(Values);
232 }
233 
234 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
235  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
236 }
237 
// Construct the AMDGPU target description for the given triple and options.
//
// Triple selects between the AMDGCN and R600 code paths via isAMDGCN(Triple);
// Opts.CPU is the device name that is parsed into GPUKind.
//
// NOTE(review): the embedded listing numbers jump 246->249, 252->254,
// 258->262 and 263->265, so some statements of this constructor are missing
// from this extract. The comments below describe only the lines shown.
238 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
239  const TargetOptions &Opts)
240  : TargetInfo(Triple),
// Parse the CPU name with the parser that matches the triple's architecture.
241  GPUKind(isAMDGCN(Triple) ?
242  llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
243  llvm::AMDGPU::parseArchR600(Opts.CPU)),
// Look up the attribute set for the GPU kind parsed above.
// NOTE(review): this initializer reads GPUKind, so it assumes GPUKind is
// declared before GPUFeatures in the class -- confirm in AMDGPU.h.
244  GPUFeatures(isAMDGCN(Triple) ?
245  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
246  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
// The data layout in effect here must place allocas in the Private address
// space.
249  assert(DataLayout->getAllocaAddrSpace() == Private);
250 
// Default-is-private map is used for the Mesa3D OS and for every non-AMDGCN
// triple; otherwise the default-is-generic map is used.
251  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
252  !isAMDGCN(Triple));
254 
255  // Set pointer width and alignment for target address space 0.
256  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
// When the widest pointer is 64 bits, make 'long' 64 bits wide and 64-bit
// aligned as well.
257  if (getMaxPointerWidth() == 64) {
258  LongWidth = LongAlign = 64;
262  }
263 
265 }
266 
268  TargetInfo::adjust(Opts);
269  // ToDo: There are still a few places using default address space as private
270  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
271  // can be removed from the following line.
272  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
273  !isAMDGCN(getTriple()));
274 }
275 
277  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
279 }
280 
282  MacroBuilder &Builder) const {
283  Builder.defineMacro("__AMD__");
284  Builder.defineMacro("__AMDGPU__");
285 
286  if (isAMDGCN(getTriple()))
287  Builder.defineMacro("__AMDGCN__");
288  else
289  Builder.defineMacro("__R600__");
290 
291  if (GPUKind != llvm::AMDGPU::GK_NONE) {
292  StringRef CanonName = isAMDGCN(getTriple()) ?
293  getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
294  Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
295  }
296 
297  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
298  // removed in the near future.
299  if (hasFMAF())
300  Builder.defineMacro("__HAS_FMAF__");
301  if (hasFastFMAF())
302  Builder.defineMacro("FP_FAST_FMAF");
303  if (hasLDEXPF())
304  Builder.defineMacro("__HAS_LDEXPF__");
305  if (hasFP64())
306  Builder.defineMacro("__HAS_FP64__");
307  if (hasFastFMA())
308  Builder.defineMacro("FP_FAST_FMA");
309 }
310 
312  assert(HalfFormat == Aux->HalfFormat);
313  assert(FloatFormat == Aux->FloatFormat);
314  assert(DoubleFormat == Aux->DoubleFormat);
315 
316  // On x86_64 long double is 80-bit extended precision format, which is
317  // not supported by AMDGPU. 128-bit floating point format is also not
318  // supported by AMDGPU. Therefore keep its own format for these two types.
319  auto SaveLongDoubleFormat = LongDoubleFormat;
320  auto SaveFloat128Format = Float128Format;
321  copyAuxTarget(Aux);
322  LongDoubleFormat = SaveLongDoubleFormat;
323  Float128Format = SaveFloat128Format;
324 }
bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const override
Initialize the map with the default set of target features for the CPU this should include all legal ...
Definition: AMDGPU.cpp:126
Defines the clang::MacroBuilder utility class.
virtual void adjust(LangOptions &Opts)
Set forced language options.
Definition: TargetInfo.cpp:324
const llvm::fltSemantics * FloatFormat
Definition: TargetInfo.h:100
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
Definition: Dominators.h:29
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:966
Options for controlling the target.
Definition: TargetOptions.h:26
void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override
===---- Other target property query methods --------------------------===//
Definition: AMDGPU.cpp:281
void fillValidCPUList(SmallVectorImpl< StringRef > &Values) const override
Fill a SmallVectorImpl with the valid values to setCPU.
Definition: AMDGPU.cpp:226
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
unsigned char MaxAtomicPromoteWidth
Definition: TargetInfo.h:177
const llvm::fltSemantics * HalfFormat
Definition: TargetInfo.h:100
const llvm::fltSemantics * Float128Format
Definition: TargetInfo.h:100
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:148
ArrayRef< const char * > getGCCRegNames() const override
Definition: AMDGPU.cpp:122
void setAddressSpaceMap(bool DefaultIsPrivate)
Definition: AMDGPU.cpp:234
static const char *const GCCRegNames[]
Definition: X86.cpp:43
return Out str()
Exposes information about the current target.
Definition: TargetInfo.h:161
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU this should include all legal ...
Definition: TargetInfo.cpp:385
Defines the clang::LangOptions interface.
void adjustTargetOptions(const CodeGenOptions &CGOpts, TargetOptions &TargetOpts) const override
Adjust target options based on codegen options.
Definition: AMDGPU.cpp:205
void resetDataLayout(StringRef DL)
Definition: TargetInfo.h:199
void setAuxTarget(const TargetInfo *Aux) override
Definition: AMDGPU.cpp:311
const llvm::fltSemantics * LongDoubleFormat
Definition: TargetInfo.h:100
Enumerates target-specific builtins in their own namespaces within namespace clang.
std::vector< std::string > Features
The list of target specific features to enable or disable – this should be a list of strings startin...
Definition: TargetOptions.h:55
static const char *const DataLayoutStringR600
Definition: AMDGPU.cpp:30
std::vector< std::string > FeaturesAsWritten
The list of target specific features to enable or disable, as written on the command line...
Definition: TargetOptions.h:51
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
Definition: AMDGPU.cpp:238
Dataflow Directional Tag Classes.
static const char *const DataLayoutStringAMDGCN
Definition: AMDGPU.cpp:34
unsigned[(unsigned) LangAS::FirstTargetAddressSpace] LangASMap
The type of a lookup table which maps from language-specific address spaces to target-specific ones...
Definition: AddressSpaces.h:53
ArrayRef< Builtin::Info > getTargetBuiltins() const override
Return information about target-specific builtins for the current primary target, and info about whic...
Definition: AMDGPU.cpp:276
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
void copyAuxTarget(const TargetInfo *Aux)
Copy type and layout related info.
Definition: TargetInfo.cpp:800
void adjust(LangOptions &Opts) override
Set forced language options.
Definition: AMDGPU.cpp:267
const llvm::fltSemantics * DoubleFormat
Definition: TargetInfo.h:100
unsigned char MaxAtomicInlineWidth
Definition: TargetInfo.h:177
void defineMacro(const Twine &Name, const Twine &Value="1")
Append a #define line for macro of the form "\#define Name Value\n".
Definition: MacroBuilder.h:29
std::unique_ptr< llvm::DataLayout > DataLayout
Definition: TargetInfo.h:179
Defines enum values for all the target-independent builtin functions.
uint64_t getMaxPointerWidth() const override
Return the maximum width of pointers on this target.
Definition: AMDGPU.h:103
bool UseAddrSpaceMapMangling
Specify if mangling based on address space map should be used or not for language specific address sp...
Definition: TargetInfo.h:265