clang  8.0.0svn
AMDGPU.cpp
Go to the documentation of this file.
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
// LLVM data layout description string for R600 (per the variable name).
// In LLVM data layout syntax it declares a little-endian target ("e") with
// 32-bit default pointers ("p:32:32") and allocas placed in address space 5
// ("A5"). The note above asks that getPointerWidthV() be kept in sync with it.
31 static const char *const DataLayoutStringR600 =
32  "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
34 
// LLVM data layout description string for AMDGCN (per the variable name).
// Default pointers and those in address spaces 1 and 4 are 64-bit; pointers
// in address spaces 2, 3, 5 and 6 are 32-bit. As in the R600 string, allocas
// live in address space 5 ("A5").
35 static const char *const DataLayoutStringAMDGCN =
36  "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
37  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
39 
// Language-address-space to target-address-space table in which the
// language's default address space maps to Generic. Each entry's trailing
// comment names the language address space it corresponds to.
// setAddressSpaceMap() selects this table when DefaultIsPrivate is false.
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41  Generic, // Default
42  Global, // opencl_global
43  Local, // opencl_local
44  Constant, // opencl_constant
45  Private, // opencl_private
46  Generic, // opencl_generic
47  Global, // cuda_device
48  Constant, // cuda_constant
49  Local // cuda_shared
50 };
51 
// Language-address-space to target-address-space table in which the
// language's default address space maps to Private. It differs from
// AMDGPUDefIsGenMap only in that first (Default) entry.
// setAddressSpaceMap() selects this table when DefaultIsPrivate is true.
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53  Private, // Default
54  Global, // opencl_global
55  Local, // opencl_local
56  Constant, // opencl_constant
57  Private, // opencl_private
58  Generic, // opencl_generic
59  Global, // cuda_device
60  Constant, // cuda_constant
61  Local // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
// Table of AMDGPU builtins, generated by expanding every BUILTIN and
// TARGET_BUILTIN entry of clang/Basic/BuiltinsAMDGPU.def into one
// Builtin::Info initializer. Both macros stringize ID and pass TYPE and ATTRS
// through with ALL_LANGUAGES; they differ only in the last field, which is
// nullptr for BUILTIN and the FEATURE argument for TARGET_BUILTIN.
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS) \
68  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
70  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
// Register name table handed out by getGCCRegNames(). It lists, in order:
// "v0".."v255", "s0".."s127", then the named registers exec, vcc, scc, m0 and
// flat_scratch, followed by the _lo/_hi variants of exec, vcc and
// flat_scratch.
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119  "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
// Body of AMDGPUTargetInfo::getGCCRegNames(): exposes the static GCCRegNames
// table as an ArrayRef. (The signature line is absent from this listing.)
123  return llvm::makeArrayRef(GCCRegNames);
124 }
125 
127  llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128  const std::vector<std::string> &FeatureVec) const {
// AMDGPUTargetInfo::initFeatureMap: seeds Features with the feature flags
// implied by CPU, then delegates to TargetInfo::initFeatureMap with the
// (possibly defaulted) CPU name. Returns false without delegating when an
// AMDGCN CPU name parses to GK_NONE.
129 
130  using namespace llvm::AMDGPU;
131 
132  // XXX - What does the member GPU mean if device name string passed here?
133  if (isAMDGCN(getTriple())) {
// An empty CPU name defaults to "gfx600" on AMDGCN.
134  if (CPU.empty())
135  CPU = "gfx600";
136 
// The cases below deliberately fall through: a GPU kind matched higher up
// also receives every feature assignment listed beneath it.
137  switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138  case GK_GFX906:
139  Features["dl-insts"] = true;
140  LLVM_FALLTHROUGH;
141  case GK_GFX909:
142  case GK_GFX904:
143  case GK_GFX902:
144  case GK_GFX900:
145  Features["gfx9-insts"] = true;
146  LLVM_FALLTHROUGH;
147  case GK_GFX810:
148  case GK_GFX803:
149  case GK_GFX802:
150  case GK_GFX801:
151  Features["vi-insts"] = true;
152  Features["16-bit-insts"] = true;
153  Features["dpp"] = true;
154  Features["s-memrealtime"] = true;
155  LLVM_FALLTHROUGH;
156  case GK_GFX704:
157  case GK_GFX703:
158  case GK_GFX702:
159  case GK_GFX701:
160  case GK_GFX700:
161  Features["ci-insts"] = true;
162  LLVM_FALLTHROUGH;
// gfx600/gfx601 add no features of their own.
163  case GK_GFX601:
164  case GK_GFX600:
165  break;
// Unrecognized AMDGCN CPU name: report failure to the caller.
166  case GK_NONE:
167  return false;
168  default:
169  llvm_unreachable("Unhandled GPU!");
170  }
171  } else {
// An empty CPU name defaults to "r600" on non-AMDGCN triples.
172  if (CPU.empty())
173  CPU = "r600";
174 
// No feature flags are set for any R600-family GPU; both case groups only
// break.
// NOTE(review): unlike the AMDGCN switch there is no GK_NONE case here, so a
// name that parseArchR600 does not recognize would presumably reach
// llvm_unreachable -- confirm that callers validate the CPU name first.
175  switch (llvm::AMDGPU::parseArchR600(CPU)) {
176  case GK_CAYMAN:
177  case GK_CYPRESS:
178  case GK_RV770:
179  case GK_RV670:
180  // TODO: Add fp64 when implemented.
181  break;
182  case GK_TURKS:
183  case GK_CAICOS:
184  case GK_BARTS:
185  case GK_SUMO:
186  case GK_REDWOOD:
187  case GK_JUNIPER:
188  case GK_CEDAR:
189  case GK_RV730:
190  case GK_RV710:
191  case GK_RS880:
192  case GK_R630:
193  case GK_R600:
194  break;
195  default:
196  llvm_unreachable("Unhandled GPU!");
197  }
198  }
199 
200  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
201 }
202 
204  TargetOptions &TargetOpts) const {
// AMDGPUTargetInfo::adjustTargetOptions: appends default denormal-handling
// features to TargetOpts.Features for any denormal feature the user did not
// mention explicitly.
// Despite their names, these flags record whether the feature was written by
// the user (with either '+' or '-'), not whether denormals are enabled.
205  bool hasFP32Denormals = false;
206  bool hasFP64Denormals = false;
207 
208  for (auto &I : TargetOpts.FeaturesAsWritten) {
209  if (I == "+fp32-denormals" || I == "-fp32-denormals")
210  hasFP32Denormals = true;
211  if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
212  hasFP64Denormals = true;
213  }
// Default for fp32: "+fp32-denormals" only when the target has fast FMAF and
// full-rate fp32 denormals and CGOpts.FlushDenorm is not set; otherwise
// "-fp32-denormals". The whole && chain is the condition of the ?: operator.
214  if (!hasFP32Denormals)
215  TargetOpts.Features.push_back(
216  (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
217  ? '+' : '-') + Twine("fp32-denormals"))
218  .str());
219  // By default, never flush fp64 or fp16 denormals (only when FP64 exists).
220  if (!hasFP64Denormals && hasFP64())
221  TargetOpts.Features.push_back("+fp64-fp16-denormals");
222 }
223 
225  SmallVectorImpl<StringRef> &Values) const {
// AMDGPUTargetInfo::fillValidCPUList: fills Values through the AMDGCN or the
// R600 arch-list helper, chosen by the target triple.
226  if (isAMDGCN(getTriple()))
227  llvm::AMDGPU::fillValidArchListAMDGCN(Values);
228  else
229  llvm::AMDGPU::fillValidArchListR600(Values);
230 }
231 
// Points AddrSpaceMap at one of the two static address space tables:
// AMDGPUDefIsPrivMap when DefaultIsPrivate is true, AMDGPUDefIsGenMap
// otherwise.
232 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
233  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
234 }
235 
// Constructs the AMDGPU target description for Triple.
// GPUKind is parsed from Opts.CPU with the AMDGCN or R600 parser according to
// the triple, and GPUFeatures is looked up from that kind with the matching
// getArchAttr helper.
// NOTE(review): listing lines 245-246, 251, 257-259 and 262 are absent from
// this copy, so parts of the body are not visible here.
236 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
237  const TargetOptions &Opts)
238  : TargetInfo(Triple),
239  GPUKind(isAMDGCN(Triple) ?
240  llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
241  llvm::AMDGPU::parseArchR600(Opts.CPU)),
242  GPUFeatures(isAMDGCN(Triple) ?
243  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
244  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
// The data layout in use must place allocas in the Private address space.
247  assert(DataLayout->getAllocaAddrSpace() == Private);
248 
// The default address space is private on Mesa3D and on non-AMDGCN triples.
249  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
250  !isAMDGCN(Triple));
252 
253  // Set pointer width and alignment for target address space 0.
254  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
// When the maximum pointer width is 64 bits, long is 64 bits wide as well.
255  if (getMaxPointerWidth() == 64) {
256  LongWidth = LongAlign = 64;
260  }
261 
263 }
264 
// Body of AMDGPUTargetInfo::adjust(LangOptions &Opts): runs the base-class
// adjustment, then re-selects the address space map from the language
// options. (The signature line is absent from this listing.)
266  TargetInfo::adjust(Opts);
267  // ToDo: There are still a few places using default address space as private
268  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
269  // can be removed from the following line.
// The default address space is private for OpenCL or for non-AMDGCN triples.
270  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
271  !isAMDGCN(getTriple()));
272 }
273 
// Body of AMDGPUTargetInfo::getTargetBuiltins(): returns BuiltinInfo as an
// ArrayRef whose length is LastTSBuiltin minus an operand on listing line 276.
// NOTE(review): the signature line and listing line 276 are absent from this
// copy, so the subtrahend is not visible here.
275  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
277 }
278 
280  MacroBuilder &Builder) const {
// AMDGPUTargetInfo::getTargetDefines: defines this target's predefined macros
// through Builder. __AMD__ and __AMDGPU__ are always defined.
281  Builder.defineMacro("__AMD__");
282  Builder.defineMacro("__AMDGPU__");
283 
// Exactly one of __AMDGCN__ / __R600__ is defined, chosen by the triple.
284  if (isAMDGCN(getTriple()))
285  Builder.defineMacro("__AMDGCN__");
286  else
287  Builder.defineMacro("__R600__");
288 
// When a GPU kind is known, also define __<canonical arch name>__.
289  if (GPUKind != llvm::AMDGPU::GK_NONE) {
290  StringRef CanonName = isAMDGCN(getTriple()) ?
291  getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
292  Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
293  }
294 
// Capability macros, each defined only if the matching has*() query is true.
295  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
296  // removed in the near future.
297  if (hasFMAF())
298  Builder.defineMacro("__HAS_FMAF__");
299  if (hasFastFMAF())
300  Builder.defineMacro("FP_FAST_FMAF");
301  if (hasLDEXPF())
302  Builder.defineMacro("__HAS_LDEXPF__");
303  if (hasFP64())
304  Builder.defineMacro("__HAS_FP64__");
305  if (hasFastFMA())
306  Builder.defineMacro("FP_FAST_FMA");
307 }
bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const override
Initialize the map with the default set of target features for the CPU this should include all legal ...
Definition: AMDGPU.cpp:126
Defines the clang::MacroBuilder utility class.
virtual void adjust(LangOptions &Opts)
Set forced language options.
Definition: TargetInfo.cpp:324
IntType IntPtrType
Definition: TargetInfo.h:221
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
Definition: Dominators.h:30
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:949
Options for controlling the target.
Definition: TargetOptions.h:26
void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override
Other target property query methods: defines this target's predefined macros through Builder.
Definition: AMDGPU.cpp:279
void fillValidCPUList(SmallVectorImpl< StringRef > &Values) const override
Fill a SmallVectorImpl with the valid values to setCPU.
Definition: AMDGPU.cpp:224
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:50
unsigned char MaxAtomicPromoteWidth
Definition: TargetInfo.h:107
unsigned char PointerWidth
Definition: TargetInfo.h:67
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:149
ArrayRef< const char * > getGCCRegNames() const override
Definition: AMDGPU.cpp:122
unsigned char LongWidth
Definition: TargetInfo.h:75
void setAddressSpaceMap(bool DefaultIsPrivate)
Definition: AMDGPU.cpp:232
static const char *const GCCRegNames[]
Definition: X86.cpp:44
return Out str()
Exposes information about the current target.
Definition: TargetInfo.h:54
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU this should include all legal ...
Definition: TargetInfo.cpp:385
Defines the clang::LangOptions interface.
void adjustTargetOptions(const CodeGenOptions &CGOpts, TargetOptions &TargetOpts) const override
Adjust target options based on codegen options.
Definition: AMDGPU.cpp:203
void resetDataLayout(StringRef DL)
Definition: TargetInfo.h:134
IntType PtrDiffType
Definition: TargetInfo.h:221
Enumerates target-specific builtins in their own namespaces within namespace clang.
std::vector< std::string > Features
The list of target specific features to enable or disable – this should be a list of strings startin...
Definition: TargetOptions.h:55
static const char *const DataLayoutStringR600
Definition: AMDGPU.cpp:31
std::vector< std::string > FeaturesAsWritten
The list of target specific features to enable or disable, as written on the command line...
Definition: TargetOptions.h:51
unsigned char PointerAlign
Definition: TargetInfo.h:67
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
Definition: AMDGPU.cpp:236
Dataflow Directional Tag Classes.
static const char *const DataLayoutStringAMDGCN
Definition: AMDGPU.cpp:35
unsigned[(unsigned) LangAS::FirstTargetAddressSpace] LangASMap
The type of a lookup table which maps from language-specific address spaces to target-specific ones...
Definition: AddressSpaces.h:54
unsigned char LongAlign
Definition: TargetInfo.h:75
ArrayRef< Builtin::Info > getTargetBuiltins() const override
Return information about target-specific builtins for the current primary target, and info about whic...
Definition: AMDGPU.cpp:274
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
void adjust(LangOptions &Opts) override
Set forced language options.
Definition: AMDGPU.cpp:265
unsigned char MaxAtomicInlineWidth
Definition: TargetInfo.h:107
void defineMacro(const Twine &Name, const Twine &Value="1")
Append a #define line for macro of the form "\#define Name Value\n".
Definition: MacroBuilder.h:30
std::unique_ptr< llvm::DataLayout > DataLayout
Definition: TargetInfo.h:112
Defines enum values for all the target-independent builtin functions.
uint64_t getMaxPointerWidth() const override
Return the maximum width of pointers on this target.
Definition: AMDGPU.h:104
bool UseAddrSpaceMapMangling
Specify if mangling based on address space map should be used or not for language specific address sp...
Definition: TargetInfo.h:255