clang  8.0.0svn
AMDGPU.cpp
Go to the documentation of this file.
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements AMDGPU TargetInfo objects.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "clang/Basic/Builtins.h"
20 #include "llvm/ADT/StringSwitch.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30 
// LLVM data-layout description string for the R600 family.
// It declares a single 32-bit pointer spec ("p:32:32") and ends in "A5";
// presumably "A5" selects address space 5 for allocas, which the constructor
// asserts to be Private -- confirm against the LLVM data-layout syntax.
// NOTE(review): presumably chosen for non-AMDGCN triples; the line that
// selects it is not visible in this listing.
31 static const char *const DataLayoutStringR600 =
32  "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
34 
// LLVM data-layout description string for the AMDGCN family.
// Unlike the R600 string it declares a 64-bit default pointer ("p:64:64") and
// separate pointer specs p1..p6 (64-bit for p1 and p4, 32-bit for p2, p3, p5
// and p6). The vector, native-integer, stack and "A5" fields are the same as
// in the R600 string.
// NOTE(review): presumably chosen for AMDGCN triples; the line that selects it
// is not visible in this listing.
35 static const char *const DataLayoutStringAMDGCN =
36  "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
37  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
39 
// Language-address-space to target-address-space table in which the Default
// language address space maps to Generic. Each entry's trailing comment names
// the language address space it stands for. setAddressSpaceMap() installs this
// table when its DefaultIsPrivate argument is false.
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41  Generic, // Default
42  Global, // opencl_global
43  Local, // opencl_local
44  Constant, // opencl_constant
45  Private, // opencl_private
46  Generic, // opencl_generic
47  Global, // cuda_device
48  Constant, // cuda_constant
49  Local // cuda_shared
50 };
51 
// Language-address-space to target-address-space table in which the Default
// language address space maps to Private; every other entry is identical to
// AMDGPUDefIsGenMap. setAddressSpaceMap() installs this table when its
// DefaultIsPrivate argument is true.
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
53  Private, // Default
54  Global, // opencl_global
55  Local, // opencl_local
56  Constant, // opencl_constant
57  Private, // opencl_private
58  Generic, // opencl_generic
59  Global, // cuda_device
60  Constant, // cuda_constant
61  Local // cuda_shared
62 };
63 } // namespace targets
64 } // namespace clang
65 
// Table of AMDGPU builtins, produced by expanding every entry of
// BuiltinsAMDGPU.def through the two macros defined here. Both macros emit one
// initializer per builtin with the stringified ID, TYPE, ATTRS and
// ALL_LANGUAGES; TARGET_BUILTIN additionally passes FEATURE as the last field,
// where BUILTIN passes nullptr.
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
67 #define BUILTIN(ID, TYPE, ATTRS) \
68  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
70  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
72 };
73 
// Register-name table handed out by getGCCRegNames().
// Order of entries: "v0".."v255", then "s0".."s127", then the named registers
// "exec", "vcc", "scc", "m0", "flat_scratch", followed by the "_lo"/"_hi"
// variants of exec, vcc and flat_scratch.
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119  "flat_scratch_lo", "flat_scratch_hi"
120 };
121 
// Returns an ArrayRef view over the static GCCRegNames table.
// NOTE(review): listing line 122 (the signature) was absent from this listing;
// it is restored here from the symbol index entry for getGCCRegNames
// ("Definition: AMDGPU.cpp:122").
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123  return llvm::makeArrayRef(GCCRegNames);
124 }
125 
127  llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128  const std::vector<std::string> &FeatureVec) const {
129 
130  using namespace llvm::AMDGPU;
131 
132  // XXX - What does the member GPU mean if device name string passed here?
133  if (isAMDGCN(getTriple())) {
134  if (CPU.empty())
135  CPU = "gfx600";
136 
137  switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138  case GK_GFX906:
139  Features["dl-insts"] = true;
140  LLVM_FALLTHROUGH;
141  case GK_GFX904:
142  case GK_GFX902:
143  case GK_GFX900:
144  Features["gfx9-insts"] = true;
145  LLVM_FALLTHROUGH;
146  case GK_GFX810:
147  case GK_GFX803:
148  case GK_GFX802:
149  case GK_GFX801:
150  Features["vi-insts"] = true;
151  Features["16-bit-insts"] = true;
152  Features["dpp"] = true;
153  Features["s-memrealtime"] = true;
154  LLVM_FALLTHROUGH;
155  case GK_GFX704:
156  case GK_GFX703:
157  case GK_GFX702:
158  case GK_GFX701:
159  case GK_GFX700:
160  Features["ci-insts"] = true;
161  LLVM_FALLTHROUGH;
162  case GK_GFX601:
163  case GK_GFX600:
164  break;
165  case GK_NONE:
166  return false;
167  default:
168  llvm_unreachable("Unhandled GPU!");
169  }
170  } else {
171  if (CPU.empty())
172  CPU = "r600";
173 
174  switch (llvm::AMDGPU::parseArchR600(CPU)) {
175  case GK_CAYMAN:
176  case GK_CYPRESS:
177  case GK_RV770:
178  case GK_RV670:
179  // TODO: Add fp64 when implemented.
180  break;
181  case GK_TURKS:
182  case GK_CAICOS:
183  case GK_BARTS:
184  case GK_SUMO:
185  case GK_REDWOOD:
186  case GK_JUNIPER:
187  case GK_CEDAR:
188  case GK_RV730:
189  case GK_RV710:
190  case GK_RS880:
191  case GK_R630:
192  case GK_R600:
193  break;
194  default:
195  llvm_unreachable("Unhandled GPU!");
196  }
197  }
198 
199  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
200 }
201 
// Appends default denormal-handling features to TargetOpts.Features unless the
// user already mentioned them in TargetOpts.FeaturesAsWritten.
//
// - fp32-denormals: if neither "+fp32-denormals" nor "-fp32-denormals" was
//   written, "+fp32-denormals" is appended when hasFastFMAF() and
//   hasFullRateDenormalsF32() hold and CGOpts.FlushDenorm is false; otherwise
//   "-fp32-denormals" is appended.
// - fp64-fp16-denormals: if not written by the user and hasFP64() holds,
//   "+fp64-fp16-denormals" is appended.
//
// NOTE(review): listing line 202 (the head of the signature) was absent from
// this listing; it is restored here from the symbol index entry for
// adjustTargetOptions ("Definition: AMDGPU.cpp:202").
202 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
203  TargetOptions &TargetOpts) const {
// These flags record whether the user *mentioned* the feature (with either
// sign), not whether denormals are enabled.
204  bool hasFP32Denormals = false;
205  bool hasFP64Denormals = false;
206 
207  for (auto &I : TargetOpts.FeaturesAsWritten) {
208  if (I == "+fp32-denormals" || I == "-fp32-denormals")
209  hasFP32Denormals = true;
210  if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
211  hasFP64Denormals = true;
212  }
213  if (!hasFP32Denormals)
214  TargetOpts.Features.push_back(
215  (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
216  ? '+' : '-') + Twine("fp32-denormals"))
217  .str());
218  // Always do not flush fp64 or fp16 denorms.
219  if (!hasFP64Denormals && hasFP64())
220  TargetOpts.Features.push_back("+fp64-fp16-denormals");
221 }
222 
// Fills Values with the CPU names accepted for this target: the AMDGCN arch
// list for AMDGCN triples, the R600 arch list otherwise.
// NOTE(review): listing line 223 (the head of the signature) was absent from
// this listing; it is restored here from the symbol index entry for
// fillValidCPUList ("Definition: AMDGPU.cpp:223").
223 void AMDGPUTargetInfo::fillValidCPUList(
224  SmallVectorImpl<StringRef> &Values) const {
225  if (isAMDGCN(getTriple()))
226  llvm::AMDGPU::fillValidArchListAMDGCN(Values);
227  else
228  llvm::AMDGPU::fillValidArchListR600(Values);
229 }
230 
// Points AddrSpaceMap at one of the two static language-to-target tables:
// AMDGPUDefIsPrivMap when DefaultIsPrivate is true, AMDGPUDefIsGenMap
// otherwise. The two tables differ only in what the Default language address
// space maps to (Private vs. Generic).
231 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
232  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
233 }
234 
// Constructs the target description for the given triple and options.
// GPUKind is parsed from Opts.CPU with the AMDGCN or R600 parser depending on
// the triple, and GPUFeatures is looked up from that kind. The body then
// asserts that the data layout's alloca address space is Private, installs the
// address-space map (Default is Private for Mesa3D or non-AMDGCN triples), and
// derives pointer and long widths from the data layout.
//
// NOTE(review): the listing numbering jumps 243->246, 249->251, 255->259 and
// 260->262, so statements on listing lines 244-245, 250, 256-258 and 261 are
// not visible here. Recover them from the original file before relying on
// this block.
235 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
236  const TargetOptions &Opts)
237  : TargetInfo(Triple),
238  GPUKind(isAMDGCN(Triple) ?
239  llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
240  llvm::AMDGPU::parseArchR600(Opts.CPU)),
241  GPUFeatures(isAMDGCN(Triple) ?
242  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
243  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
// (listing lines 244-245 missing; presumably where DataLayout is set up)
246  assert(DataLayout->getAllocaAddrSpace() == Private);
247 
248  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
249  !isAMDGCN(Triple));
// (listing line 250 missing)
251 
252  // Set pointer width and alignment for target address space 0.
253  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
254  if (getMaxPointerWidth() == 64) {
255  LongWidth = LongAlign = 64;
// (listing lines 256-258 missing)
259  }
260 
// (listing line 261 missing)
262 }
263 
// Applies the base-class language adjustments, then re-selects the
// address-space map: Default maps to Private when compiling OpenCL or when the
// triple is not AMDGCN, and to Generic otherwise.
// NOTE(review): listing line 264 (the signature) was absent from this listing;
// it is restored here from the symbol index entry for adjust
// ("Definition: AMDGPU.cpp:264").
264 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
265  TargetInfo::adjust(Opts);
266  // ToDo: There are still a few places using default address space as private
267  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
268  // can be removed from the following line.
269  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
270  !isAMDGCN(getTriple()));
271 }
272 
// Returns an ArrayRef view over the static BuiltinInfo table; the element
// count is the size of the AMDGPU target-specific builtin ID range.
// NOTE(review): listing lines 273 (signature) and 275 (end of the expression)
// were absent from this listing. The signature is restored from the symbol
// index entry for getTargetBuiltins ("Definition: AMDGPU.cpp:273"). The
// subtrahend Builtin::FirstTSBuiltin is the conventional clang idiom and is
// NOT shown in this listing -- verify against the original file.
273 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
274  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
275  Builtin::FirstTSBuiltin);
276 }
277 
// Emits the predefined macros for this target through Builder:
// - always "__AMD__" and "__AMDGPU__";
// - "__AMDGCN__" for AMDGCN triples, "__R600__" otherwise;
// - "__<name>__" for the canonical arch name when GPUKind is not GK_NONE;
// - capability macros gated on hasFMAF(), hasFastFMAF(), hasLDEXPF(),
//   hasFP64() and hasFastFMA().
// Opts is not read anywhere in the visible body.
// NOTE(review): listing line 278 (the head of the signature) was absent from
// this listing; it is restored here from the symbol index entry for
// getTargetDefines ("Definition: AMDGPU.cpp:278").
278 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
279  MacroBuilder &Builder) const {
280  Builder.defineMacro("__AMD__");
281  Builder.defineMacro("__AMDGPU__");
282 
283  if (isAMDGCN(getTriple()))
284  Builder.defineMacro("__AMDGCN__");
285  else
286  Builder.defineMacro("__R600__");
287 
288  if (GPUKind != llvm::AMDGPU::GK_NONE) {
289  StringRef CanonName = isAMDGCN(getTriple()) ?
290  getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
291  Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
292  }
293 
294  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
295  // removed in the near future.
296  if (hasFMAF())
297  Builder.defineMacro("__HAS_FMAF__");
298  if (hasFastFMAF())
299  Builder.defineMacro("FP_FAST_FMAF");
300  if (hasLDEXPF())
301  Builder.defineMacro("__HAS_LDEXPF__");
302  if (hasFP64())
303  Builder.defineMacro("__HAS_FP64__");
304  if (hasFastFMA())
305  Builder.defineMacro("FP_FAST_FMA");
306 }
bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const override
Initialize the map with the default set of target features for the CPU; this should include all legal ...
Definition: AMDGPU.cpp:126
Defines the clang::MacroBuilder utility class.
virtual void adjust(LangOptions &Opts)
Set forced language options.
Definition: TargetInfo.cpp:324
IntType IntPtrType
Definition: TargetInfo.h:221
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
Definition: Dominators.h:30
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:949
Options for controlling the target.
Definition: TargetOptions.h:26
void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override
===---- Other target property query methods --------------------------===//
Definition: AMDGPU.cpp:278
void fillValidCPUList(SmallVectorImpl< StringRef > &Values) const override
Fill a SmallVectorImpl with the valid values to setCPU.
Definition: AMDGPU.cpp:223
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:50
unsigned char MaxAtomicPromoteWidth
Definition: TargetInfo.h:107
unsigned char PointerWidth
Definition: TargetInfo.h:67
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:149
ArrayRef< const char * > getGCCRegNames() const override
Definition: AMDGPU.cpp:122
unsigned char LongWidth
Definition: TargetInfo.h:75
void setAddressSpaceMap(bool DefaultIsPrivate)
Definition: AMDGPU.cpp:231
static const char *const GCCRegNames[]
Definition: X86.cpp:44
Exposes information about the current target.
Definition: TargetInfo.h:54
virtual bool initFeatureMap(llvm::StringMap< bool > &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector< std::string > &FeatureVec) const
Initialize the map with the default set of target features for the CPU; this should include all legal ...
Definition: TargetInfo.cpp:385
Defines the clang::LangOptions interface.
void adjustTargetOptions(const CodeGenOptions &CGOpts, TargetOptions &TargetOpts) const override
Adjust target options based on codegen options.
Definition: AMDGPU.cpp:202
void resetDataLayout(StringRef DL)
Definition: TargetInfo.h:134
IntType PtrDiffType
Definition: TargetInfo.h:221
Enumerates target-specific builtins in their own namespaces within namespace clang.
std::vector< std::string > Features
The list of target specific features to enable or disable – this should be a list of strings startin...
Definition: TargetOptions.h:55
static const char *const DataLayoutStringR600
Definition: AMDGPU.cpp:31
std::vector< std::string > FeaturesAsWritten
The list of target specific features to enable or disable, as written on the command line...
Definition: TargetOptions.h:51
unsigned char PointerAlign
Definition: TargetInfo.h:67
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
Definition: AMDGPU.cpp:235
Dataflow Directional Tag Classes.
static const char *const DataLayoutStringAMDGCN
Definition: AMDGPU.cpp:35
unsigned[(unsigned) LangAS::FirstTargetAddressSpace] LangASMap
The type of a lookup table which maps from language-specific address spaces to target-specific ones...
Definition: AddressSpaces.h:54
unsigned char LongAlign
Definition: TargetInfo.h:75
ArrayRef< Builtin::Info > getTargetBuiltins() const override
Return information about target-specific builtins for the current primary target, and info about whic...
Definition: AMDGPU.cpp:273
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
void adjust(LangOptions &Opts) override
Set forced language options.
Definition: AMDGPU.cpp:264
unsigned char MaxAtomicInlineWidth
Definition: TargetInfo.h:107
void defineMacro(const Twine &Name, const Twine &Value="1")
Append a #define line for macro of the form "\#define Name Value\n".
Definition: MacroBuilder.h:30
std::unique_ptr< llvm::DataLayout > DataLayout
Definition: TargetInfo.h:112
Defines enum values for all the target-independent builtin functions.
uint64_t getMaxPointerWidth() const override
Return the maximum width of pointers on this target.
Definition: AMDGPU.h:104
bool UseAddrSpaceMapMangling
Specify if mangling based on address space map should be used or not for language specific address sp...
Definition: TargetInfo.h:255