clang  16.0.0git
Cuda.cpp
1 //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Cuda.h"
10 #include "CommonArgs.h"
11 #include "clang/Basic/Cuda.h"
12 #include "clang/Config/config.h"
14 #include "clang/Driver/Distro.h"
15 #include "clang/Driver/Driver.h"
17 #include "clang/Driver/InputInfo.h"
18 #include "clang/Driver/Options.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/Option/ArgList.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/Host.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/Process.h"
26 #include "llvm/Support/Program.h"
27 #include "llvm/Support/TargetParser.h"
28 #include "llvm/Support/VirtualFileSystem.h"
29 #include <system_error>
30 
31 using namespace clang::driver;
32 using namespace clang::driver::toolchains;
33 using namespace clang::driver::tools;
34 using namespace clang;
35 using namespace llvm::opt;
36 
37 namespace {
38 
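// Maps the raw CUDA_VERSION value from cuda.h (e.g. 11040 for CUDA 11.4) to the
// closest known CudaVersion enumerator; versions newer than the last release
// this driver knows about map to CudaVersion::NEW.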
39 CudaVersion getCudaVersion(uint32_t raw_version) {
40  if (raw_version < 7050)
41  return CudaVersion::CUDA_70;
42  if (raw_version < 8000)
43  return CudaVersion::CUDA_75;
44  if (raw_version < 9000)
45  return CudaVersion::CUDA_80;
46  if (raw_version < 9010)
47  return CudaVersion::CUDA_90;
48  if (raw_version < 9020)
49  return CudaVersion::CUDA_91;
50  if (raw_version < 10000)
51  return CudaVersion::CUDA_92;
52  if (raw_version < 10010)
53  return CudaVersion::CUDA_100;
54  if (raw_version < 10020)
55  return CudaVersion::CUDA_101;
56  if (raw_version < 11000)
57  return CudaVersion::CUDA_102;
58  if (raw_version < 11010)
59  return CudaVersion::CUDA_110;
60  if (raw_version < 11020)
61  return CudaVersion::CUDA_111;
62  if (raw_version < 11030)
63  return CudaVersion::CUDA_112;
64  if (raw_version < 11040)
65  return CudaVersion::CUDA_113;
66  if (raw_version < 11050)
67  return CudaVersion::CUDA_114;
68  if (raw_version < 11060)
69  return CudaVersion::CUDA_115;
70  if (raw_version < 11070)
71  return CudaVersion::CUDA_116;
72  if (raw_version < 11080)
73  return CudaVersion::CUDA_117;
74  if (raw_version < 11090)
75  return CudaVersion::CUDA_118;
76  return CudaVersion::NEW;
77 }
78 
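// Scans the contents of cuda.h for a "#define CUDA_VERSION <value>" line and
// converts the value via getCudaVersion(); returns CudaVersion::UNKNOWN if no
// such line is found.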
79 CudaVersion parseCudaHFile(llvm::StringRef Input) {
80  // Helper lambda which skips the words if the line starts with them or returns
81  // None otherwise.
82  auto StartsWithWords =
83  [](llvm::StringRef Line,
84  const SmallVector<StringRef, 3> words) -> llvm::Optional<StringRef> {
85  for (StringRef word : words) {
86  if (!Line.consume_front(word))
87  return {};
88  Line = Line.ltrim();
89  }
90  return Line;
91  };
92 
93  Input = Input.ltrim();
94  while (!Input.empty()) {
95  if (auto Line =
96  StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
97  uint32_t RawVersion;
98  Line->consumeInteger(10, RawVersion);
99  return getCudaVersion(RawVersion);
100  }
101  // Find next non-empty line.
102  Input = Input.drop_front(Input.find_first_of("\n\r")).ltrim();
103  }
104  return CudaVersion::UNKNOWN;
105 }
106 } // namespace
107 
108 void CudaInstallationDetector::WarnIfUnsupportedVersion() {
109  if (Version > CudaVersion::PARTIALLY_SUPPORTED) {
110  std::string VersionString = CudaVersionToString(Version);
111  if (!VersionString.empty())
112  VersionString.insert(0, " ");
113  D.Diag(diag::warn_drv_new_cuda_version)
114  << VersionString
115  << (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED)
116  << CudaVersionToString(CudaVersion::PARTIALLY_SUPPORTED);
117  } else if (Version > CudaVersion::FULLY_SUPPORTED)
118  D.Diag(diag::warn_drv_partially_supported_cuda_version)
119  << CudaVersionToString(Version);
120 }
121 
122 CudaInstallationDetector::CudaInstallationDetector(
123  const Driver &D, const llvm::Triple &HostTriple,
124  const llvm::opt::ArgList &Args)
125  : D(D) {
126  struct Candidate {
127  std::string Path;
128  bool StrictChecking;
129 
130  Candidate(std::string Path, bool StrictChecking = false)
131  : Path(Path), StrictChecking(StrictChecking) {}
132  };
133  SmallVector<Candidate, 4> Candidates;
134 
135  // In decreasing order so we prefer newer versions to older versions.
136  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
137  auto &FS = D.getVFS();
138 
139  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
140  Candidates.emplace_back(
141  Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
142  } else if (HostTriple.isOSWindows()) {
143  for (const char *Ver : Versions)
144  Candidates.emplace_back(
145  D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
146  Ver);
147  } else {
148  if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
149  // Try to find ptxas binary. If the executable is located in a directory
150  // called 'bin/', its parent directory might be a good guess for a valid
151  // CUDA installation.
152  // However, some distributions might install 'ptxas' to /usr/bin. In that
153  // case the candidate would be '/usr' which passes the following checks
154  // because '/usr/include' exists as well. To avoid this case, we always
155  // check for the directory potentially containing files for libdevice,
156  // even if the user passes -nocudalib.
157  if (llvm::ErrorOr<std::string> ptxas =
158  llvm::sys::findProgramByName("ptxas")) {
159  SmallString<256> ptxasAbsolutePath;
160  llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
161 
162  StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
163  if (llvm::sys::path::filename(ptxasDir) == "bin")
164  Candidates.emplace_back(
165  std::string(llvm::sys::path::parent_path(ptxasDir)),
166  /*StrictChecking=*/true);
167  }
168  }
169 
170  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
171  for (const char *Ver : Versions)
172  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
173 
174  Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
175  if (Dist.IsDebian() || Dist.IsUbuntu())
176  // Special case for Debian to have nvidia-cuda-toolkit work
177  // out of the box. More info on http://bugs.debian.org/882505
178  Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
179  }
180 
181  bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);
182 
183  for (const auto &Candidate : Candidates) {
184  InstallPath = Candidate.Path;
185  if (InstallPath.empty() || !FS.exists(InstallPath))
186  continue;
187 
188  BinPath = InstallPath + "/bin";
189  IncludePath = InstallPath + "/include";
190  LibDevicePath = InstallPath + "/nvvm/libdevice";
191 
192  if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
193  continue;
194  bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
195  if (CheckLibDevice && !FS.exists(LibDevicePath))
196  continue;
197 
198  // On Linux, we have both lib and lib64 directories, and we need to choose
199  // based on our triple. On MacOS, we have only a lib directory.
200  //
201  // It's sufficient for our purposes to be flexible: If both lib and lib64
202  // exist, we choose whichever one matches our triple. Otherwise, if only
203  // lib exists, we use it.
204  if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
205  LibPath = InstallPath + "/lib64";
206  else if (FS.exists(InstallPath + "/lib"))
207  LibPath = InstallPath + "/lib";
208  else
209  continue;
210 
211  Version = CudaVersion::UNKNOWN;
212  if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
213  Version = parseCudaHFile((*CudaHFile)->getBuffer());
214  // As the last resort, make an educated guess between CUDA-7.0, which had
215  // old-style libdevice bitcode, and an unknown recent CUDA version.
216  if (Version == CudaVersion::UNKNOWN) {
217  Version = FS.exists(LibDevicePath + "/libdevice.10.bc")
218  ? CudaVersion::NEW
219  : CudaVersion::CUDA_70;
220  }
221 
222  if (Version >= CudaVersion::CUDA_90) {
223  // CUDA-9+ uses single libdevice file for all GPU variants.
224  std::string FilePath = LibDevicePath + "/libdevice.10.bc";
225  if (FS.exists(FilePath)) {
226  for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
227  ++Arch) {
228  CudaArch GpuArch = static_cast<CudaArch>(Arch);
229  if (!IsNVIDIAGpuArch(GpuArch))
230  continue;
231  std::string GpuArchName(CudaArchToString(GpuArch));
232  LibDeviceMap[GpuArchName] = FilePath;
233  }
234  }
235  } else {
236  std::error_code EC;
237  for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC),
238  LE;
239  !EC && LI != LE; LI = LI.increment(EC)) {
240  StringRef FilePath = LI->path();
241  StringRef FileName = llvm::sys::path::filename(FilePath);
242  // Process all bitcode filenames that look like
243  // libdevice.compute_XX.YY.bc
244  const StringRef LibDeviceName = "libdevice.";
245  if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
246  continue;
247  StringRef GpuArch = FileName.slice(
248  LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
249  LibDeviceMap[GpuArch] = FilePath.str();
250  // Insert map entries for specific devices with this compute
251  // capability. NVCC's choice of the libdevice library version is
252  // rather peculiar and depends on the CUDA version.
253  if (GpuArch == "compute_20") {
254  LibDeviceMap["sm_20"] = std::string(FilePath);
255  LibDeviceMap["sm_21"] = std::string(FilePath);
256  LibDeviceMap["sm_32"] = std::string(FilePath);
257  } else if (GpuArch == "compute_30") {
258  LibDeviceMap["sm_30"] = std::string(FilePath);
259  if (Version < CudaVersion::CUDA_80) {
260  LibDeviceMap["sm_50"] = std::string(FilePath);
261  LibDeviceMap["sm_52"] = std::string(FilePath);
262  LibDeviceMap["sm_53"] = std::string(FilePath);
263  }
264  LibDeviceMap["sm_60"] = std::string(FilePath);
265  LibDeviceMap["sm_61"] = std::string(FilePath);
266  LibDeviceMap["sm_62"] = std::string(FilePath);
267  } else if (GpuArch == "compute_35") {
268  LibDeviceMap["sm_35"] = std::string(FilePath);
269  LibDeviceMap["sm_37"] = std::string(FilePath);
270  } else if (GpuArch == "compute_50") {
271  if (Version >= CudaVersion::CUDA_80) {
272  LibDeviceMap["sm_50"] = std::string(FilePath);
273  LibDeviceMap["sm_52"] = std::string(FilePath);
274  LibDeviceMap["sm_53"] = std::string(FilePath);
275  }
276  }
277  }
278  }
279 
280  // Check that we have found at least one libdevice that we can link in if
281  // -nocudalib hasn't been specified.
282  if (LibDeviceMap.empty() && !NoCudaLib)
283  continue;
284 
285  IsValid = true;
286  break;
287  }
288 }
289 
290 void CudaInstallationDetector::AddCudaIncludeArgs(
291  const ArgList &DriverArgs, ArgStringList &CC1Args) const {
292  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
293  // Add cuda_wrappers/* to our system include path. This lets us wrap
294  // standard library headers.
295  SmallString<128> P(D.ResourceDir);
296  llvm::sys::path::append(P, "include");
297  llvm::sys::path::append(P, "cuda_wrappers");
298  CC1Args.push_back("-internal-isystem");
299  CC1Args.push_back(DriverArgs.MakeArgString(P));
300  }
301 
302  if (DriverArgs.hasArg(options::OPT_nogpuinc))
303  return;
304 
305  if (!isValid()) {
306  D.Diag(diag::err_drv_no_cuda_installation);
307  return;
308  }
309 
310  CC1Args.push_back("-include");
311  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
312 }
313 
314 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
315  CudaArch Arch) const {
316  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
317  ArchsWithBadVersion[(int)Arch])
318  return;
319 
320  auto MinVersion = MinVersionForCudaArch(Arch);
321  auto MaxVersion = MaxVersionForCudaArch(Arch);
322  if (Version < MinVersion || Version > MaxVersion) {
323  ArchsWithBadVersion[(int)Arch] = true;
324  D.Diag(diag::err_drv_cuda_version_unsupported)
325  << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
326  << CudaVersionToString(MaxVersion) << InstallPath
327  << CudaVersionToString(Version);
328  }
329 }
330 
331 void CudaInstallationDetector::print(raw_ostream &OS) const {
332  if (isValid())
333  OS << "Found CUDA installation: " << InstallPath << ", version "
334  << CudaVersionToString(Version) << "\n";
335 }
336 
337 namespace {
338 /// Debug info level for the NVPTX devices. We may need to emit a different
339 /// debug info level for the host and for the device itself. This type controls
340 /// emission of debug info for the devices: it either disables debug info
341 /// emission completely, emits debug directives only, or emits the same debug
342 /// info as for the host.
343 enum DeviceDebugInfoLevel {
344  DisableDebugInfo, /// Do not emit debug info for the devices.
345  DebugDirectivesOnly, /// Emit only debug directives.
346  EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
347  /// host.
348 };
349 } // anonymous namespace
350 
351 /// Define the debug info level for the NVPTX devices. If debug info for both
352 /// the host and the device is disabled (-g0/-ggdb0 or no debug options at all),
353 /// no debug info is emitted. If only debug directives are requested for both
354 /// the host and the device (-gline-directives-only), or debug info for the
355 /// device alone is disabled (optimization is on and --cuda-noopt-device-debug
356 /// was not specified), only debug directives are emitted for the device.
357 /// Otherwise, use the same debug info level as for the host (with the
358 /// limitation that only the DWARF2 standard is supported).
359 static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
360  const Arg *A = Args.getLastArg(options::OPT_O_Group);
361  bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
362  Args.hasFlag(options::OPT_cuda_noopt_device_debug,
363  options::OPT_no_cuda_noopt_device_debug,
364  /*Default=*/false);
365  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
366  const Option &Opt = A->getOption();
367  if (Opt.matches(options::OPT_gN_Group)) {
368  if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
369  return DisableDebugInfo;
370  if (Opt.matches(options::OPT_gline_directives_only))
371  return DebugDirectivesOnly;
372  }
373  return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
374  }
375  return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo;
376 }
377 
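// Builds the ptxas invocation that assembles one PTX input into a cubin (or a
// relocatable object when -c is required), forwarding the GPU architecture,
// debug, and optimization options derived from the driver arguments.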
378 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
379  const InputInfo &Output,
380  const InputInfoList &Inputs,
381  const ArgList &Args,
382  const char *LinkingOutput) const {
383  const auto &TC =
384  static_cast<const toolchains::CudaToolChain &>(getToolChain());
385  assert(TC.getTriple().isNVPTX() && "Wrong platform");
386 
387  StringRef GPUArchName;
388  // If this is an OpenMP action we need to extract the device architecture
389  // from the -march=arch option. This option may come from -Xopenmp-target
390  // flag or the default value.
391  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
392  GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
393  assert(!GPUArchName.empty() && "Must have an architecture passed in.");
394  } else
395  GPUArchName = JA.getOffloadingArch();
396 
397  // Obtain architecture from the action.
398  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
399  assert(gpu_arch != CudaArch::UNKNOWN &&
400  "Device action expected to have an architecture.");
401 
402  // Check that our installation's ptxas supports gpu_arch.
403  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
404  TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
405  }
406 
407  ArgStringList CmdArgs;
408  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
409  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
410  if (DIKind == EmitSameDebugInfoAsHost) {
411  // ptxas does not accept -g option if optimization is enabled, so
412  // we ignore the compiler's -O* options if we want debug info.
413  CmdArgs.push_back("-g");
414  CmdArgs.push_back("--dont-merge-basicblocks");
415  CmdArgs.push_back("--return-at-end");
416  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
417  // Map the -O we received to -O{0,1,2,3}.
418  //
419  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
420  // default, so it may correspond more closely to the spirit of clang -O2.
421 
422  // -O3 seems like the least-bad option when -Osomething is specified to
423  // clang but it isn't handled below.
424  StringRef OOpt = "3";
425  if (A->getOption().matches(options::OPT_O4) ||
426  A->getOption().matches(options::OPT_Ofast))
427  OOpt = "3";
428  else if (A->getOption().matches(options::OPT_O0))
429  OOpt = "0";
430  else if (A->getOption().matches(options::OPT_O)) {
431  // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
432  OOpt = llvm::StringSwitch<const char *>(A->getValue())
433  .Case("1", "1")
434  .Case("2", "2")
435  .Case("3", "3")
436  .Case("s", "2")
437  .Case("z", "2")
438  .Default("2");
439  }
440  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
441  } else {
442  // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
443  // to no optimizations, but ptxas's default is -O3.
444  CmdArgs.push_back("-O0");
445  }
446  if (DIKind == DebugDirectivesOnly)
447  CmdArgs.push_back("-lineinfo");
448 
449  // Pass -v to ptxas if it was passed to the driver.
450  if (Args.hasArg(options::OPT_v))
451  CmdArgs.push_back("-v");
452 
453  CmdArgs.push_back("--gpu-name");
454  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
455  CmdArgs.push_back("--output-file");
456  const char *OutputFileName = Args.MakeArgString(TC.getInputFilename(Output));
457  if (std::string(OutputFileName) != std::string(Output.getFilename()))
458  C.addTempFile(OutputFileName);
459  CmdArgs.push_back(OutputFileName);
460  for (const auto& II : Inputs)
461  CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
462 
463  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
464  CmdArgs.push_back(Args.MakeArgString(A));
465 
466  bool Relocatable = false;
467  if (JA.isOffloading(Action::OFK_OpenMP))
468  // In OpenMP we need to generate relocatable code.
469  Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
470  options::OPT_fnoopenmp_relocatable_target,
471  /*Default=*/true);
472  else if (JA.isOffloading(Action::OFK_Cuda))
473  Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
474  options::OPT_fno_gpu_rdc, /*Default=*/false);
475 
476  if (Relocatable)
477  CmdArgs.push_back("-c");
478 
479  const char *Exec;
480  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
481  Exec = A->getValue();
482  else
483  Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
484  C.addCommand(std::make_unique<Command>(
485  JA, *this,
487  "--options-file"},
488  Exec, CmdArgs, Inputs, Output));
489 }
490 
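// Returns true if PTX for the given GPU arch should be embedded in the
// fatbinary, honoring the last matching --cuda-include-ptx= /
// --no-cuda-include-ptx= option (with a value of either "all" or the arch).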
491 static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
492  bool includePTX = true;
493  for (Arg *A : Args) {
494  if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
495  A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
496  continue;
497  A->claim();
498  const StringRef ArchStr = A->getValue();
499  if (ArchStr == "all" || ArchStr == gpu_arch) {
500  includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
501  continue;
502  }
503  }
504  return includePTX;
505 }
506 
507 // All inputs to this linker must be from CudaDeviceActions, as we need to look
508 // at the Inputs' Actions in order to figure out which GPU architecture they
509 // correspond to.
510 void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
511  const InputInfo &Output,
512  const InputInfoList &Inputs,
513  const ArgList &Args,
514  const char *LinkingOutput) const {
515  const auto &TC =
516  static_cast<const toolchains::CudaToolChain &>(getToolChain());
517  assert(TC.getTriple().isNVPTX() && "Wrong platform");
518 
519  ArgStringList CmdArgs;
520  if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
521  CmdArgs.push_back("--cuda");
522  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
523  CmdArgs.push_back(Args.MakeArgString("--create"));
524  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
525  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
526  CmdArgs.push_back("-g");
527 
528  for (const auto& II : Inputs) {
529  auto *A = II.getAction();
530  assert(A->getInputs().size() == 1 &&
531  "Device offload action is expected to have a single input");
532  const char *gpu_arch_str = A->getOffloadingArch();
533  assert(gpu_arch_str &&
534  "Device action expected to have associated a GPU architecture!");
535  CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
536 
537  if (II.getType() == types::TY_PP_Asm &&
538  !shouldIncludePTX(Args, gpu_arch_str))
539  continue;
540  // We need to pass an Arch of the form "sm_XX" for cubin files and
541  // "compute_XX" for ptx.
542  const char *Arch = (II.getType() == types::TY_PP_Asm)
543  ? CudaArchToVirtualArchString(gpu_arch)
544  : gpu_arch_str;
545  CmdArgs.push_back(
546  Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
547  ",file=" + getToolChain().getInputFilename(II)));
548  }
549 
550  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
551  CmdArgs.push_back(Args.MakeArgString(A));
552 
553  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
554  C.addCommand(std::make_unique<Command>(
555  JA, *this,
557  "--options-file"},
558  Exec, CmdArgs, Inputs, Output));
559 }
560 
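// Selects the PTX ISA feature (+ptxNN) for the NVPTX backend: a feature
// requested explicitly via OPT_cuda_feature_EQ wins; otherwise the feature is
// derived from the detected CUDA installation's version, falling back to +ptx42.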
561 void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
562  const llvm::opt::ArgList &Args,
563  std::vector<StringRef> &Features) {
564  if (Args.hasArg(options::OPT_cuda_feature_EQ)) {
565  StringRef PtxFeature =
566  Args.getLastArgValue(options::OPT_cuda_feature_EQ, "+ptx42");
567  Features.push_back(Args.MakeArgString(PtxFeature));
568  return;
569  }
570  CudaInstallationDetector CudaInstallation(D, Triple, Args);
571 
572  // New CUDA versions often introduce new instructions that are only supported
573  // by a newer PTX version, so we need to raise the PTX level to enable them in
574  // the NVPTX back-end.
575  const char *PtxFeature = nullptr;
576  switch (CudaInstallation.version()) {
577 #define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \
578  case CudaVersion::CUDA_##CUDA_VER: \
579  PtxFeature = "+ptx" #PTX_VER; \
580  break;
581  CASE_CUDA_VERSION(118, 78);
582  CASE_CUDA_VERSION(117, 77);
583  CASE_CUDA_VERSION(116, 76);
584  CASE_CUDA_VERSION(115, 75);
585  CASE_CUDA_VERSION(114, 74);
586  CASE_CUDA_VERSION(113, 73);
587  CASE_CUDA_VERSION(112, 72);
588  CASE_CUDA_VERSION(111, 71);
589  CASE_CUDA_VERSION(110, 70);
590  CASE_CUDA_VERSION(102, 65);
591  CASE_CUDA_VERSION(101, 64);
592  CASE_CUDA_VERSION(100, 63);
593  CASE_CUDA_VERSION(92, 61);
594  CASE_CUDA_VERSION(91, 61);
595  CASE_CUDA_VERSION(90, 60);
596 #undef CASE_CUDA_VERSION
597  default:
598  PtxFeature = "+ptx42";
599  }
600  Features.push_back(PtxFeature);
601 }
602 
603 /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
604 /// which isn't properly a linker but nonetheless performs the step of stitching
605 /// together object files from the assembler into a single blob.
606 
607 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
608  const ToolChain &HostTC, const ArgList &Args)
609  : ToolChain(D, Triple, Args), HostTC(HostTC),
610  CudaInstallation(D, HostTC.getTriple(), Args) {
611  if (CudaInstallation.isValid()) {
612  CudaInstallation.WarnIfUnsupportedVersion();
613  getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
614  }
615  // Look up binaries in the driver directory; this is used to
616  // discover the clang-offload-bundler executable.
617  getProgramPaths().push_back(getDriver().Dir);
618 }
619 
620 std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
621  // Only object files are changed, for example assembly files keep their .s
622  // extensions. If the user requested device-only compilation, don't change it.
623  if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly())
624  return ToolChain::getInputFilename(Input);
625 
626  // Replace extension for object files with cubin because nvlink relies on
627  // these particular file names.
628  SmallString<256> Filename(ToolChain::getInputFilename(Input));
629  llvm::sys::path::replace_extension(Filename, "cubin");
630  return std::string(Filename.str());
631 }
632 
633 void CudaToolChain::addClangTargetOptions(
634  const llvm::opt::ArgList &DriverArgs,
635  llvm::opt::ArgStringList &CC1Args,
636  Action::OffloadKind DeviceOffloadingKind) const {
637  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
638 
639  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
640  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
641  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
642  DeviceOffloadingKind == Action::OFK_Cuda) &&
643  "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
644 
645  if (DeviceOffloadingKind == Action::OFK_Cuda) {
646  CC1Args.append(
647  {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
648 
649  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
650  options::OPT_fno_cuda_approx_transcendentals, false))
651  CC1Args.push_back("-fcuda-approx-transcendentals");
652  }
653 
654  if (DriverArgs.hasArg(options::OPT_nogpulib))
655  return;
656 
657  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
658  DriverArgs.hasArg(options::OPT_S))
659  return;
660 
661  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
662  if (LibDeviceFile.empty()) {
663  getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
664  return;
665  }
666 
667  CC1Args.push_back("-mlink-builtin-bitcode");
668  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
669 
670  clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
671 
672  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
673  options::OPT_fno_cuda_short_ptr, false))
674  CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
675 
676  if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
677  CC1Args.push_back(
678  DriverArgs.MakeArgString(Twine("-target-sdk-version=") +
679  CudaVersionToString(CudaInstallationVersion)));
680 
681  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
682  if (CudaInstallationVersion < CudaVersion::CUDA_92) {
683  getDriver().Diag(
684  diag::err_drv_omp_offload_target_cuda_version_not_support)
685  << CudaVersionToString(CudaInstallationVersion);
686  return;
687  }
688 
689  // Link the bitcode library late if we're using device LTO.
690  if (getDriver().isUsingLTO(/* IsOffload */ true))
691  return;
692 
693  addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(),
694  getTriple());
695  }
696 }
697 
698 llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
699  const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
700  const llvm::fltSemantics *FPType) const {
701  if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
702  if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
703  DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
704  options::OPT_fno_gpu_flush_denormals_to_zero, false))
705  return llvm::DenormalMode::getPreserveSign();
706  }
707 
708  assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
709  return llvm::DenormalMode::getIEEE();
710 }
711 
712 bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
713  const Option &O = A->getOption();
714  return (O.matches(options::OPT_gN_Group) &&
715  !O.matches(options::OPT_gmodules)) ||
716  O.matches(options::OPT_g_Flag) ||
717  O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
718  O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
719  O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
720  O.matches(options::OPT_gdwarf_5) ||
721  O.matches(options::OPT_gcolumn_info);
722 }
723 
724 void CudaToolChain::adjustDebugInfoKind(
725  codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
726  switch (mustEmitDebugInfo(Args)) {
727  case DisableDebugInfo:
728  DebugInfoKind = codegenoptions::NoDebugInfo;
729  break;
730  case DebugDirectivesOnly:
731  DebugInfoKind = codegenoptions::DebugDirectivesOnly;
732  break;
733  case EmitSameDebugInfoAsHost:
734  // Use same debug info level as the host.
735  break;
736  }
737 }
738 
739 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
740  ArgStringList &CC1Args) const {
741  // Check our CUDA version if we're going to include the CUDA headers.
742  if (!DriverArgs.hasArg(options::OPT_nogpuinc) &&
743  !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
744  StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
745  assert(!Arch.empty() && "Must have an explicit GPU arch.");
746  CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
747  }
748  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
749 }
750 
751 llvm::opt::DerivedArgList *
752 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
753  StringRef BoundArch,
754  Action::OffloadKind DeviceOffloadKind) const {
755  DerivedArgList *DAL =
756  HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
757  if (!DAL)
758  DAL = new DerivedArgList(Args.getBaseArgs());
759 
760  const OptTable &Opts = getDriver().getOpts();
761 
762  // For OpenMP device offloading, append derived arguments. Make sure
763  // flags are not duplicated.
764  // Also append the compute capability.
765  if (DeviceOffloadKind == Action::OFK_OpenMP) {
766  for (Arg *A : Args)
767  if (!llvm::is_contained(*DAL, A))
768  DAL->append(A);
769 
770  if (!DAL->hasArg(options::OPT_march_EQ))
771  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
772  !BoundArch.empty() ? BoundArch
773  : CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
774 
775  return DAL;
776  }
777 
778  for (Arg *A : Args) {
779  DAL->append(A);
780  }
781 
782  if (!BoundArch.empty()) {
783  DAL->eraseArg(options::OPT_march_EQ);
784  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
785  }
786  return DAL;
787 }
788 
789 Tool *CudaToolChain::buildAssembler() const {
790  return new tools::NVPTX::Assembler(*this);
791 }
792 
793 Tool *CudaToolChain::buildLinker() const {
794  return new tools::NVPTX::Linker(*this);
795 }
796 
797 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
798  HostTC.addClangWarningOptions(CC1Args);
799 }
800 
801 ToolChain::CXXStdlibType
802 CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
803  return HostTC.GetCXXStdlibType(Args);
804 }
805 
806 void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
807  ArgStringList &CC1Args) const {
808  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
809 
810  if (!DriverArgs.hasArg(options::OPT_nogpuinc) && CudaInstallation.isValid())
811  CC1Args.append(
812  {"-internal-isystem",
813  DriverArgs.MakeArgString(CudaInstallation.getIncludePath())});
814 }
815 
816 void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
817  ArgStringList &CC1Args) const {
818  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
819 }
820 
821 void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
822  ArgStringList &CC1Args) const {
823  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
824 }
825 
826 SanitizerMask CudaToolChain::getSupportedSanitizers() const {
827  // The CudaToolChain only supports sanitizers in the sense that it allows
828  // sanitizer arguments on the command line if they are supported by the host
829  // toolchain. The CudaToolChain will actually ignore any command line
830  // arguments for any of these "supported" sanitizers. That means that no
831  // sanitization of device code is actually supported at this time.
832  //
833  // This behavior is necessary because the host and device toolchains
834  // invocations often share the command line, so the device toolchain must
835  // tolerate flags meant only for the host toolchain.
836  return HostTC.getSupportedSanitizers();
837 }
838 
839 VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
840  const ArgList &Args) const {
841  return HostTC.computeMSVCVersion(D, Args);
842 }