clang  8.0.0svn
Cuda.cpp
1 //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Cuda.h"
11 #include "CommonArgs.h"
12 #include "InputInfo.h"
13 #include "clang/Basic/Cuda.h"
15 #include "clang/Config/config.h"
17 #include "clang/Driver/Distro.h"
18 #include "clang/Driver/Driver.h"
20 #include "clang/Driver/Options.h"
21 #include "llvm/Option/ArgList.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/Path.h"
24 #include "llvm/Support/Process.h"
25 #include "llvm/Support/Program.h"
26 #include <system_error>
27 
28 using namespace clang::driver;
29 using namespace clang::driver::toolchains;
30 using namespace clang::driver::tools;
31 using namespace clang;
32 using namespace llvm::opt;
33 
34 // Parses the contents of version.txt in a CUDA installation. It should
35 // contain one line of the form e.g. "CUDA Version 7.5.2".
36 static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
37  if (!V.startswith("CUDA Version "))
38  return CudaVersion::UNKNOWN;
39  V = V.substr(strlen("CUDA Version "));
40  int Major = -1, Minor = -1;
41  auto First = V.split('.');
42  auto Second = First.second.split('.');
43  if (First.first.getAsInteger(10, Major) ||
44  Second.first.getAsInteger(10, Minor))
45  return CudaVersion::UNKNOWN;
46 
47  if (Major == 7 && Minor == 0) {
48  // This doesn't appear to ever happen -- version.txt doesn't exist in the
49  // CUDA 7 installs I've seen. But no harm in checking.
50  return CudaVersion::CUDA_70;
51  }
52  if (Major == 7 && Minor == 5)
53  return CudaVersion::CUDA_75;
54  if (Major == 8 && Minor == 0)
55  return CudaVersion::CUDA_80;
56  if (Major == 9 && Minor == 0)
57  return CudaVersion::CUDA_90;
58  if (Major == 9 && Minor == 1)
59  return CudaVersion::CUDA_91;
60  if (Major == 9 && Minor == 2)
61  return CudaVersion::CUDA_92;
62  if (Major == 10 && Minor == 0)
63  return CudaVersion::CUDA_100;
64  return CudaVersion::UNKNOWN;
65 }
66 
67 CudaInstallationDetector::CudaInstallationDetector(
68  const Driver &D, const llvm::Triple &HostTriple,
69  const llvm::opt::ArgList &Args)
70  : D(D) {
71  struct Candidate {
72  std::string Path;
73  bool StrictChecking;
74 
75  Candidate(std::string Path, bool StrictChecking = false)
76  : Path(Path), StrictChecking(StrictChecking) {}
77  };
78  SmallVector<Candidate, 4> Candidates;
79 
80  // In decreasing order so we prefer newer versions to older versions.
81  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
82 
83  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
84  Candidates.emplace_back(
85  Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
86  } else if (HostTriple.isOSWindows()) {
87  for (const char *Ver : Versions)
88  Candidates.emplace_back(
89  D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
90  Ver);
91  } else {
92  if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
93  // Try to find ptxas binary. If the executable is located in a directory
94  // called 'bin/', its parent directory might be a good guess for a valid
95  // CUDA installation.
96  // However, some distributions might install 'ptxas' to /usr/bin. In that
97  // case the candidate would be '/usr' which passes the following checks
98  // because '/usr/include' exists as well. To avoid this case, we always
99  // check for the directory potentially containing files for libdevice,
100  // even if the user passes -nocudalib.
101  if (llvm::ErrorOr<std::string> ptxas =
102  llvm::sys::findProgramByName("ptxas")) {
103  SmallString<256> ptxasAbsolutePath;
104  llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
105 
106  StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
107  if (llvm::sys::path::filename(ptxasDir) == "bin")
108  Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
109  /*StrictChecking=*/true);
110  }
111  }
112 
113  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
114  for (const char *Ver : Versions)
115  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
116 
117  if (Distro(D.getVFS()).IsDebian())
118  // Special case for Debian to have nvidia-cuda-toolkit work
119  // out of the box. More info on http://bugs.debian.org/882505
120  Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
121  }
122 
123  bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
124 
125  for (const auto &Candidate : Candidates) {
126  InstallPath = Candidate.Path;
127  if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
128  continue;
129 
130  BinPath = InstallPath + "/bin";
131  IncludePath = InstallPath + "/include";
132  LibDevicePath = InstallPath + "/nvvm/libdevice";
133 
134  auto &FS = D.getVFS();
135  if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
136  continue;
137  bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
138  if (CheckLibDevice && !FS.exists(LibDevicePath))
139  continue;
140 
141  // On Linux, we have both lib and lib64 directories, and we need to choose
142  // based on our triple. On MacOS, we have only a lib directory.
143  //
144  // It's sufficient for our purposes to be flexible: If both lib and lib64
145  // exist, we choose whichever one matches our triple. Otherwise, if only
146  // lib exists, we use it.
147  if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
148  LibPath = InstallPath + "/lib64";
149  else if (FS.exists(InstallPath + "/lib"))
150  LibPath = InstallPath + "/lib";
151  else
152  continue;
153 
154  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
155  FS.getBufferForFile(InstallPath + "/version.txt");
156  if (!VersionFile) {
157  // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
158  // version.txt isn't present.
159  Version = CudaVersion::CUDA_70;
160  } else {
161  Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
162  }
163 
164  if (Version >= CudaVersion::CUDA_90) {
165  // CUDA-9+ uses single libdevice file for all GPU variants.
166  std::string FilePath = LibDevicePath + "/libdevice.10.bc";
167  if (FS.exists(FilePath)) {
168  for (const char *GpuArchName :
169  {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
170  "sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) {
171  const CudaArch GpuArch = StringToCudaArch(GpuArchName);
172  if (Version >= MinVersionForCudaArch(GpuArch) &&
173  Version <= MaxVersionForCudaArch(GpuArch))
174  LibDeviceMap[GpuArchName] = FilePath;
175  }
176  }
177  } else {
178  std::error_code EC;
179  for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
180  !EC && LI != LE; LI = LI.increment(EC)) {
181  StringRef FilePath = LI->path();
182  StringRef FileName = llvm::sys::path::filename(FilePath);
183  // Process all bitcode filenames that look like
184  // libdevice.compute_XX.YY.bc
185  const StringRef LibDeviceName = "libdevice.";
186  if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
187  continue;
188  StringRef GpuArch = FileName.slice(
189  LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
190  LibDeviceMap[GpuArch] = FilePath.str();
191  // Insert map entries for specific devices with this compute
192  // capability. NVCC's choice of the libdevice library version is
193  // rather peculiar and depends on the CUDA version.
194  if (GpuArch == "compute_20") {
195  LibDeviceMap["sm_20"] = FilePath;
196  LibDeviceMap["sm_21"] = FilePath;
197  LibDeviceMap["sm_32"] = FilePath;
198  } else if (GpuArch == "compute_30") {
199  LibDeviceMap["sm_30"] = FilePath;
200  if (Version < CudaVersion::CUDA_80) {
201  LibDeviceMap["sm_50"] = FilePath;
202  LibDeviceMap["sm_52"] = FilePath;
203  LibDeviceMap["sm_53"] = FilePath;
204  }
205  LibDeviceMap["sm_60"] = FilePath;
206  LibDeviceMap["sm_61"] = FilePath;
207  LibDeviceMap["sm_62"] = FilePath;
208  } else if (GpuArch == "compute_35") {
209  LibDeviceMap["sm_35"] = FilePath;
210  LibDeviceMap["sm_37"] = FilePath;
211  } else if (GpuArch == "compute_50") {
212  if (Version >= CudaVersion::CUDA_80) {
213  LibDeviceMap["sm_50"] = FilePath;
214  LibDeviceMap["sm_52"] = FilePath;
215  LibDeviceMap["sm_53"] = FilePath;
216  }
217  }
218  }
219  }
220 
221  // Check that we have found at least one libdevice that we can link in if
222  // -nocudalib hasn't been specified.
223  if (LibDeviceMap.empty() && !NoCudaLib)
224  continue;
225 
226  IsValid = true;
227  break;
228  }
229 }
230 
231 void CudaInstallationDetector::AddCudaIncludeArgs(
232  const ArgList &DriverArgs, ArgStringList &CC1Args) const {
233  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
234  // Add cuda_wrappers/* to our system include path. This lets us wrap
235  // standard library headers.
236  SmallString<128> P(D.ResourceDir);
237  llvm::sys::path::append(P, "include");
238  llvm::sys::path::append(P, "cuda_wrappers");
239  CC1Args.push_back("-internal-isystem");
240  CC1Args.push_back(DriverArgs.MakeArgString(P));
241  }
242 
243  if (DriverArgs.hasArg(options::OPT_nocudainc))
244  return;
245 
246  if (!isValid()) {
247  D.Diag(diag::err_drv_no_cuda_installation);
248  return;
249  }
250 
251  CC1Args.push_back("-internal-isystem");
252  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
253  CC1Args.push_back("-include");
254  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
255 }
256 
257 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
258  CudaArch Arch) const {
259  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
260  ArchsWithBadVersion.count(Arch) > 0)
261  return;
262 
263  auto MinVersion = MinVersionForCudaArch(Arch);
264  auto MaxVersion = MaxVersionForCudaArch(Arch);
265  if (Version < MinVersion || Version > MaxVersion) {
266  ArchsWithBadVersion.insert(Arch);
267  D.Diag(diag::err_drv_cuda_version_unsupported)
268  << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
269  << CudaVersionToString(MaxVersion) << InstallPath
270  << CudaVersionToString(Version);
271  }
272 }
273 
274 void CudaInstallationDetector::print(raw_ostream &OS) const {
275  if (isValid())
276  OS << "Found CUDA installation: " << InstallPath << ", version "
277  << CudaVersionToString(Version) << "\n";
278 }
279 
280 namespace {
281  /// Debug info kind.
282 enum DebugInfoKind {
283  NoDebug, /// No debug info.
284  LineTableOnly, /// Line tables only.
285  FullDebug /// Full debug info.
286 };
287 } // anonymous namespace
288 
289 static DebugInfoKind mustEmitDebugInfo(const ArgList &Args) {
290  Arg *A = Args.getLastArg(options::OPT_O_Group);
291  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
292  options::OPT_no_cuda_noopt_device_debug,
293  !A || A->getOption().matches(options::OPT_O0))) {
294  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
295  const Option &Opt = A->getOption();
296  if (Opt.matches(options::OPT_gN_Group)) {
297  if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
298  return NoDebug;
299  if (Opt.matches(options::OPT_gline_tables_only) ||
300  Opt.matches(options::OPT_ggdb1))
301  return LineTableOnly;
302  }
303  return FullDebug;
304  }
305  }
306  return NoDebug;
307 }
308 
309 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
310  const InputInfo &Output,
311  const InputInfoList &Inputs,
312  const ArgList &Args,
313  const char *LinkingOutput) const {
314  const auto &TC =
315  static_cast<const toolchains::CudaToolChain &>(getToolChain());
316  assert(TC.getTriple().isNVPTX() && "Wrong platform");
317 
318  StringRef GPUArchName;
319  // If this is an OpenMP action we need to extract the device architecture
320  // from the -march=arch option. This option may come from -Xopenmp-target
321  // flag or the default value.
322  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
323  GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
324  assert(!GPUArchName.empty() && "Must have an architecture passed in.");
325  } else
326  GPUArchName = JA.getOffloadingArch();
327 
328  // Obtain architecture from the action.
329  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
330  assert(gpu_arch != CudaArch::UNKNOWN &&
331  "Device action expected to have an architecture.");
332 
333  // Check that our installation's ptxas supports gpu_arch.
334  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
335  TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
336  }
337 
338  ArgStringList CmdArgs;
339  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
340  DebugInfoKind DIKind = mustEmitDebugInfo(Args);
341  if (DIKind == FullDebug) {
342  // ptxas does not accept -g option if optimization is enabled, so
343  // we ignore the compiler's -O* options if we want debug info.
344  CmdArgs.push_back("-g");
345  CmdArgs.push_back("--dont-merge-basicblocks");
346  CmdArgs.push_back("--return-at-end");
347  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
348  // Map the -O we received to -O{0,1,2,3}.
349  //
350  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
351  // default, so it may correspond more closely to the spirit of clang -O2.
352 
353  // -O3 seems like the least-bad option when -Osomething is specified to
354  // clang but it isn't handled below.
355  StringRef OOpt = "3";
356  if (A->getOption().matches(options::OPT_O4) ||
357  A->getOption().matches(options::OPT_Ofast))
358  OOpt = "3";
359  else if (A->getOption().matches(options::OPT_O0))
360  OOpt = "0";
361  else if (A->getOption().matches(options::OPT_O)) {
362  // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
363  OOpt = llvm::StringSwitch<const char *>(A->getValue())
364  .Case("1", "1")
365  .Case("2", "2")
366  .Case("3", "3")
367  .Case("s", "2")
368  .Case("z", "2")
369  .Default("2");
370  }
371  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
372  } else {
373  // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
374  // to no optimizations, but ptxas's default is -O3.
375  CmdArgs.push_back("-O0");
376  }
377  if (DIKind == LineTableOnly)
378  CmdArgs.push_back("-lineinfo");
379 
380  // Pass -v to ptxas if it was passed to the driver.
381  if (Args.hasArg(options::OPT_v))
382  CmdArgs.push_back("-v");
383 
384  CmdArgs.push_back("--gpu-name");
385  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
386  CmdArgs.push_back("--output-file");
387  CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
388  for (const auto& II : Inputs)
389  CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
390 
391  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
392  CmdArgs.push_back(Args.MakeArgString(A));
393 
394  bool Relocatable = false;
395  if (JA.isOffloading(Action::OFK_OpenMP))
396  // In OpenMP we need to generate relocatable code.
397  Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
398  options::OPT_fnoopenmp_relocatable_target,
399  /*Default=*/true);
400  else if (JA.isOffloading(Action::OFK_Cuda))
401  Relocatable = Args.hasFlag(options::OPT_fcuda_rdc,
402  options::OPT_fno_cuda_rdc, /*Default=*/false);
403 
404  if (Relocatable)
405  CmdArgs.push_back("-c");
406 
407  const char *Exec;
408  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
409  Exec = A->getValue();
410  else
411  Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
412  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
413 }
414 
415 static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
416  bool includePTX = true;
417  for (Arg *A : Args) {
418  if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
419  A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
420  continue;
421  A->claim();
422  const StringRef ArchStr = A->getValue();
423  if (ArchStr == "all" || ArchStr == gpu_arch) {
424  includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
425  continue;
426  }
427  }
428  return includePTX;
429 }
430 
431 // All inputs to this linker must be from CudaDeviceActions, as we need to look
432 // at the Inputs' Actions in order to figure out which GPU architecture they
433 // correspond to.
434 void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
435  const InputInfo &Output,
436  const InputInfoList &Inputs,
437  const ArgList &Args,
438  const char *LinkingOutput) const {
439  const auto &TC =
440  static_cast<const toolchains::CudaToolChain &>(getToolChain());
441  assert(TC.getTriple().isNVPTX() && "Wrong platform");
442 
443  ArgStringList CmdArgs;
444  CmdArgs.push_back("--cuda");
445  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
446  CmdArgs.push_back(Args.MakeArgString("--create"));
447  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
448  if (mustEmitDebugInfo(Args) == FullDebug)
449  CmdArgs.push_back("-g");
450 
451  for (const auto& II : Inputs) {
452  auto *A = II.getAction();
453  assert(A->getInputs().size() == 1 &&
454  "Device offload action is expected to have a single input");
455  const char *gpu_arch_str = A->getOffloadingArch();
456  assert(gpu_arch_str &&
457  "Device action expected to have associated a GPU architecture!");
458  CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
459 
460  if (II.getType() == types::TY_PP_Asm &&
461  !shouldIncludePTX(Args, gpu_arch_str))
462  continue;
463  // We need to pass an Arch of the form "sm_XX" for cubin files and
464  // "compute_XX" for ptx.
465  const char *Arch =
466  (II.getType() == types::TY_PP_Asm)
467  ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
468  : gpu_arch_str;
469  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
470  Arch + ",file=" + II.getFilename()));
471  }
472 
473  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
474  CmdArgs.push_back(Args.MakeArgString(A));
475 
476  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
477  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
478 }
479 
480 void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
481  const InputInfo &Output,
482  const InputInfoList &Inputs,
483  const ArgList &Args,
484  const char *LinkingOutput) const {
485  const auto &TC =
486  static_cast<const toolchains::CudaToolChain &>(getToolChain());
487  assert(TC.getTriple().isNVPTX() && "Wrong platform");
488 
489  ArgStringList CmdArgs;
490 
491  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
492  // host binary by the host linker.
493  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
494  "CUDA toolchain not expected for an OpenMP host device.");
495 
496  if (Output.isFilename()) {
497  CmdArgs.push_back("-o");
498  CmdArgs.push_back(Output.getFilename());
499  } else
500  assert(Output.isNothing() && "Invalid output.");
501  if (mustEmitDebugInfo(Args) == FullDebug)
502  CmdArgs.push_back("-g");
503 
504  if (Args.hasArg(options::OPT_v))
505  CmdArgs.push_back("-v");
506 
507  StringRef GPUArch =
508  Args.getLastArgValue(options::OPT_march_EQ);
509  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
510 
511  CmdArgs.push_back("-arch");
512  CmdArgs.push_back(Args.MakeArgString(GPUArch));
513 
514  // Add paths specified in LIBRARY_PATH environment variable as -L options.
515  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
516 
517  // Add paths for the default clang library path.
518  SmallString<256> DefaultLibPath =
519  llvm::sys::path::parent_path(TC.getDriver().Dir);
520  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
521  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
522 
523  // Add linking against library implementing OpenMP calls on NVPTX target.
524  CmdArgs.push_back("-lomptarget-nvptx");
525 
526  for (const auto &II : Inputs) {
527  if (II.getType() == types::TY_LLVM_IR ||
528  II.getType() == types::TY_LTO_IR ||
529  II.getType() == types::TY_LTO_BC ||
530  II.getType() == types::TY_LLVM_BC) {
531  C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
532  << getToolChain().getTripleString();
533  continue;
534  }
535 
536  // Currently, we only pass the input files to the linker; we do not pass
537  // any libraries that may be valid only for the host.
538  if (!II.isFilename())
539  continue;
540 
541  const char *CubinF = C.addTempFile(
542  C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
543 
544  CmdArgs.push_back(CubinF);
545  }
546 
547  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
548 
549  const char *Exec =
550  Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
551  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
552 }
553 
554 /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
555 /// which isn't properly a linker but nonetheless performs the step of stitching
556 /// together object files from the assembler into a single blob.
557 
558 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
559  const ToolChain &HostTC, const ArgList &Args,
560  const Action::OffloadKind OK)
561  : ToolChain(D, Triple, Args), HostTC(HostTC),
562  CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
563  if (CudaInstallation.isValid())
564  getProgramPaths().push_back(CudaInstallation.getBinPath());
565  // Look up binaries in the driver directory; this is used to
566  // discover the clang-offload-bundler executable.
567  getProgramPaths().push_back(getDriver().Dir);
568 }
569 
570 std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
571  // Only object files are changed; for example, assembly files keep their .s
572  // extensions. CUDA also continues to use .o because it uses fatbinary
573  // rather than nvlink.
574  if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
575  return ToolChain::getInputFilename(Input);
576 
577  // Replace extension for object files with cubin because nvlink relies on
578  // these particular file names.
579  SmallString<256> Filename(ToolChain::getInputFilename(Input));
580  llvm::sys::path::replace_extension(Filename, "cubin");
581  return Filename.str();
582 }
583 
584 void CudaToolChain::addClangTargetOptions(
585  const llvm::opt::ArgList &DriverArgs,
586  llvm::opt::ArgStringList &CC1Args,
587  Action::OffloadKind DeviceOffloadingKind) const {
588  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
589 
590  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
591  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
592  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
593  DeviceOffloadingKind == Action::OFK_Cuda) &&
594  "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
595 
596  if (DeviceOffloadingKind == Action::OFK_Cuda) {
597  CC1Args.push_back("-fcuda-is-device");
598 
599  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
600  options::OPT_fno_cuda_flush_denormals_to_zero, false))
601  CC1Args.push_back("-fcuda-flush-denormals-to-zero");
602 
603  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
604  options::OPT_fno_cuda_approx_transcendentals, false))
605  CC1Args.push_back("-fcuda-approx-transcendentals");
606 
607  if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
608  false))
609  CC1Args.push_back("-fcuda-rdc");
610  }
611 
612  if (DriverArgs.hasArg(options::OPT_nocudalib))
613  return;
614 
615  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
616 
617  if (LibDeviceFile.empty()) {
618  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
619  DriverArgs.hasArg(options::OPT_S))
620  return;
621 
622  getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
623  return;
624  }
625 
626  CC1Args.push_back("-mlink-builtin-bitcode");
627  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
628 
629  // Libdevice in CUDA-7.0 requires a PTX version that's more recent than LLVM's
630  // default. Use PTX4.2 by default, which is the PTX version that came with
631  // CUDA-7.0.
632  const char *PtxFeature = "+ptx42";
633  // TODO(tra): CUDA-10+ needs PTX 6.3 to support new features. However that
634  // requires a fair amount of work on the LLVM side. We'll keep using PTX 6.1 until
635  // all prerequisites are in place.
636  if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
637  // CUDA-9.1 uses new instructions that are only available in PTX6.1+
638  PtxFeature = "+ptx61";
639  } else if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
640  // CUDA-9.0 uses new instructions that are only available in PTX6.0+
641  PtxFeature = "+ptx60";
642  }
643  CC1Args.append({"-target-feature", PtxFeature});
644  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
645  options::OPT_fno_cuda_short_ptr, false))
646  CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
647 
648  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
649  SmallVector<StringRef, 8> LibraryPaths;
650  // Add path to lib and/or lib64 folders.
651  SmallString<256> DefaultLibPath =
652  llvm::sys::path::parent_path(getDriver().Dir);
653  llvm::sys::path::append(DefaultLibPath,
654  Twine("lib") + CLANG_LIBDIR_SUFFIX);
655  LibraryPaths.emplace_back(DefaultLibPath.c_str());
656 
657  // Add user defined library paths from LIBRARY_PATH.
658  llvm::Optional<std::string> LibPath =
659  llvm::sys::Process::GetEnv("LIBRARY_PATH");
660  if (LibPath) {
661  SmallVector<StringRef, 8> Frags;
662  const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
663  llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
664  for (StringRef Path : Frags)
665  LibraryPaths.emplace_back(Path.trim());
666  }
667 
668  std::string LibOmpTargetName =
669  "libomptarget-nvptx-" + GpuArch.str() + ".bc";
670  bool FoundBCLibrary = false;
671  for (StringRef LibraryPath : LibraryPaths) {
672  SmallString<128> LibOmpTargetFile(LibraryPath);
673  llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
674  if (llvm::sys::fs::exists(LibOmpTargetFile)) {
675  CC1Args.push_back("-mlink-builtin-bitcode");
676  CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
677  FoundBCLibrary = true;
678  break;
679  }
680  }
681  if (!FoundBCLibrary)
682  getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
683  << LibOmpTargetName;
684  }
685 }
686 
687 bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
688  const Option &O = A->getOption();
689  return (O.matches(options::OPT_gN_Group) &&
690  !O.matches(options::OPT_gmodules)) ||
691  O.matches(options::OPT_g_Flag) ||
692  O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
693  O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
694  O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
695  O.matches(options::OPT_gdwarf_5) ||
696  O.matches(options::OPT_gcolumn_info);
697 }
698 
699 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
700  ArgStringList &CC1Args) const {
701  // Check our CUDA version if we're going to include the CUDA headers.
702  if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
703  !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
704  StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
705  assert(!Arch.empty() && "Must have an explicit GPU arch.");
706  CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
707  }
708  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
709 }
710 
711 llvm::opt::DerivedArgList *
712 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
713  StringRef BoundArch,
714  Action::OffloadKind DeviceOffloadKind) const {
715  DerivedArgList *DAL =
716  HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
717  if (!DAL)
718  DAL = new DerivedArgList(Args.getBaseArgs());
719 
720  const OptTable &Opts = getDriver().getOpts();
721 
722  // For OpenMP device offloading, append derived arguments. Make sure
723  // flags are not duplicated.
724  // Also append the compute capability.
725  if (DeviceOffloadKind == Action::OFK_OpenMP) {
726  for (Arg *A : Args) {
727  bool IsDuplicate = false;
728  for (Arg *DALArg : *DAL) {
729  if (A == DALArg) {
730  IsDuplicate = true;
731  break;
732  }
733  }
734  if (!IsDuplicate)
735  DAL->append(A);
736  }
737 
738  StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
739  if (Arch.empty())
740  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
741  CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
742 
743  return DAL;
744  }
745 
746  for (Arg *A : Args) {
747  if (A->getOption().matches(options::OPT_Xarch__)) {
748  // Skip this argument unless the architecture matches BoundArch
749  if (BoundArch.empty() || A->getValue(0) != BoundArch)
750  continue;
751 
752  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
753  unsigned Prev = Index;
754  std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
755 
756  // If the argument parsing failed or more than one argument was
757  // consumed, the -Xarch_ argument's parameter tried to consume
758  // extra arguments. Emit an error and ignore.
759  //
760  // We also want to disallow any options which would alter the
761  // driver behavior; that isn't going to work in our model. We
762  // use isDriverOption() as an approximation, although things
763  // like -O4 are going to slip through.
764  if (!XarchArg || Index > Prev + 1) {
765  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
766  << A->getAsString(Args);
767  continue;
768  } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
769  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
770  << A->getAsString(Args);
771  continue;
772  }
773  XarchArg->setBaseArg(A);
774  A = XarchArg.release();
775  DAL->AddSynthesizedArg(A);
776  }
777  DAL->append(A);
778  }
779 
780  if (!BoundArch.empty()) {
781  DAL->eraseArg(options::OPT_march_EQ);
782  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
783  }
784  return DAL;
785 }
786 
787 Tool *CudaToolChain::buildAssembler() const {
788  return new tools::NVPTX::Assembler(*this);
789 }
790 
791 Tool *CudaToolChain::buildLinker() const {
792  if (OK == Action::OFK_OpenMP)
793  return new tools::NVPTX::OpenMPLinker(*this);
794  return new tools::NVPTX::Linker(*this);
795 }
796 
797 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
798  HostTC.addClangWarningOptions(CC1Args);
799 }
800 
801 ToolChain::CXXStdlibType
802 CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
803  return HostTC.GetCXXStdlibType(Args);
804 }
805 
806 void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
807  ArgStringList &CC1Args) const {
808  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
809 }
810 
811 void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
812  ArgStringList &CC1Args) const {
813  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
814 }
815 
816 void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
817  ArgStringList &CC1Args) const {
818  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
819 }
820 
821 SanitizerMask CudaToolChain::getSupportedSanitizers() const {
822  // The CudaToolChain only supports sanitizers in the sense that it allows
823  // sanitizer arguments on the command line if they are supported by the host
824  // toolchain. The CudaToolChain will actually ignore any command line
825  // arguments for any of these "supported" sanitizers. That means that no
826  // sanitization of device code is actually supported at this time.
827  //
828  // This behavior is necessary because the host and device toolchains
829  // invocations often share the command line, so the device toolchain must
830  // tolerate flags meant only for the host toolchain.
831  return HostTC.getSupportedSanitizers();
832 }
833 
834 VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
835  const ArgList &Args) const {
836  return HostTC.computeMSVCVersion(D, Args);
837 }