clang  8.0.0svn
Cuda.cpp
1 //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Cuda.h"
11 #include "CommonArgs.h"
12 #include "InputInfo.h"
13 #include "clang/Basic/Cuda.h"
14 #include "clang/Config/config.h"
15 #include "clang/Driver/Compilation.h"
16 #include "clang/Driver/Distro.h"
17 #include "clang/Driver/Driver.h"
18 #include "clang/Driver/DriverDiagnostic.h"
19 #include "clang/Driver/Options.h"
20 #include "llvm/Option/ArgList.h"
21 #include "llvm/Support/FileSystem.h"
22 #include "llvm/Support/Path.h"
23 #include "llvm/Support/Process.h"
24 #include "llvm/Support/Program.h"
25 #include "llvm/Support/VirtualFileSystem.h"
26 #include <system_error>
27 
28 using namespace clang::driver;
29 using namespace clang::driver::toolchains;
30 using namespace clang::driver::tools;
31 using namespace clang;
32 using namespace llvm::opt;
33 
34 // Parses the contents of version.txt in a CUDA installation. It should
35 // contain one line of the form, e.g. "CUDA Version 7.5.2".
36 static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
37  if (!V.startswith("CUDA Version "))
38  return CudaVersion::UNKNOWN;
39  V = V.substr(strlen("CUDA Version "));
40  int Major = -1, Minor = -1;
41  auto First = V.split('.');
42  auto Second = First.second.split('.');
43  if (First.first.getAsInteger(10, Major) ||
44  Second.first.getAsInteger(10, Minor))
45  return CudaVersion::UNKNOWN;
46 
47  if (Major == 7 && Minor == 0) {
48  // This doesn't appear to ever happen -- version.txt doesn't exist in the
49  // CUDA 7 installs I've seen. But no harm in checking.
50  return CudaVersion::CUDA_70;
51  }
52  if (Major == 7 && Minor == 5)
53  return CudaVersion::CUDA_75;
54  if (Major == 8 && Minor == 0)
55  return CudaVersion::CUDA_80;
56  if (Major == 9 && Minor == 0)
57  return CudaVersion::CUDA_90;
58  if (Major == 9 && Minor == 1)
59  return CudaVersion::CUDA_91;
60  if (Major == 9 && Minor == 2)
61  return CudaVersion::CUDA_92;
62  if (Major == 10 && Minor == 0)
63  return CudaVersion::CUDA_100;
64  return CudaVersion::UNKNOWN;
65 }
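 // NOTE (illustrative, not part of the upstream source): with the parser
 // above, "CUDA Version 9.2.148" parses as Major=9, Minor=2 and maps to
 // CudaVersion::CUDA_92 (the third component is ignored), while a release
 // not listed, e.g. "CUDA Version 10.1", falls through to
 // CudaVersion::UNKNOWN.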
66 
67 CudaInstallationDetector::CudaInstallationDetector(
68  const Driver &D, const llvm::Triple &HostTriple,
69  const llvm::opt::ArgList &Args)
70  : D(D) {
71  struct Candidate {
72  std::string Path;
73  bool StrictChecking;
74 
75  Candidate(std::string Path, bool StrictChecking = false)
76  : Path(Path), StrictChecking(StrictChecking) {}
77  };
78  SmallVector<Candidate, 4> Candidates;
79 
80  // In decreasing order so we prefer newer versions to older versions.
81  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
82 
83  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
84  Candidates.emplace_back(
85  Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
86  } else if (HostTriple.isOSWindows()) {
87  for (const char *Ver : Versions)
88  Candidates.emplace_back(
89  D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
90  Ver);
91  } else {
92  if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
93  // Try to find the ptxas binary. If the executable is located in a directory
94  // called 'bin/', its parent directory might be a good guess for a valid
95  // CUDA installation.
96  // However, some distributions might install 'ptxas' to /usr/bin. In that
97  // case the candidate would be '/usr' which passes the following checks
98  // because '/usr/include' exists as well. To avoid this case, we always
99  // check for the directory potentially containing files for libdevice,
100  // even if the user passes -nocudalib.
101  if (llvm::ErrorOr<std::string> ptxas =
102  llvm::sys::findProgramByName("ptxas")) {
103  SmallString<256> ptxasAbsolutePath;
104  llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
105 
106  StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
107  if (llvm::sys::path::filename(ptxasDir) == "bin")
108  Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
109  /*StrictChecking=*/true);
110  }
111  }
112 
113  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
114  for (const char *Ver : Versions)
115  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
116 
117  if (Distro(D.getVFS()).IsDebian())
118  // Special case for Debian to have nvidia-cuda-toolkit work
119  // out of the box. More info on http://bugs.debian.org/882505
120  Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
121  }
122 
123  bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
124 
125  for (const auto &Candidate : Candidates) {
126  InstallPath = Candidate.Path;
127  if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
128  continue;
129 
130  BinPath = InstallPath + "/bin";
131  IncludePath = InstallPath + "/include";
132  LibDevicePath = InstallPath + "/nvvm/libdevice";
133 
134  auto &FS = D.getVFS();
135  if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
136  continue;
137  bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
138  if (CheckLibDevice && !FS.exists(LibDevicePath))
139  continue;
140 
141  // On Linux, we have both lib and lib64 directories, and we need to choose
142  // based on our triple. On MacOS, we have only a lib directory.
143  //
144  // It's sufficient for our purposes to be flexible: If both lib and lib64
145  // exist, we choose whichever one matches our triple. Otherwise, if only
146  // lib exists, we use it.
147  if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
148  LibPath = InstallPath + "/lib64";
149  else if (FS.exists(InstallPath + "/lib"))
150  LibPath = InstallPath + "/lib";
151  else
152  continue;
153 
154  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
155  FS.getBufferForFile(InstallPath + "/version.txt");
156  if (!VersionFile) {
157  // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
158  // version.txt isn't present.
159  Version = CudaVersion::CUDA_70;
160  } else {
161  Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
162  }
163 
164  if (Version >= CudaVersion::CUDA_90) {
165  // CUDA-9+ uses a single libdevice file for all GPU variants.
166  std::string FilePath = LibDevicePath + "/libdevice.10.bc";
167  if (FS.exists(FilePath)) {
168  for (const char *GpuArchName :
169  {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
170  "sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) {
171  const CudaArch GpuArch = StringToCudaArch(GpuArchName);
172  if (Version >= MinVersionForCudaArch(GpuArch) &&
173  Version <= MaxVersionForCudaArch(GpuArch))
174  LibDeviceMap[GpuArchName] = FilePath;
175  }
176  }
177  } else {
178  std::error_code EC;
179  for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
180  !EC && LI != LE; LI = LI.increment(EC)) {
181  StringRef FilePath = LI->path();
182  StringRef FileName = llvm::sys::path::filename(FilePath);
183  // Process all bitcode filenames that look like
184  // libdevice.compute_XX.YY.bc
185  const StringRef LibDeviceName = "libdevice.";
186  if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
187  continue;
188  StringRef GpuArch = FileName.slice(
189  LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
190  LibDeviceMap[GpuArch] = FilePath.str();
191  // Insert map entries for specific devices with this compute
192  // capability. NVCC's choice of the libdevice library version is
193  // rather peculiar and depends on the CUDA version.
194  if (GpuArch == "compute_20") {
195  LibDeviceMap["sm_20"] = FilePath;
196  LibDeviceMap["sm_21"] = FilePath;
197  LibDeviceMap["sm_32"] = FilePath;
198  } else if (GpuArch == "compute_30") {
199  LibDeviceMap["sm_30"] = FilePath;
200  if (Version < CudaVersion::CUDA_80) {
201  LibDeviceMap["sm_50"] = FilePath;
202  LibDeviceMap["sm_52"] = FilePath;
203  LibDeviceMap["sm_53"] = FilePath;
204  }
205  LibDeviceMap["sm_60"] = FilePath;
206  LibDeviceMap["sm_61"] = FilePath;
207  LibDeviceMap["sm_62"] = FilePath;
208  } else if (GpuArch == "compute_35") {
209  LibDeviceMap["sm_35"] = FilePath;
210  LibDeviceMap["sm_37"] = FilePath;
211  } else if (GpuArch == "compute_50") {
212  if (Version >= CudaVersion::CUDA_80) {
213  LibDeviceMap["sm_50"] = FilePath;
214  LibDeviceMap["sm_52"] = FilePath;
215  LibDeviceMap["sm_53"] = FilePath;
216  }
217  }
218  }
219  }
220 
221  // Check that we have found at least one libdevice that we can link in if
222  // -nocudalib hasn't been specified.
223  if (LibDeviceMap.empty() && !NoCudaLib)
224  continue;
225 
226  IsValid = true;
227  break;
228  }
229 }
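 // NOTE (illustrative): on a typical Linux host without --cuda-path, the
 // candidates built above are tried in order: the parent of a ptxas found in
 // a .../bin directory (with strict checking), /usr/local/cuda,
 // /usr/local/cuda-8.0, /usr/local/cuda-7.5, /usr/local/cuda-7.0, and on
 // Debian also /usr/lib/cuda. The first candidate with include/, bin/, a
 // matching lib/ or lib64/, and (unless -nocudalib) usable libdevice files
 // is accepted.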
230 
231 void CudaInstallationDetector::AddCudaIncludeArgs(
232  const ArgList &DriverArgs, ArgStringList &CC1Args) const {
233  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
234  // Add cuda_wrappers/* to our system include path. This lets us wrap
235  // standard library headers.
236  SmallString<128> P(D.ResourceDir);
237  llvm::sys::path::append(P, "include");
238  llvm::sys::path::append(P, "cuda_wrappers");
239  CC1Args.push_back("-internal-isystem");
240  CC1Args.push_back(DriverArgs.MakeArgString(P));
241  }
242 
243  if (DriverArgs.hasArg(options::OPT_nocudainc))
244  return;
245 
246  if (!isValid()) {
247  D.Diag(diag::err_drv_no_cuda_installation);
248  return;
249  }
250 
251  CC1Args.push_back("-internal-isystem");
252  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
253  CC1Args.push_back("-include");
254  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
255 }
256 
257 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
258  CudaArch Arch) const {
259  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
260  ArchsWithBadVersion.count(Arch) > 0)
261  return;
262 
263  auto MinVersion = MinVersionForCudaArch(Arch);
264  auto MaxVersion = MaxVersionForCudaArch(Arch);
265  if (Version < MinVersion || Version > MaxVersion) {
266  ArchsWithBadVersion.insert(Arch);
267  D.Diag(diag::err_drv_cuda_version_unsupported)
268  << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
269  << CudaVersionToString(MaxVersion) << InstallPath
270  << CudaVersionToString(Version);
271  }
272 }
273 
274 void CudaInstallationDetector::print(raw_ostream &OS) const {
275  if (isValid())
276  OS << "Found CUDA installation: " << InstallPath << ", version "
277  << CudaVersionToString(Version) << "\n";
278 }
279 
280 namespace {
281  /// Debug info kind.
282 enum DebugInfoKind {
283  NoDebug, /// No debug info.
284  LineTableOnly, /// Line tables only.
285  FullDebug /// Full debug info.
286 };
287 } // anonymous namespace
288 
289 static DebugInfoKind mustEmitDebugInfo(const ArgList &Args) {
290  Arg *A = Args.getLastArg(options::OPT_O_Group);
291  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
292  options::OPT_no_cuda_noopt_device_debug,
293  !A || A->getOption().matches(options::OPT_O0))) {
294  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
295  const Option &Opt = A->getOption();
296  if (Opt.matches(options::OPT_gN_Group)) {
297  if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
298  return NoDebug;
299  if (Opt.matches(options::OPT_gline_tables_only) ||
300  Opt.matches(options::OPT_ggdb1))
301  return LineTableOnly;
302  }
303  return FullDebug;
304  }
305  }
306  return NoDebug;
307 }
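 // NOTE (illustrative): with the logic above, "-O0 -g" (or "-g" with no -O)
 // yields FullDebug, "-gline-tables-only" at -O0 yields LineTableOnly, and
 // "-O2 -g" yields NoDebug unless --cuda-noopt-device-debug is also passed.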
308 
309 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
310  const InputInfo &Output,
311  const InputInfoList &Inputs,
312  const ArgList &Args,
313  const char *LinkingOutput) const {
314  const auto &TC =
315  static_cast<const toolchains::CudaToolChain &>(getToolChain());
316  assert(TC.getTriple().isNVPTX() && "Wrong platform");
317 
318  StringRef GPUArchName;
319  // If this is an OpenMP action we need to extract the device architecture
320  // from the -march=arch option. This option may come from the
321  // -Xopenmp-target flag or the default value.
322  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
323  GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
324  assert(!GPUArchName.empty() && "Must have an architecture passed in.");
325  } else
326  GPUArchName = JA.getOffloadingArch();
327 
328  // Obtain architecture from the action.
329  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
330  assert(gpu_arch != CudaArch::UNKNOWN &&
331  "Device action expected to have an architecture.");
332 
333  // Check that our installation's ptxas supports gpu_arch.
334  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
335  TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
336  }
337 
338  ArgStringList CmdArgs;
339  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
340  DebugInfoKind DIKind = mustEmitDebugInfo(Args);
341  if (DIKind == FullDebug) {
342  // ptxas does not accept -g option if optimization is enabled, so
343  // we ignore the compiler's -O* options if we want debug info.
344  CmdArgs.push_back("-g");
345  CmdArgs.push_back("--dont-merge-basicblocks");
346  CmdArgs.push_back("--return-at-end");
347  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
348  // Map the -O we received to -O{0,1,2,3}.
349  //
350  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
351  // default, so it may correspond more closely to the spirit of clang -O2.
352 
353  // -O3 seems like the least-bad option when -Osomething is specified to
354  // clang but it isn't handled below.
355  StringRef OOpt = "3";
356  if (A->getOption().matches(options::OPT_O4) ||
357  A->getOption().matches(options::OPT_Ofast))
358  OOpt = "3";
359  else if (A->getOption().matches(options::OPT_O0))
360  OOpt = "0";
361  else if (A->getOption().matches(options::OPT_O)) {
362  // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
363  OOpt = llvm::StringSwitch<const char *>(A->getValue())
364  .Case("1", "1")
365  .Case("2", "2")
366  .Case("3", "3")
367  .Case("s", "2")
368  .Case("z", "2")
369  .Default("2");
370  }
371  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
372  } else {
373  // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
374  // to no optimizations, but ptxas's default is -O3.
375  CmdArgs.push_back("-O0");
376  }
377  if (DIKind == LineTableOnly)
378  CmdArgs.push_back("-lineinfo");
379 
380  // Pass -v to ptxas if it was passed to the driver.
381  if (Args.hasArg(options::OPT_v))
382  CmdArgs.push_back("-v");
383 
384  CmdArgs.push_back("--gpu-name");
385  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
386  CmdArgs.push_back("--output-file");
387  CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
388  for (const auto& II : Inputs)
389  CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
390 
391  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
392  CmdArgs.push_back(Args.MakeArgString(A));
393 
394  bool Relocatable = false;
395  if (JA.isOffloading(Action::OFK_OpenMP))
396  // In OpenMP we need to generate relocatable code.
397  Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
398  options::OPT_fnoopenmp_relocatable_target,
399  /*Default=*/true);
400  else if (JA.isOffloading(Action::OFK_Cuda))
401  Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
402  options::OPT_fno_gpu_rdc, /*Default=*/false);
403 
404  if (Relocatable)
405  CmdArgs.push_back("-c");
406 
407  const char *Exec;
408  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
409  Exec = A->getValue();
410  else
411  Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
412  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
413 }
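 // NOTE (illustrative): for a hypothetical sm_52 compile at -O2 on a 64-bit
 // host, the job constructed above would resemble:
 //   ptxas -m64 -O2 --gpu-name sm_52 --output-file foo.cubin foo.s
 // plus any values passed through -Xcuda-ptxas (foo.s/foo.cubin are
 // placeholder names).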
414 
415 static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
416  bool includePTX = true;
417  for (Arg *A : Args) {
418  if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
419  A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
420  continue;
421  A->claim();
422  const StringRef ArchStr = A->getValue();
423  if (ArchStr == "all" || ArchStr == gpu_arch) {
424  includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
425  continue;
426  }
427  }
428  return includePTX;
429 }
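 // NOTE (illustrative): PTX is included by default and the last matching
 // flag wins, so "--no-cuda-include-ptx=all --cuda-include-ptx=sm_70" would
 // embed PTX only for sm_70 and drop it for every other architecture.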
430 
431 // All inputs to this linker must be from CudaDeviceActions, as we need to look
432 // at the Inputs' Actions in order to figure out which GPU architecture they
433 // correspond to.
434 void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
435  const InputInfo &Output,
436  const InputInfoList &Inputs,
437  const ArgList &Args,
438  const char *LinkingOutput) const {
439  const auto &TC =
440  static_cast<const toolchains::CudaToolChain &>(getToolChain());
441  assert(TC.getTriple().isNVPTX() && "Wrong platform");
442 
443  ArgStringList CmdArgs;
444  CmdArgs.push_back("--cuda");
445  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
446  CmdArgs.push_back(Args.MakeArgString("--create"));
447  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
448  if (mustEmitDebugInfo(Args) == FullDebug)
449  CmdArgs.push_back("-g");
450 
451  for (const auto& II : Inputs) {
452  auto *A = II.getAction();
453  assert(A->getInputs().size() == 1 &&
454  "Device offload action is expected to have a single input");
455  const char *gpu_arch_str = A->getOffloadingArch();
456  assert(gpu_arch_str &&
457  "Device action expected to have associated a GPU architecture!");
458  CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
459 
460  if (II.getType() == types::TY_PP_Asm &&
461  !shouldIncludePTX(Args, gpu_arch_str))
462  continue;
463  // We need to pass an Arch of the form "sm_XX" for cubin files and
464  // "compute_XX" for ptx.
465  const char *Arch =
466  (II.getType() == types::TY_PP_Asm)
467  ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
468  : gpu_arch_str;
469  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
470  Arch + ",file=" + II.getFilename()));
471  }
472 
473  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
474  CmdArgs.push_back(Args.MakeArgString(A));
475 
476  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
477  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
478 }
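 // NOTE (illustrative): for a hypothetical sm_52 device build on a 64-bit
 // host, the invocation constructed above would resemble:
 //   fatbinary --cuda -64 --create foo.fatbin
 //     --image=profile=sm_52,file=foo.cubin
 //     --image=profile=compute_52,file=foo.s
 // plus any values passed through -Xcuda-fatbinary (file names are
 // placeholders).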
479 
480 void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
481  const InputInfo &Output,
482  const InputInfoList &Inputs,
483  const ArgList &Args,
484  const char *LinkingOutput) const {
485  const auto &TC =
486  static_cast<const toolchains::CudaToolChain &>(getToolChain());
487  assert(TC.getTriple().isNVPTX() && "Wrong platform");
488 
489  ArgStringList CmdArgs;
490 
491  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
492  // host binary by the host linker.
493  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
494  "CUDA toolchain not expected for an OpenMP host device.");
495 
496  if (Output.isFilename()) {
497  CmdArgs.push_back("-o");
498  CmdArgs.push_back(Output.getFilename());
499  } else
500  assert(Output.isNothing() && "Invalid output.");
501  if (mustEmitDebugInfo(Args) == FullDebug)
502  CmdArgs.push_back("-g");
503 
504  if (Args.hasArg(options::OPT_v))
505  CmdArgs.push_back("-v");
506 
507  StringRef GPUArch =
508  Args.getLastArgValue(options::OPT_march_EQ);
509  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
510 
511  CmdArgs.push_back("-arch");
512  CmdArgs.push_back(Args.MakeArgString(GPUArch));
513 
514  // Assume that the directory specified with --libomptarget_nvptx_path
515  // contains the static library libomptarget-nvptx.a.
516  if (const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
517  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + A->getValue()));
518 
519  // Add paths specified in LIBRARY_PATH environment variable as -L options.
520  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
521 
522  // Add paths for the default clang library path.
523  SmallString<256> DefaultLibPath =
524  llvm::sys::path::parent_path(TC.getDriver().Dir);
525  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
526  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
527 
528  // Add linking against library implementing OpenMP calls on NVPTX target.
529  CmdArgs.push_back("-lomptarget-nvptx");
530 
531  for (const auto &II : Inputs) {
532  if (II.getType() == types::TY_LLVM_IR ||
533  II.getType() == types::TY_LTO_IR ||
534  II.getType() == types::TY_LTO_BC ||
535  II.getType() == types::TY_LLVM_BC) {
536  C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
537  << getToolChain().getTripleString();
538  continue;
539  }
540 
541  // Currently, we only pass the input files to the linker; we do not pass
542  // any libraries that may be valid only for the host.
543  if (!II.isFilename())
544  continue;
545 
546  const char *CubinF = C.addTempFile(
547  C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
548 
549  CmdArgs.push_back(CubinF);
550  }
551 
552  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
553 
554  const char *Exec =
555  Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
556  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
557 }
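 // NOTE (illustrative): for an OpenMP offload link of a hypothetical sm_70
 // device image, the nvlink invocation constructed above would resemble:
 //   nvlink -o <output> -arch sm_70 -L<clang-install>/lib -lomptarget-nvptx
 //     foo.cubin
 // with additional -L entries taken from --libomptarget_nvptx_path and the
 // LIBRARY_PATH environment variable when they are set.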
558 
559 /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
560 /// which isn't properly a linker but nonetheless performs the step of stitching
561 /// together object files from the assembler into a single blob.
562 
563 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
564  const ToolChain &HostTC, const ArgList &Args,
565  const Action::OffloadKind OK)
566  : ToolChain(D, Triple, Args), HostTC(HostTC),
567  CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
568  if (CudaInstallation.isValid())
569  getProgramPaths().push_back(CudaInstallation.getBinPath());
570  // Look up binaries in the driver directory; this is used to
571  // discover the clang-offload-bundler executable.
572  getProgramPaths().push_back(getDriver().Dir);
573 }
574 
575 std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
576  // Only object files are changed; for example, assembly files keep their .s
577  // extensions. CUDA also continues to use .o because it uses fatbinary
578  // rather than nvlink.
579  if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
580  return ToolChain::getInputFilename(Input);
581 
582  // Replace extension for object files with cubin because nvlink relies on
583  // these particular file names.
584  SmallString<256> Filename(ToolChain::getInputFilename(Input));
585  llvm::sys::path::replace_extension(Filename, "cubin");
586  return Filename.str();
587 }
588 
589 void CudaToolChain::addClangTargetOptions(
590  const llvm::opt::ArgList &DriverArgs,
591  llvm::opt::ArgStringList &CC1Args,
592  Action::OffloadKind DeviceOffloadingKind) const {
593  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
594 
595  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
596  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
597  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
598  DeviceOffloadingKind == Action::OFK_Cuda) &&
599  "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
600 
601  if (DeviceOffloadingKind == Action::OFK_Cuda) {
602  CC1Args.push_back("-fcuda-is-device");
603 
604  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
605  options::OPT_fno_cuda_flush_denormals_to_zero, false))
606  CC1Args.push_back("-fcuda-flush-denormals-to-zero");
607 
608  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
609  options::OPT_fno_cuda_approx_transcendentals, false))
610  CC1Args.push_back("-fcuda-approx-transcendentals");
611 
612  if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
613  false))
614  CC1Args.push_back("-fgpu-rdc");
615  }
616 
617  if (DriverArgs.hasArg(options::OPT_nocudalib))
618  return;
619 
620  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
621 
622  if (LibDeviceFile.empty()) {
623  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
624  DriverArgs.hasArg(options::OPT_S))
625  return;
626 
627  getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
628  return;
629  }
630 
631  CC1Args.push_back("-mlink-builtin-bitcode");
632  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
633 
634  // Libdevice in CUDA-7.0 requires a PTX version that's more recent than
635  // LLVM's default. Use PTX4.2 by default, which is the PTX version that
636  // came with CUDA-7.0.
637  const char *PtxFeature = "+ptx42";
638  // TODO(tra): CUDA-10+ needs PTX 6.3 to support new features. However that
639  // requires a fair amount of work on the LLVM side. We'll keep using PTX
640  // 6.1 until all prerequisites are in place.
641  if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
642  // CUDA-9.1 uses new instructions that are only available in PTX6.1+
643  PtxFeature = "+ptx61";
644  } else if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
645  // CUDA-9.0 uses new instructions that are only available in PTX6.0+
646  PtxFeature = "+ptx60";
647  }
648  CC1Args.append({"-target-feature", PtxFeature});
649  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
650  options::OPT_fno_cuda_short_ptr, false))
651  CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
652 
653  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
654  SmallVector<StringRef, 8> LibraryPaths;
655 
656  if (const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
657  LibraryPaths.push_back(A->getValue());
658 
659  // Add user defined library paths from LIBRARY_PATH.
660  llvm::Optional<std::string> LibPath =
661  llvm::sys::Process::GetEnv("LIBRARY_PATH");
662  if (LibPath) {
663  SmallVector<StringRef, 8> Frags;
664  const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
665  llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
666  for (StringRef Path : Frags)
667  LibraryPaths.emplace_back(Path.trim());
668  }
669 
670  // Add path to lib / lib64 folder.
671  SmallString<256> DefaultLibPath =
672  llvm::sys::path::parent_path(getDriver().Dir);
673  llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
674  LibraryPaths.emplace_back(DefaultLibPath.c_str());
675 
676  std::string LibOmpTargetName =
677  "libomptarget-nvptx-" + GpuArch.str() + ".bc";
678  bool FoundBCLibrary = false;
679  for (StringRef LibraryPath : LibraryPaths) {
680  SmallString<128> LibOmpTargetFile(LibraryPath);
681  llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
682  if (llvm::sys::fs::exists(LibOmpTargetFile)) {
683  CC1Args.push_back("-mlink-builtin-bitcode");
684  CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
685  FoundBCLibrary = true;
686  break;
687  }
688  }
689  if (!FoundBCLibrary)
690  getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
691  << LibOmpTargetName;
692  }
693 }
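 // NOTE (illustrative): for a CUDA-9.1 install and a hypothetical sm_70
 // device compile, the cc1 arguments appended above would include
 // "-fcuda-is-device", "-mlink-builtin-bitcode <libdevice dir>/libdevice.10.bc"
 // and "-target-feature +ptx61"; for OpenMP offloading, a
 // libomptarget-nvptx-sm_70.bc found on the library search paths is linked
 // in the same way.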
694 
695 bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
696  const Option &O = A->getOption();
697  return (O.matches(options::OPT_gN_Group) &&
698  !O.matches(options::OPT_gmodules)) ||
699  O.matches(options::OPT_g_Flag) ||
700  O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
701  O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
702  O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
703  O.matches(options::OPT_gdwarf_5) ||
704  O.matches(options::OPT_gcolumn_info);
705 }
706 
707 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
708  ArgStringList &CC1Args) const {
709  // Check our CUDA version if we're going to include the CUDA headers.
710  if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
711  !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
712  StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
713  assert(!Arch.empty() && "Must have an explicit GPU arch.");
714  CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
715  }
716  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
717 }
718 
719 llvm::opt::DerivedArgList *
720 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
721  StringRef BoundArch,
722  Action::OffloadKind DeviceOffloadKind) const {
723  DerivedArgList *DAL =
724  HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
725  if (!DAL)
726  DAL = new DerivedArgList(Args.getBaseArgs());
727 
728  const OptTable &Opts = getDriver().getOpts();
729 
730  // For OpenMP device offloading, append derived arguments. Make sure
731  // flags are not duplicated.
732  // Also append the compute capability.
733  if (DeviceOffloadKind == Action::OFK_OpenMP) {
734  for (Arg *A : Args) {
735  bool IsDuplicate = false;
736  for (Arg *DALArg : *DAL) {
737  if (A == DALArg) {
738  IsDuplicate = true;
739  break;
740  }
741  }
742  if (!IsDuplicate)
743  DAL->append(A);
744  }
745 
746  StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
747  if (Arch.empty())
748  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
749  CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
750 
751  return DAL;
752  }
753 
754  for (Arg *A : Args) {
755  if (A->getOption().matches(options::OPT_Xarch__)) {
756  // Skip this argument unless the architecture matches BoundArch
757  if (BoundArch.empty() || A->getValue(0) != BoundArch)
758  continue;
759 
760  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
761  unsigned Prev = Index;
762  std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
763 
764  // If the argument parsing failed or more than one argument was
765  // consumed, the -Xarch_ argument's parameter tried to consume
766  // extra arguments. Emit an error and ignore.
767  //
768  // We also want to disallow any options which would alter the
769  // driver behavior; that isn't going to work in our model. We
770  // use isDriverOption() as an approximation, although things
771  // like -O4 are going to slip through.
772  if (!XarchArg || Index > Prev + 1) {
773  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
774  << A->getAsString(Args);
775  continue;
776  } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
777  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
778  << A->getAsString(Args);
779  continue;
780  }
781  XarchArg->setBaseArg(A);
782  A = XarchArg.release();
783  DAL->AddSynthesizedArg(A);
784  }
785  DAL->append(A);
786  }
787 
788  if (!BoundArch.empty()) {
789  DAL->eraseArg(options::OPT_march_EQ);
790  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
791  }
792  return DAL;
793 }
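 // NOTE (illustrative): the -Xarch_ handling above means that, for example,
 // "-Xarch_sm_60 -O3" applies -O3 only to the device compilation whose
 // BoundArch is sm_60; arguments that consume extra parameters or would
 // change driver behavior are diagnosed and dropped.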
794 
795 Tool *CudaToolChain::buildAssembler() const {
796  return new tools::NVPTX::Assembler(*this);
797 }
798 
799 Tool *CudaToolChain::buildLinker() const {
800  if (OK == Action::OFK_OpenMP)
801  return new tools::NVPTX::OpenMPLinker(*this);
802  return new tools::NVPTX::Linker(*this);
803 }
804 
805 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
806  HostTC.addClangWarningOptions(CC1Args);
807 }
808 
809 ToolChain::CXXStdlibType
810 CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
811  return HostTC.GetCXXStdlibType(Args);
812 }
813 
814 void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
815  ArgStringList &CC1Args) const {
816  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
817 }
818 
819 void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
820  ArgStringList &CC1Args) const {
821  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
822 }
823 
824 void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
825  ArgStringList &CC1Args) const {
826  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
827 }
828 
829 SanitizerMask CudaToolChain::getSupportedSanitizers() const {
830  // The CudaToolChain only supports sanitizers in the sense that it allows
831  // sanitizer arguments on the command line if they are supported by the host
832  // toolchain. The CudaToolChain will actually ignore any command line
833  // arguments for any of these "supported" sanitizers. That means that no
834  // sanitization of device code is actually supported at this time.
835  //
836  // This behavior is necessary because the host and device toolchains
837  // invocations often share the command line, so the device toolchain must
838  // tolerate flags meant only for the host toolchain.
839  return HostTC.getSupportedSanitizers();
840 }
841 
842 VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
843  const ArgList &Args) const {
844  return HostTC.computeMSVCVersion(D, Args);
845 }