Cuda.cpp
1 //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Cuda.h"
11 #include "CommonArgs.h"
12 #include "InputInfo.h"
13 #include "clang/Basic/Cuda.h"
14 #include "clang/Basic/VirtualFileSystem.h"
15 #include "clang/Config/config.h"
16 #include "clang/Driver/Compilation.h"
17 #include "clang/Driver/Distro.h"
18 #include "clang/Driver/Driver.h"
19 #include "clang/Driver/DriverDiagnostic.h"
20 #include "clang/Driver/Options.h"
21 #include "llvm/Option/ArgList.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/Path.h"
24 #include "llvm/Support/Process.h"
25 #include "llvm/Support/Program.h"
26 #include <system_error>
27 
28 using namespace clang::driver;
29 using namespace clang::driver::toolchains;
30 using namespace clang::driver::tools;
31 using namespace clang;
32 using namespace llvm::opt;
33 
34 // Parses the contents of version.txt in a CUDA installation. It should
35 // contain one line of the form, e.g. "CUDA Version 7.5.2".
36 static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
37  if (!V.startswith("CUDA Version "))
38  return CudaVersion::UNKNOWN;
39  V = V.substr(strlen("CUDA Version "));
40  int Major = -1, Minor = -1;
41  auto First = V.split('.');
42  auto Second = First.second.split('.');
43  if (First.first.getAsInteger(10, Major) ||
44  Second.first.getAsInteger(10, Minor))
45  return CudaVersion::UNKNOWN;
46 
47  if (Major == 7 && Minor == 0) {
48  // This doesn't appear to ever happen -- version.txt doesn't exist in the
49  // CUDA 7 installs I've seen. But no harm in checking.
50  return CudaVersion::CUDA_70;
51  }
52  if (Major == 7 && Minor == 5)
53  return CudaVersion::CUDA_75;
54  if (Major == 8 && Minor == 0)
55  return CudaVersion::CUDA_80;
56  if (Major == 9 && Minor == 0)
57  return CudaVersion::CUDA_90;
58  if (Major == 9 && Minor == 1)
59  return CudaVersion::CUDA_91;
60  if (Major == 9 && Minor == 2)
61  return CudaVersion::CUDA_92;
62  return CudaVersion::UNKNOWN;
63 }
64 
65 CudaInstallationDetector::CudaInstallationDetector(
66  const Driver &D, const llvm::Triple &HostTriple,
67  const llvm::opt::ArgList &Args)
68  : D(D) {
69  struct Candidate {
70  std::string Path;
71  bool StrictChecking;
72 
73  Candidate(std::string Path, bool StrictChecking = false)
74  : Path(Path), StrictChecking(StrictChecking) {}
75  };
76  SmallVector<Candidate, 4> Candidates;
77 
78  // In decreasing order so we prefer newer versions to older versions.
79  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
80 
81  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
82  Candidates.emplace_back(
83  Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
84  } else if (HostTriple.isOSWindows()) {
85  for (const char *Ver : Versions)
86  Candidates.emplace_back(
87  D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
88  Ver);
89  } else {
90  if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
91  // Try to find ptxas binary. If the executable is located in a directory
92  // called 'bin/', its parent directory might be a good guess for a valid
93  // CUDA installation.
94  // However, some distributions might install 'ptxas' to /usr/bin. In that
95  // case the candidate would be '/usr' which passes the following checks
96  // because '/usr/include' exists as well. To avoid this case, we always
97  // check for the directory potentially containing files for libdevice,
98  // even if the user passes -nocudalib.
99  if (llvm::ErrorOr<std::string> ptxas =
100  llvm::sys::findProgramByName("ptxas")) {
101  SmallString<256> ptxasAbsolutePath;
102  llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
103 
104  StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
105  if (llvm::sys::path::filename(ptxasDir) == "bin")
106  Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
107  /*StrictChecking=*/true);
108  }
109  }
110 
111  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
112  for (const char *Ver : Versions)
113  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
114 
115  if (Distro(D.getVFS()).IsDebian())
116  // Special case for Debian to have nvidia-cuda-toolkit work
117  // out of the box. More info on http://bugs.debian.org/882505
118  Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
119  }
120 
121  bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
122 
123  for (const auto &Candidate : Candidates) {
124  InstallPath = Candidate.Path;
125  if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
126  continue;
127 
128  BinPath = InstallPath + "/bin";
129  IncludePath = InstallPath + "/include";
130  LibDevicePath = InstallPath + "/nvvm/libdevice";
131 
132  auto &FS = D.getVFS();
133  if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
134  continue;
135  bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
136  if (CheckLibDevice && !FS.exists(LibDevicePath))
137  continue;
138 
139  // On Linux, we have both lib and lib64 directories, and we need to choose
140  // based on our triple. On MacOS, we have only a lib directory.
141  //
142  // It's sufficient for our purposes to be flexible: If both lib and lib64
143  // exist, we choose whichever one matches our triple. Otherwise, if only
144  // lib exists, we use it.
145  if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
146  LibPath = InstallPath + "/lib64";
147  else if (FS.exists(InstallPath + "/lib"))
148  LibPath = InstallPath + "/lib";
149  else
150  continue;
151 
152  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
153  FS.getBufferForFile(InstallPath + "/version.txt");
154  if (!VersionFile) {
155  // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
156  // version.txt isn't present.
157  Version = CudaVersion::CUDA_70;
158  } else {
159  Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
160  }
161 
162  if (Version >= CudaVersion::CUDA_90) {
163  // CUDA-9+ uses single libdevice file for all GPU variants.
164  std::string FilePath = LibDevicePath + "/libdevice.10.bc";
165  if (FS.exists(FilePath)) {
166  for (const char *GpuArchName :
167  {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
168  "sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) {
169  const CudaArch GpuArch = StringToCudaArch(GpuArchName);
170  if (Version >= MinVersionForCudaArch(GpuArch) &&
171  Version <= MaxVersionForCudaArch(GpuArch))
172  LibDeviceMap[GpuArchName] = FilePath;
173  }
174  }
175  } else {
176  std::error_code EC;
177  for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
178  !EC && LI != LE; LI = LI.increment(EC)) {
179  StringRef FilePath = LI->path();
180  StringRef FileName = llvm::sys::path::filename(FilePath);
181  // Process all bitcode filenames that look like
182  // libdevice.compute_XX.YY.bc
183  const StringRef LibDeviceName = "libdevice.";
184  if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
185  continue;
186  StringRef GpuArch = FileName.slice(
187  LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
188  LibDeviceMap[GpuArch] = FilePath.str();
189  // Insert map entries for specific devices with this compute
190  // capability. NVCC's choice of the libdevice library version is
191  // rather peculiar and depends on the CUDA version.
192  if (GpuArch == "compute_20") {
193  LibDeviceMap["sm_20"] = FilePath;
194  LibDeviceMap["sm_21"] = FilePath;
195  LibDeviceMap["sm_32"] = FilePath;
196  } else if (GpuArch == "compute_30") {
197  LibDeviceMap["sm_30"] = FilePath;
198  if (Version < CudaVersion::CUDA_80) {
199  LibDeviceMap["sm_50"] = FilePath;
200  LibDeviceMap["sm_52"] = FilePath;
201  LibDeviceMap["sm_53"] = FilePath;
202  }
203  LibDeviceMap["sm_60"] = FilePath;
204  LibDeviceMap["sm_61"] = FilePath;
205  LibDeviceMap["sm_62"] = FilePath;
206  } else if (GpuArch == "compute_35") {
207  LibDeviceMap["sm_35"] = FilePath;
208  LibDeviceMap["sm_37"] = FilePath;
209  } else if (GpuArch == "compute_50") {
210  if (Version >= CudaVersion::CUDA_80) {
211  LibDeviceMap["sm_50"] = FilePath;
212  LibDeviceMap["sm_52"] = FilePath;
213  LibDeviceMap["sm_53"] = FilePath;
214  }
215  }
216  }
217  }
218 
219  // Check that we have found at least one libdevice that we can link in if
220  // -nocudalib hasn't been specified.
221  if (LibDeviceMap.empty() && !NoCudaLib)
222  continue;
223 
224  IsValid = true;
225  break;
226  }
227 }
228 
229 void CudaInstallationDetector::AddCudaIncludeArgs(
230  const ArgList &DriverArgs, ArgStringList &CC1Args) const {
231  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
232  // Add cuda_wrappers/* to our system include path. This lets us wrap
233  // standard library headers.
234  SmallString<128> P(D.ResourceDir);
235  llvm::sys::path::append(P, "include");
236  llvm::sys::path::append(P, "cuda_wrappers");
237  CC1Args.push_back("-internal-isystem");
238  CC1Args.push_back(DriverArgs.MakeArgString(P));
239  }
240 
241  if (DriverArgs.hasArg(options::OPT_nocudainc))
242  return;
243 
244  if (!isValid()) {
245  D.Diag(diag::err_drv_no_cuda_installation);
246  return;
247  }
248 
249  CC1Args.push_back("-internal-isystem");
250  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
251  CC1Args.push_back("-include");
252  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
253 }
254 
255 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
256  CudaArch Arch) const {
257  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
258  ArchsWithBadVersion.count(Arch) > 0)
259  return;
260 
261  auto MinVersion = MinVersionForCudaArch(Arch);
262  auto MaxVersion = MaxVersionForCudaArch(Arch);
263  if (Version < MinVersion || Version > MaxVersion) {
264  ArchsWithBadVersion.insert(Arch);
265  D.Diag(diag::err_drv_cuda_version_unsupported)
266  << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
267  << CudaVersionToString(MaxVersion) << InstallPath
268  << CudaVersionToString(Version);
269  }
270 }
271 
272 void CudaInstallationDetector::print(raw_ostream &OS) const {
273  if (isValid())
274  OS << "Found CUDA installation: " << InstallPath << ", version "
275  << CudaVersionToString(Version) << "\n";
276 }
277 
278 namespace {
279  /// Debug info kind.
280 enum DebugInfoKind {
281  NoDebug, /// No debug info.
282  LineTableOnly, /// Line tables only.
283  FullDebug /// Full debug info.
284 };
285 } // anonymous namespace
286 
287 static DebugInfoKind mustEmitDebugInfo(const ArgList &Args) {
288  Arg *A = Args.getLastArg(options::OPT_O_Group);
289  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
290  options::OPT_no_cuda_noopt_device_debug,
291  !A || A->getOption().matches(options::OPT_O0))) {
292  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
293  const Option &Opt = A->getOption();
294  if (Opt.matches(options::OPT_gN_Group)) {
295  if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
296  return NoDebug;
297  if (Opt.matches(options::OPT_gline_tables_only) ||
298  Opt.matches(options::OPT_ggdb1))
299  return LineTableOnly;
300  }
301  return FullDebug;
302  }
303  }
304  return NoDebug;
305 }
306 
307 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
308  const InputInfo &Output,
309  const InputInfoList &Inputs,
310  const ArgList &Args,
311  const char *LinkingOutput) const {
312  const auto &TC =
313  static_cast<const toolchains::CudaToolChain &>(getToolChain());
314  assert(TC.getTriple().isNVPTX() && "Wrong platform");
315 
316  StringRef GPUArchName;
317  // If this is an OpenMP action we need to extract the device architecture
318  // from the -march=arch option. This option may come from the
319  // -Xopenmp-target flag or the default value.
320  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
321  GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
322  assert(!GPUArchName.empty() && "Must have an architecture passed in.");
323  } else
324  GPUArchName = JA.getOffloadingArch();
325 
326  // Obtain architecture from the action.
327  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
328  assert(gpu_arch != CudaArch::UNKNOWN &&
329  "Device action expected to have an architecture.");
330 
331  // Check that our installation's ptxas supports gpu_arch.
332  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
333  TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
334  }
335 
336  ArgStringList CmdArgs;
337  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
338  DebugInfoKind DIKind = mustEmitDebugInfo(Args);
339  if (DIKind == FullDebug) {
340  // ptxas does not accept the -g option if optimization is enabled, so
341  // we ignore the compiler's -O* options if we want debug info.
342  CmdArgs.push_back("-g");
343  CmdArgs.push_back("--dont-merge-basicblocks");
344  CmdArgs.push_back("--return-at-end");
345  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
346  // Map the -O we received to -O{0,1,2,3}.
347  //
348  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
349  // default, so it may correspond more closely to the spirit of clang -O2.
350 
351  // -O3 seems like the least-bad option when -Osomething is specified to
352  // clang but it isn't handled below.
353  StringRef OOpt = "3";
354  if (A->getOption().matches(options::OPT_O4) ||
355  A->getOption().matches(options::OPT_Ofast))
356  OOpt = "3";
357  else if (A->getOption().matches(options::OPT_O0))
358  OOpt = "0";
359  else if (A->getOption().matches(options::OPT_O)) {
360  // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
361  OOpt = llvm::StringSwitch<const char *>(A->getValue())
362  .Case("1", "1")
363  .Case("2", "2")
364  .Case("3", "3")
365  .Case("s", "2")
366  .Case("z", "2")
367  .Default("2");
368  }
369  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
370  } else {
371  // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
372  // to no optimizations, but ptxas's default is -O3.
373  CmdArgs.push_back("-O0");
374  }
375  if (DIKind == LineTableOnly)
376  CmdArgs.push_back("-lineinfo");
377 
378  // Pass -v to ptxas if it was passed to the driver.
379  if (Args.hasArg(options::OPT_v))
380  CmdArgs.push_back("-v");
381 
382  CmdArgs.push_back("--gpu-name");
383  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
384  CmdArgs.push_back("--output-file");
385  CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
386  for (const auto& II : Inputs)
387  CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
388 
389  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
390  CmdArgs.push_back(Args.MakeArgString(A));
391 
392  bool Relocatable = false;
393  if (JA.isOffloading(Action::OFK_OpenMP))
394  // In OpenMP we need to generate relocatable code.
395  Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
396  options::OPT_fnoopenmp_relocatable_target,
397  /*Default=*/true);
398  else if (JA.isOffloading(Action::OFK_Cuda))
399  Relocatable = Args.hasFlag(options::OPT_fcuda_rdc,
400  options::OPT_fno_cuda_rdc, /*Default=*/false);
401 
402  if (Relocatable)
403  CmdArgs.push_back("-c");
404 
405  const char *Exec;
406  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
407  Exec = A->getValue();
408  else
409  Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
410  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
411 }
412 
413 static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
414  bool includePTX = true;
415  for (Arg *A : Args) {
416  if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
417  A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
418  continue;
419  A->claim();
420  const StringRef ArchStr = A->getValue();
421  if (ArchStr == "all" || ArchStr == gpu_arch) {
422  includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
423  continue;
424  }
425  }
426  return includePTX;
427 }
428 
429 // All inputs to this linker must be from CudaDeviceActions, as we need to look
430 // at the Inputs' Actions in order to figure out which GPU architecture they
431 // correspond to.
432 void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
433  const InputInfo &Output,
434  const InputInfoList &Inputs,
435  const ArgList &Args,
436  const char *LinkingOutput) const {
437  const auto &TC =
438  static_cast<const toolchains::CudaToolChain &>(getToolChain());
439  assert(TC.getTriple().isNVPTX() && "Wrong platform");
440 
441  ArgStringList CmdArgs;
442  CmdArgs.push_back("--cuda");
443  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
444  CmdArgs.push_back(Args.MakeArgString("--create"));
445  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
446  if (mustEmitDebugInfo(Args) == FullDebug)
447  CmdArgs.push_back("-g");
448 
449  for (const auto& II : Inputs) {
450  auto *A = II.getAction();
451  assert(A->getInputs().size() == 1 &&
452  "Device offload action is expected to have a single input");
453  const char *gpu_arch_str = A->getOffloadingArch();
454  assert(gpu_arch_str &&
455  "Device action expected to have associated a GPU architecture!");
456  CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
457 
458  if (II.getType() == types::TY_PP_Asm &&
459  !shouldIncludePTX(Args, gpu_arch_str))
460  continue;
461  // We need to pass an Arch of the form "sm_XX" for cubin files and
462  // "compute_XX" for ptx.
463  const char *Arch =
464  (II.getType() == types::TY_PP_Asm)
465  ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
466  : gpu_arch_str;
467  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
468  Arch + ",file=" + II.getFilename()));
469  }
470 
471  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
472  CmdArgs.push_back(Args.MakeArgString(A));
473 
474  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
475  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
476 }
477 
478 void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
479  const InputInfo &Output,
480  const InputInfoList &Inputs,
481  const ArgList &Args,
482  const char *LinkingOutput) const {
483  const auto &TC =
484  static_cast<const toolchains::CudaToolChain &>(getToolChain());
485  assert(TC.getTriple().isNVPTX() && "Wrong platform");
486 
487  ArgStringList CmdArgs;
488 
489  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
490  // host binary by the host linker.
491  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
492  "CUDA toolchain not expected for an OpenMP host device.");
493 
494  if (Output.isFilename()) {
495  CmdArgs.push_back("-o");
496  CmdArgs.push_back(Output.getFilename());
497  } else
498  assert(Output.isNothing() && "Invalid output.");
499  if (mustEmitDebugInfo(Args) == FullDebug)
500  CmdArgs.push_back("-g");
501 
502  if (Args.hasArg(options::OPT_v))
503  CmdArgs.push_back("-v");
504 
505  StringRef GPUArch =
506  Args.getLastArgValue(options::OPT_march_EQ);
507  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
508 
509  CmdArgs.push_back("-arch");
510  CmdArgs.push_back(Args.MakeArgString(GPUArch));
511 
512  // Add paths specified in LIBRARY_PATH environment variable as -L options.
513  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
514 
515  // Add paths for the default clang library path.
516  SmallString<256> DefaultLibPath =
517  llvm::sys::path::parent_path(TC.getDriver().Dir);
518  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
519  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
520 
521  // Add linking against library implementing OpenMP calls on NVPTX target.
522  CmdArgs.push_back("-lomptarget-nvptx");
523 
524  for (const auto &II : Inputs) {
525  if (II.getType() == types::TY_LLVM_IR ||
526  II.getType() == types::TY_LTO_IR ||
527  II.getType() == types::TY_LTO_BC ||
528  II.getType() == types::TY_LLVM_BC) {
529  C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
530  << getToolChain().getTripleString();
531  continue;
532  }
533 
534  // Currently, we only pass the input files to the linker; we do not pass
535  // any libraries that may be valid only for the host.
536  if (!II.isFilename())
537  continue;
538 
539  const char *CubinF = C.addTempFile(
540  C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
541 
542  CmdArgs.push_back(CubinF);
543  }
544 
545  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
546 
547  const char *Exec =
548  Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
549  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
550 }
551 
552 /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
553 /// which isn't properly a linker but nonetheless performs the step of stitching
554 /// together object files from the assembler into a single blob.
555 
556 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
557  const ToolChain &HostTC, const ArgList &Args,
558  const Action::OffloadKind OK)
559  : ToolChain(D, Triple, Args), HostTC(HostTC),
560  CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
561  if (CudaInstallation.isValid())
562  getProgramPaths().push_back(CudaInstallation.getBinPath());
563  // Look up binaries in the driver directory; this is used to
564  // discover the clang-offload-bundler executable.
565  getProgramPaths().push_back(getDriver().Dir);
566 }
567 
568 std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
569  // Only object files are changed; for example, assembly files keep their .s
570  // extensions. CUDA also continues to use .o because it uses fatbinary rather
571  // than nvlink.
572  if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
573  return ToolChain::getInputFilename(Input);
574 
575  // Replace extension for object files with cubin because nvlink relies on
576  // these particular file names.
577  SmallString<256> Filename(ToolChain::getInputFilename(Input));
578  llvm::sys::path::replace_extension(Filename, "cubin");
579  return Filename.str();
580 }
581 
582 void CudaToolChain::addClangTargetOptions(
583  const llvm::opt::ArgList &DriverArgs,
584  llvm::opt::ArgStringList &CC1Args,
585  Action::OffloadKind DeviceOffloadingKind) const {
586  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
587 
588  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
589  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
590  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
591  DeviceOffloadingKind == Action::OFK_Cuda) &&
592  "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
593 
594  if (DeviceOffloadingKind == Action::OFK_Cuda) {
595  CC1Args.push_back("-fcuda-is-device");
596 
597  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
598  options::OPT_fno_cuda_flush_denormals_to_zero, false))
599  CC1Args.push_back("-fcuda-flush-denormals-to-zero");
600 
601  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
602  options::OPT_fno_cuda_approx_transcendentals, false))
603  CC1Args.push_back("-fcuda-approx-transcendentals");
604 
605  if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
606  false))
607  CC1Args.push_back("-fcuda-rdc");
608  }
609 
610  if (DriverArgs.hasArg(options::OPT_nocudalib))
611  return;
612 
613  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
614 
615  if (LibDeviceFile.empty()) {
616  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
617  DriverArgs.hasArg(options::OPT_S))
618  return;
619 
620  getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
621  return;
622  }
623 
624  CC1Args.push_back("-mlink-cuda-bitcode");
625  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
626 
627  // Libdevice in CUDA-7.0 requires a PTX version that's more recent than LLVM
628  // defaults to. Use PTX4.2 by default, which is the PTX version that came with
629  // CUDA-7.0.
630  const char *PtxFeature = "+ptx42";
631  if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
632  // CUDA-9.1 uses new instructions that are only available in PTX6.1+
633  PtxFeature = "+ptx61";
634  } else if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
635  // CUDA-9.0 uses new instructions that are only available in PTX6.0+
636  PtxFeature = "+ptx60";
637  }
638  CC1Args.append({"-target-feature", PtxFeature});
639  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
640  options::OPT_fno_cuda_short_ptr, false))
641  CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
642 
643  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
644  SmallVector<StringRef, 8> LibraryPaths;
645  // Add path to lib and/or lib64 folders.
646  SmallString<256> DefaultLibPath =
647  llvm::sys::path::parent_path(getDriver().Dir);
648  llvm::sys::path::append(DefaultLibPath,
649  Twine("lib") + CLANG_LIBDIR_SUFFIX);
650  LibraryPaths.emplace_back(DefaultLibPath.c_str());
651 
652  // Add user defined library paths from LIBRARY_PATH.
653  llvm::Optional<std::string> LibPath =
654  llvm::sys::Process::GetEnv("LIBRARY_PATH");
655  if (LibPath) {
656  SmallVector<StringRef, 8> Frags;
657  const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
658  llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
659  for (StringRef Path : Frags)
660  LibraryPaths.emplace_back(Path.trim());
661  }
662 
663  std::string LibOmpTargetName =
664  "libomptarget-nvptx-" + GpuArch.str() + ".bc";
665  bool FoundBCLibrary = false;
666  for (StringRef LibraryPath : LibraryPaths) {
667  SmallString<128> LibOmpTargetFile(LibraryPath);
668  llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
669  if (llvm::sys::fs::exists(LibOmpTargetFile)) {
670  CC1Args.push_back("-mlink-cuda-bitcode");
671  CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
672  FoundBCLibrary = true;
673  break;
674  }
675  }
676  if (!FoundBCLibrary)
677  getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
678  << LibOmpTargetName;
679  }
680 }
681 
682 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
683  ArgStringList &CC1Args) const {
684  // Check our CUDA version if we're going to include the CUDA headers.
685  if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
686  !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
687  StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
688  assert(!Arch.empty() && "Must have an explicit GPU arch.");
689  CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
690  }
691  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
692 }
693 
694 llvm::opt::DerivedArgList *
695 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
696  StringRef BoundArch,
697  Action::OffloadKind DeviceOffloadKind) const {
698  DerivedArgList *DAL =
699  HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
700  if (!DAL)
701  DAL = new DerivedArgList(Args.getBaseArgs());
702 
703  const OptTable &Opts = getDriver().getOpts();
704 
705  // For OpenMP device offloading, append derived arguments. Make sure
706  // flags are not duplicated.
707  // Also append the compute capability.
708  if (DeviceOffloadKind == Action::OFK_OpenMP) {
709  for (Arg *A : Args) {
710  bool IsDuplicate = false;
711  for (Arg *DALArg : *DAL) {
712  if (A == DALArg) {
713  IsDuplicate = true;
714  break;
715  }
716  }
717  if (!IsDuplicate)
718  DAL->append(A);
719  }
720 
721  StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
722  if (Arch.empty())
723  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
724  CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
725 
726  return DAL;
727  }
728 
729  for (Arg *A : Args) {
730  if (A->getOption().matches(options::OPT_Xarch__)) {
731  // Skip this argument unless the architecture matches BoundArch
732  if (BoundArch.empty() || A->getValue(0) != BoundArch)
733  continue;
734 
735  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
736  unsigned Prev = Index;
737  std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
738 
739  // If the argument parsing failed or more than one argument was
740  // consumed, the -Xarch_ argument's parameter tried to consume
741  // extra arguments. Emit an error and ignore.
742  //
743  // We also want to disallow any options which would alter the
744  // driver behavior; that isn't going to work in our model. We
745  // use isDriverOption() as an approximation, although things
746  // like -O4 are going to slip through.
747  if (!XarchArg || Index > Prev + 1) {
748  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
749  << A->getAsString(Args);
750  continue;
751  } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
752  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
753  << A->getAsString(Args);
754  continue;
755  }
756  XarchArg->setBaseArg(A);
757  A = XarchArg.release();
758  DAL->AddSynthesizedArg(A);
759  }
760  DAL->append(A);
761  }
762 
763  if (!BoundArch.empty()) {
764  DAL->eraseArg(options::OPT_march_EQ);
765  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
766  }
767  return DAL;
768 }
769 
770 Tool *CudaToolChain::buildAssembler() const {
771  return new tools::NVPTX::Assembler(*this);
772 }
773 
774 Tool *CudaToolChain::buildLinker() const {
775  if (OK == Action::OFK_OpenMP)
776  return new tools::NVPTX::OpenMPLinker(*this);
777  return new tools::NVPTX::Linker(*this);
778 }
779 
780 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
781  HostTC.addClangWarningOptions(CC1Args);
782 }
783 
784 ToolChain::CXXStdlibType
785 CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
786  return HostTC.GetCXXStdlibType(Args);
787 }
788 
789 void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
790  ArgStringList &CC1Args) const {
791  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
792 }
793 
794 void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
795  ArgStringList &CC1Args) const {
796  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
797 }
798 
799 void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
800  ArgStringList &CC1Args) const {
801  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
802 }
803 
804 SanitizerMask CudaToolChain::getSupportedSanitizers() const {
805  // The CudaToolChain only supports sanitizers in the sense that it allows
806  // sanitizer arguments on the command line if they are supported by the host
807  // toolchain. The CudaToolChain will actually ignore any command line
808  // arguments for any of these "supported" sanitizers. That means that no
809  // sanitization of device code is actually supported at this time.
810  //
811  // This behavior is necessary because the host and device toolchains
812  // invocations often share the command line, so the device toolchain must
813  // tolerate flags meant only for the host toolchain.
814  return HostTC.getSupportedSanitizers();
815 }
816 
817 VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
818  const ArgList &Args) const {
819  return HostTC.computeMSVCVersion(D, Args);
820 }