clang  9.0.0svn
Cuda.cpp
1 //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Cuda.h"
11 #include "CommonArgs.h"
12 #include "InputInfo.h"
13 #include "clang/Basic/Cuda.h"
14 #include "clang/Config/config.h"
15 #include "clang/Driver/Compilation.h"
16 #include "clang/Driver/Distro.h"
17 #include "clang/Driver/Driver.h"
18 #include "clang/Driver/DriverDiagnostic.h"
19 #include "clang/Driver/Options.h"
20 #include "llvm/Option/ArgList.h"
21 #include "llvm/Support/FileSystem.h"
22 #include "llvm/Support/Path.h"
23 #include "llvm/Support/Process.h"
24 #include "llvm/Support/Program.h"
25 #include "llvm/Support/VirtualFileSystem.h"
26 #include <system_error>
27 
28 using namespace clang::driver;
29 using namespace clang::driver::toolchains;
30 using namespace clang::driver::tools;
31 using namespace clang;
32 using namespace llvm::opt;
33 
34 // Parses the contents of version.txt in a CUDA installation. It should
35 // contain one line of the form "CUDA Version 7.5.2".
36 static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
37  if (!V.startswith("CUDA Version "))
38  return CudaVersion::UNKNOWN;
39  V = V.substr(strlen("CUDA Version "));
40  int Major = -1, Minor = -1;
41  auto First = V.split('.');
42  auto Second = First.second.split('.');
43  if (First.first.getAsInteger(10, Major) ||
44  Second.first.getAsInteger(10, Minor))
45  return CudaVersion::UNKNOWN;
46 
47  if (Major == 7 && Minor == 0) {
48  // This doesn't appear to ever happen -- version.txt doesn't exist in the
49  // CUDA 7 installs I've seen. But no harm in checking.
50  return CudaVersion::CUDA_70;
51  }
52  if (Major == 7 && Minor == 5)
53  return CudaVersion::CUDA_75;
54  if (Major == 8 && Minor == 0)
55  return CudaVersion::CUDA_80;
56  if (Major == 9 && Minor == 0)
57  return CudaVersion::CUDA_90;
58  if (Major == 9 && Minor == 1)
59  return CudaVersion::CUDA_91;
60  if (Major == 9 && Minor == 2)
61  return CudaVersion::CUDA_92;
62  if (Major == 10 && Minor == 0)
63  return CudaVersion::CUDA_100;
64  return CudaVersion::UNKNOWN;
65 }
66 
67 CudaInstallationDetector::CudaInstallationDetector(
68  const Driver &D, const llvm::Triple &HostTriple,
69  const llvm::opt::ArgList &Args)
70  : D(D) {
71  struct Candidate {
72  std::string Path;
73  bool StrictChecking;
74 
75  Candidate(std::string Path, bool StrictChecking = false)
76  : Path(Path), StrictChecking(StrictChecking) {}
77  };
78  SmallVector<Candidate, 4> Candidates;
79 
80  // In decreasing order so we prefer newer versions to older versions.
81  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
82 
83  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
84  Candidates.emplace_back(
85  Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
86  } else if (HostTriple.isOSWindows()) {
87  for (const char *Ver : Versions)
88  Candidates.emplace_back(
89  D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
90  Ver);
91  } else {
92  if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
93  // Try to find ptxas binary. If the executable is located in a directory
94  // called 'bin/', its parent directory might be a good guess for a valid
95  // CUDA installation.
96  // However, some distributions might install 'ptxas' to /usr/bin. In that
97  // case the candidate would be '/usr' which passes the following checks
98  // because '/usr/include' exists as well. To avoid this case, we always
99  // check for the directory potentially containing files for libdevice,
100  // even if the user passes -nocudalib.
101  if (llvm::ErrorOr<std::string> ptxas =
102  llvm::sys::findProgramByName("ptxas")) {
103  SmallString<256> ptxasAbsolutePath;
104  llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
105 
106  StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
107  if (llvm::sys::path::filename(ptxasDir) == "bin")
108  Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
109  /*StrictChecking=*/true);
110  }
111  }
112 
113  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
114  for (const char *Ver : Versions)
115  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
116 
117  if (Distro(D.getVFS()).IsDebian() || Distro(D.getVFS()).IsUbuntu())
118  // Special case for Debian to have nvidia-cuda-toolkit work
119  // out of the box. More info on http://bugs.debian.org/882505
120  Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
121  }
122 
123  bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
124 
125  for (const auto &Candidate : Candidates) {
126  InstallPath = Candidate.Path;
127  if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
128  continue;
129 
130  BinPath = InstallPath + "/bin";
131  IncludePath = InstallPath + "/include";
132  LibDevicePath = InstallPath + "/nvvm/libdevice";
133 
134  auto &FS = D.getVFS();
135  if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
136  continue;
137  bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
138  if (CheckLibDevice && !FS.exists(LibDevicePath))
139  continue;
140 
141  // On Linux, we have both lib and lib64 directories, and we need to choose
142  // based on our triple. On MacOS, we have only a lib directory.
143  //
144  // It's sufficient for our purposes to be flexible: If both lib and lib64
145  // exist, we choose whichever one matches our triple. Otherwise, if only
146  // lib exists, we use it.
147  if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
148  LibPath = InstallPath + "/lib64";
149  else if (FS.exists(InstallPath + "/lib"))
150  LibPath = InstallPath + "/lib";
151  else
152  continue;
153 
154  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
155  FS.getBufferForFile(InstallPath + "/version.txt");
156  if (!VersionFile) {
157  // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
158  // version.txt isn't present.
159  Version = CudaVersion::CUDA_70;
160  } else {
161  Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
162  }
163 
164  if (Version >= CudaVersion::CUDA_90) {
165  // CUDA-9+ uses a single libdevice file for all GPU variants.
166  std::string FilePath = LibDevicePath + "/libdevice.10.bc";
167  if (FS.exists(FilePath)) {
168  for (const char *GpuArchName :
169  {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
170  "sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) {
171  const CudaArch GpuArch = StringToCudaArch(GpuArchName);
172  if (Version >= MinVersionForCudaArch(GpuArch) &&
173  Version <= MaxVersionForCudaArch(GpuArch))
174  LibDeviceMap[GpuArchName] = FilePath;
175  }
176  }
177  } else {
178  std::error_code EC;
179  for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
180  !EC && LI != LE; LI = LI.increment(EC)) {
181  StringRef FilePath = LI->path();
182  StringRef FileName = llvm::sys::path::filename(FilePath);
183  // Process all bitcode filenames that look like
184  // libdevice.compute_XX.YY.bc
185  const StringRef LibDeviceName = "libdevice.";
186  if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
187  continue;
188  StringRef GpuArch = FileName.slice(
189  LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
190  LibDeviceMap[GpuArch] = FilePath.str();
191  // Insert map entries for specific devices with this compute
192  // capability. NVCC's choice of the libdevice library version is
193  // rather peculiar and depends on the CUDA version.
194  if (GpuArch == "compute_20") {
195  LibDeviceMap["sm_20"] = FilePath;
196  LibDeviceMap["sm_21"] = FilePath;
197  LibDeviceMap["sm_32"] = FilePath;
198  } else if (GpuArch == "compute_30") {
199  LibDeviceMap["sm_30"] = FilePath;
200  if (Version < CudaVersion::CUDA_80) {
201  LibDeviceMap["sm_50"] = FilePath;
202  LibDeviceMap["sm_52"] = FilePath;
203  LibDeviceMap["sm_53"] = FilePath;
204  }
205  LibDeviceMap["sm_60"] = FilePath;
206  LibDeviceMap["sm_61"] = FilePath;
207  LibDeviceMap["sm_62"] = FilePath;
208  } else if (GpuArch == "compute_35") {
209  LibDeviceMap["sm_35"] = FilePath;
210  LibDeviceMap["sm_37"] = FilePath;
211  } else if (GpuArch == "compute_50") {
212  if (Version >= CudaVersion::CUDA_80) {
213  LibDeviceMap["sm_50"] = FilePath;
214  LibDeviceMap["sm_52"] = FilePath;
215  LibDeviceMap["sm_53"] = FilePath;
216  }
217  }
218  }
219  }
220 
221  // Check that we have found at least one libdevice that we can link in if
222  // -nocudalib hasn't been specified.
223  if (LibDeviceMap.empty() && !NoCudaLib)
224  continue;
225 
226  IsValid = true;
227  break;
228  }
229 }
230 
231 void CudaInstallationDetector::AddCudaIncludeArgs(
232  const ArgList &DriverArgs, ArgStringList &CC1Args) const {
233  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
234  // Add cuda_wrappers/* to our system include path. This lets us wrap
235  // standard library headers.
236  SmallString<128> P(D.ResourceDir);
237  llvm::sys::path::append(P, "include");
238  llvm::sys::path::append(P, "cuda_wrappers");
239  CC1Args.push_back("-internal-isystem");
240  CC1Args.push_back(DriverArgs.MakeArgString(P));
241  }
242 
243  if (DriverArgs.hasArg(options::OPT_nocudainc))
244  return;
245 
246  if (!isValid()) {
247  D.Diag(diag::err_drv_no_cuda_installation);
248  return;
249  }
250 
251  CC1Args.push_back("-internal-isystem");
252  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
253  CC1Args.push_back("-include");
254  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
255 }
256 
257 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
258  CudaArch Arch) const {
259  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
260  ArchsWithBadVersion.count(Arch) > 0)
261  return;
262 
263  auto MinVersion = MinVersionForCudaArch(Arch);
264  auto MaxVersion = MaxVersionForCudaArch(Arch);
265  if (Version < MinVersion || Version > MaxVersion) {
266  ArchsWithBadVersion.insert(Arch);
267  D.Diag(diag::err_drv_cuda_version_unsupported)
268  << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
269  << CudaVersionToString(MaxVersion) << InstallPath
270  << CudaVersionToString(Version);
271  }
272 }
273 
274 void CudaInstallationDetector::print(raw_ostream &OS) const {
275  if (isValid())
276  OS << "Found CUDA installation: " << InstallPath << ", version "
277  << CudaVersionToString(Version) << "\n";
278 }
279 
280 namespace {
281 /// Debug info level for the NVPTX devices. We may need to emit a different
282 /// debug info level for the host and for the device itself. This type controls
283 /// emission of debug info for the devices. It either disables debug info
284 /// emission completely, or emits debug directives only, or emits the same
285 /// debug info as for the host.
286 enum DeviceDebugInfoLevel {
287  DisableDebugInfo, /// Do not emit debug info for the devices.
288  DebugDirectivesOnly, /// Emit only debug directives.
289  EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
290  /// host.
291 };
292 } // anonymous namespace
293 
294 /// Define the debug info level for the NVPTX devices. If debug info is
295 /// disabled for both the host and the device (-g0/-ggdb0 or no debug options
296 /// at all), no debug info is emitted. If only debug directives are requested
297 /// for both host and device (-gline-directives-only), or debug info for the
298 /// device only is disabled (optimization is on and --cuda-noopt-device-debug
299 /// was not specified), only debug directives are emitted for the device.
300 /// Otherwise, use the same debug info level as for the host (with the
301 /// limitation that only the DWARF2 standard is supported).
302 static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
303  const Arg *A = Args.getLastArg(options::OPT_O_Group);
304  bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
305  Args.hasFlag(options::OPT_cuda_noopt_device_debug,
306  options::OPT_no_cuda_noopt_device_debug,
307  /*Default=*/false);
308  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
309  const Option &Opt = A->getOption();
310  if (Opt.matches(options::OPT_gN_Group)) {
311  if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
312  return DisableDebugInfo;
313  if (Opt.matches(options::OPT_gline_directives_only))
314  return DebugDirectivesOnly;
315  }
316  return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
317  }
318  return DisableDebugInfo;
319 }
320 
321 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
322  const InputInfo &Output,
323  const InputInfoList &Inputs,
324  const ArgList &Args,
325  const char *LinkingOutput) const {
326  const auto &TC =
327  static_cast<const toolchains::CudaToolChain &>(getToolChain());
328  assert(TC.getTriple().isNVPTX() && "Wrong platform");
329 
330  StringRef GPUArchName;
331  // If this is an OpenMP action we need to extract the device architecture
332  // from the -march=arch option. This option may come from the -Xopenmp-target
333  // flag or the default value.
334  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
335  GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
336  assert(!GPUArchName.empty() && "Must have an architecture passed in.");
337  } else
338  GPUArchName = JA.getOffloadingArch();
339 
340  // Obtain architecture from the action.
341  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
342  assert(gpu_arch != CudaArch::UNKNOWN &&
343  "Device action expected to have an architecture.");
344 
345  // Check that our installation's ptxas supports gpu_arch.
346  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
347  TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
348  }
349 
350  ArgStringList CmdArgs;
351  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
352  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
353  if (DIKind == EmitSameDebugInfoAsHost) {
354  // ptxas does not accept the -g option if optimization is enabled, so
355  // we ignore the compiler's -O* options if we want debug info.
356  CmdArgs.push_back("-g");
357  CmdArgs.push_back("--dont-merge-basicblocks");
358  CmdArgs.push_back("--return-at-end");
359  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
360  // Map the -O we received to -O{0,1,2,3}.
361  //
362  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
363  // default, so it may correspond more closely to the spirit of clang -O2.
364 
365  // -O3 seems like the least-bad option when -Osomething is specified to
366  // clang but it isn't handled below.
367  StringRef OOpt = "3";
368  if (A->getOption().matches(options::OPT_O4) ||
369  A->getOption().matches(options::OPT_Ofast))
370  OOpt = "3";
371  else if (A->getOption().matches(options::OPT_O0))
372  OOpt = "0";
373  else if (A->getOption().matches(options::OPT_O)) {
374  // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
375  OOpt = llvm::StringSwitch<const char *>(A->getValue())
376  .Case("1", "1")
377  .Case("2", "2")
378  .Case("3", "3")
379  .Case("s", "2")
380  .Case("z", "2")
381  .Default("2");
382  }
383  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
384  } else {
385  // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
386  // to no optimizations, but ptxas's default is -O3.
387  CmdArgs.push_back("-O0");
388  }
389  if (DIKind == DebugDirectivesOnly)
390  CmdArgs.push_back("-lineinfo");
391 
392  // Pass -v to ptxas if it was passed to the driver.
393  if (Args.hasArg(options::OPT_v))
394  CmdArgs.push_back("-v");
395 
396  CmdArgs.push_back("--gpu-name");
397  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
398  CmdArgs.push_back("--output-file");
399  CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
400  for (const auto& II : Inputs)
401  CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
402 
403  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
404  CmdArgs.push_back(Args.MakeArgString(A));
405 
406  bool Relocatable = false;
407  if (JA.isOffloading(Action::OFK_OpenMP))
408  // In OpenMP we need to generate relocatable code.
409  Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
410  options::OPT_fnoopenmp_relocatable_target,
411  /*Default=*/true);
412  else if (JA.isOffloading(Action::OFK_Cuda))
413  Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
414  options::OPT_fno_gpu_rdc, /*Default=*/false);
415 
416  if (Relocatable)
417  CmdArgs.push_back("-c");
418 
419  const char *Exec;
420  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
421  Exec = A->getValue();
422  else
423  Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
424  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
425 }
426 
427 static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
428  bool includePTX = true;
429  for (Arg *A : Args) {
430  if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
431  A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
432  continue;
433  A->claim();
434  const StringRef ArchStr = A->getValue();
435  if (ArchStr == "all" || ArchStr == gpu_arch) {
436  includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
437  continue;
438  }
439  }
440  return includePTX;
441 }
442 
443 // All inputs to this linker must be from CudaDeviceActions, as we need to look
444 // at the Inputs' Actions in order to figure out which GPU architecture they
445 // correspond to.
446 void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
447  const InputInfo &Output,
448  const InputInfoList &Inputs,
449  const ArgList &Args,
450  const char *LinkingOutput) const {
451  const auto &TC =
452  static_cast<const toolchains::CudaToolChain &>(getToolChain());
453  assert(TC.getTriple().isNVPTX() && "Wrong platform");
454 
455  ArgStringList CmdArgs;
456  CmdArgs.push_back("--cuda");
457  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
458  CmdArgs.push_back(Args.MakeArgString("--create"));
459  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
460  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
461  CmdArgs.push_back("-g");
462 
463  for (const auto& II : Inputs) {
464  auto *A = II.getAction();
465  assert(A->getInputs().size() == 1 &&
466  "Device offload action is expected to have a single input");
467  const char *gpu_arch_str = A->getOffloadingArch();
468  assert(gpu_arch_str &&
469  "Device action expected to have associated a GPU architecture!");
470  CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
471 
472  if (II.getType() == types::TY_PP_Asm &&
473  !shouldIncludePTX(Args, gpu_arch_str))
474  continue;
475  // We need to pass an Arch of the form "sm_XX" for cubin files and
476  // "compute_XX" for ptx.
477  const char *Arch =
478  (II.getType() == types::TY_PP_Asm)
479  ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
480  : gpu_arch_str;
481  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
482  Arch + ",file=" + II.getFilename()));
483  }
484 
485  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
486  CmdArgs.push_back(Args.MakeArgString(A));
487 
488  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
489  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
490 }
491 
492 void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
493  const InputInfo &Output,
494  const InputInfoList &Inputs,
495  const ArgList &Args,
496  const char *LinkingOutput) const {
497  const auto &TC =
498  static_cast<const toolchains::CudaToolChain &>(getToolChain());
499  assert(TC.getTriple().isNVPTX() && "Wrong platform");
500 
501  ArgStringList CmdArgs;
502 
503  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
504  // host binary by the host linker.
505  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
506  "CUDA toolchain not expected for an OpenMP host device.");
507 
508  if (Output.isFilename()) {
509  CmdArgs.push_back("-o");
510  CmdArgs.push_back(Output.getFilename());
511  } else
512  assert(Output.isNothing() && "Invalid output.");
513  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
514  CmdArgs.push_back("-g");
515 
516  if (Args.hasArg(options::OPT_v))
517  CmdArgs.push_back("-v");
518 
519  StringRef GPUArch =
520  Args.getLastArgValue(options::OPT_march_EQ);
521  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
522 
523  CmdArgs.push_back("-arch");
524  CmdArgs.push_back(Args.MakeArgString(GPUArch));
525 
526  // Assume that the directory specified with --libomptarget_nvptx_path
527  // contains the static library libomptarget-nvptx.a.
528  if (const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
529  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + A->getValue()));
530 
531  // Add paths specified in LIBRARY_PATH environment variable as -L options.
532  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
533 
534  // Add paths for the default clang library path.
535  SmallString<256> DefaultLibPath =
536  llvm::sys::path::parent_path(TC.getDriver().Dir);
537  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
538  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
539 
540  // Add linking against library implementing OpenMP calls on NVPTX target.
541  CmdArgs.push_back("-lomptarget-nvptx");
542 
543  for (const auto &II : Inputs) {
544  if (II.getType() == types::TY_LLVM_IR ||
545  II.getType() == types::TY_LTO_IR ||
546  II.getType() == types::TY_LTO_BC ||
547  II.getType() == types::TY_LLVM_BC) {
548  C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
549  << getToolChain().getTripleString();
550  continue;
551  }
552 
553  // Currently, we only pass the input files to the linker; we do not pass
554  // any libraries that may be valid only for the host.
555  if (!II.isFilename())
556  continue;
557 
558  const char *CubinF = C.addTempFile(
559  C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
560 
561  CmdArgs.push_back(CubinF);
562  }
563 
564  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
565 
566  const char *Exec =
567  Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
568  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
569 }
570 
571 /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
572 /// which isn't properly a linker but nonetheless performs the step of stitching
573 /// together object files from the assembler into a single blob.
574 
575 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
576  const ToolChain &HostTC, const ArgList &Args,
577  const Action::OffloadKind OK)
578  : ToolChain(D, Triple, Args), HostTC(HostTC),
579  CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
580  if (CudaInstallation.isValid())
581  getProgramPaths().push_back(CudaInstallation.getBinPath());
582  // Look up binaries in the driver directory; this is used to
583  // discover the clang-offload-bundler executable.
584  getProgramPaths().push_back(getDriver().Dir);
585 }
586 
587 std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
588  // Only object files are changed, for example assembly files keep their .s
589  // extensions. CUDA also continues to use .o because it uses fatbinary
590  // rather than nvlink.
591  if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
592  return ToolChain::getInputFilename(Input);
593 
594  // Replace extension for object files with cubin because nvlink relies on
595  // these particular file names.
596  SmallString<256> Filename(ToolChain::getInputFilename(Input));
597  llvm::sys::path::replace_extension(Filename, "cubin");
598  return Filename.str();
599 }
600 
601 void CudaToolChain::addClangTargetOptions(
602  const llvm::opt::ArgList &DriverArgs,
603  llvm::opt::ArgStringList &CC1Args,
604  Action::OffloadKind DeviceOffloadingKind) const {
605  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
606 
607  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
608  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
609  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
610  DeviceOffloadingKind == Action::OFK_Cuda) &&
611  "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
612 
613  if (DeviceOffloadingKind == Action::OFK_Cuda) {
614  CC1Args.push_back("-fcuda-is-device");
615 
616  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
617  options::OPT_fno_cuda_flush_denormals_to_zero, false))
618  CC1Args.push_back("-fcuda-flush-denormals-to-zero");
619 
620  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
621  options::OPT_fno_cuda_approx_transcendentals, false))
622  CC1Args.push_back("-fcuda-approx-transcendentals");
623 
624  if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
625  false))
626  CC1Args.push_back("-fgpu-rdc");
627  }
628 
629  if (DriverArgs.hasArg(options::OPT_nocudalib))
630  return;
631 
632  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
633 
634  if (LibDeviceFile.empty()) {
635  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
636  DriverArgs.hasArg(options::OPT_S))
637  return;
638 
639  getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
640  return;
641  }
642 
643  CC1Args.push_back("-mlink-builtin-bitcode");
644  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
645 
646  // Libdevice in CUDA-7.0 requires a PTX version that's more recent than the
647  // one LLVM defaults to. Use PTX4.2 by default, which is the PTX version that
648  // came with CUDA-7.0.
649  const char *PtxFeature = "+ptx42";
650  // TODO(tra): CUDA-10+ needs PTX 6.3 to support new features. However that
651  // requires fair amount of work on LLVM side. We'll keep using PTX 6.1 until
652  // all prerequisites are in place.
653  if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
654  // CUDA-9.1 uses new instructions that are only available in PTX6.1+
655  PtxFeature = "+ptx61";
656  } else if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
657  // CUDA-9.0 uses new instructions that are only available in PTX6.0+
658  PtxFeature = "+ptx60";
659  }
660  CC1Args.append({"-target-feature", PtxFeature});
661  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
662  options::OPT_fno_cuda_short_ptr, false))
663  CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
664 
665  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
666  SmallVector<StringRef, 8> LibraryPaths;
667 
668  if (const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
669  LibraryPaths.push_back(A->getValue());
670 
671  // Add user defined library paths from LIBRARY_PATH.
672  llvm::Optional<std::string> LibPath =
673  llvm::sys::Process::GetEnv("LIBRARY_PATH");
674  if (LibPath) {
675  SmallVector<StringRef, 8> Frags;
676  const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
677  llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
678  for (StringRef Path : Frags)
679  LibraryPaths.emplace_back(Path.trim());
680  }
681 
682  // Add path to lib / lib64 folder.
683  SmallString<256> DefaultLibPath =
684  llvm::sys::path::parent_path(getDriver().Dir);
685  llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
686  LibraryPaths.emplace_back(DefaultLibPath.c_str());
687 
688  std::string LibOmpTargetName =
689  "libomptarget-nvptx-" + GpuArch.str() + ".bc";
690  bool FoundBCLibrary = false;
691  for (StringRef LibraryPath : LibraryPaths) {
692  SmallString<128> LibOmpTargetFile(LibraryPath);
693  llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
694  if (llvm::sys::fs::exists(LibOmpTargetFile)) {
695  CC1Args.push_back("-mlink-builtin-bitcode");
696  CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
697  FoundBCLibrary = true;
698  break;
699  }
700  }
701  if (!FoundBCLibrary)
702  getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
703  << LibOmpTargetName;
704  }
705 }
706 
707 bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
708  const Option &O = A->getOption();
709  return (O.matches(options::OPT_gN_Group) &&
710  !O.matches(options::OPT_gmodules)) ||
711  O.matches(options::OPT_g_Flag) ||
712  O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
713  O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
714  O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
715  O.matches(options::OPT_gdwarf_5) ||
716  O.matches(options::OPT_gcolumn_info);
717 }
718 
719 void CudaToolChain::adjustDebugInfoKind(
720  codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
721  switch (mustEmitDebugInfo(Args)) {
722  case DisableDebugInfo:
723  DebugInfoKind = codegenoptions::NoDebugInfo;
724  break;
725  case DebugDirectivesOnly:
726  DebugInfoKind = codegenoptions::DebugDirectivesOnly;
727  break;
728  case EmitSameDebugInfoAsHost:
729  // Use same debug info level as the host.
730  break;
731  }
732 }
733 
734 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
735  ArgStringList &CC1Args) const {
736  // Check our CUDA version if we're going to include the CUDA headers.
737  if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
738  !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
739  StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
740  assert(!Arch.empty() && "Must have an explicit GPU arch.");
741  CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
742  }
743  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
744 }
745 
746 llvm::opt::DerivedArgList *
747 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
748  StringRef BoundArch,
749  Action::OffloadKind DeviceOffloadKind) const {
750  DerivedArgList *DAL =
751  HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
752  if (!DAL)
753  DAL = new DerivedArgList(Args.getBaseArgs());
754 
755  const OptTable &Opts = getDriver().getOpts();
756 
757  // For OpenMP device offloading, append derived arguments. Make sure
758  // flags are not duplicated.
759  // Also append the compute capability.
760  if (DeviceOffloadKind == Action::OFK_OpenMP) {
761  for (Arg *A : Args) {
762  bool IsDuplicate = false;
763  for (Arg *DALArg : *DAL) {
764  if (A == DALArg) {
765  IsDuplicate = true;
766  break;
767  }
768  }
769  if (!IsDuplicate)
770  DAL->append(A);
771  }
772 
773  StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
774  if (Arch.empty())
775  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
776  CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
777 
778  return DAL;
779  }
780 
781  for (Arg *A : Args) {
782  if (A->getOption().matches(options::OPT_Xarch__)) {
783  // Skip this argument unless the architecture matches BoundArch
784  if (BoundArch.empty() || A->getValue(0) != BoundArch)
785  continue;
786 
787  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
788  unsigned Prev = Index;
789  std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
790 
791  // If the argument parsing failed or more than one argument was
792  // consumed, the -Xarch_ argument's parameter tried to consume
793  // extra arguments. Emit an error and ignore.
794  //
795  // We also want to disallow any options which would alter the
796  // driver behavior; that isn't going to work in our model. We
797  // use isDriverOption() as an approximation, although things
798  // like -O4 are going to slip through.
799  if (!XarchArg || Index > Prev + 1) {
800  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
801  << A->getAsString(Args);
802  continue;
803  } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
804  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
805  << A->getAsString(Args);
806  continue;
807  }
808  XarchArg->setBaseArg(A);
809  A = XarchArg.release();
810  DAL->AddSynthesizedArg(A);
811  }
812  DAL->append(A);
813  }
814 
815  if (!BoundArch.empty()) {
816  DAL->eraseArg(options::OPT_march_EQ);
817  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
818  }
819  return DAL;
820 }
821 
822 Tool *CudaToolChain::buildAssembler() const {
823  return new tools::NVPTX::Assembler(*this);
824 }
825 
826 Tool *CudaToolChain::buildLinker() const {
827  if (OK == Action::OFK_OpenMP)
828  return new tools::NVPTX::OpenMPLinker(*this);
829  return new tools::NVPTX::Linker(*this);
830 }
831 
832 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
833  HostTC.addClangWarningOptions(CC1Args);
834 }
835 
836 ToolChain::CXXStdlibType
837 CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
838  return HostTC.GetCXXStdlibType(Args);
839 }
840 
841 void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
842  ArgStringList &CC1Args) const {
843  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
844 }
845 
846 void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
847  ArgStringList &CC1Args) const {
848  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
849 }
850 
851 void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
852  ArgStringList &CC1Args) const {
853  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
854 }
855 
856 SanitizerMask CudaToolChain::getSupportedSanitizers() const {
857  // The CudaToolChain only supports sanitizers in the sense that it allows
858  // sanitizer arguments on the command line if they are supported by the host
859  // toolchain. The CudaToolChain will actually ignore any command line
860  // arguments for any of these "supported" sanitizers. That means that no
861  // sanitization of device code is actually supported at this time.
862  //
863  // This behavior is necessary because the host and device toolchains
864  // invocations often share the command line, so the device toolchain must
865  // tolerate flags meant only for the host toolchain.
866  return HostTC.getSupportedSanitizers();
867 }
868 
869 VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
870  const ArgList &Args) const {
871  return HostTC.computeMSVCVersion(D, Args);
872 }