Cuda.cpp (clang 10.0.0svn)
1 //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Cuda.h"
10 #include "CommonArgs.h"
11 #include "InputInfo.h"
12 #include "clang/Basic/Cuda.h"
13 #include "clang/Config/config.h"
14 #include "clang/Driver/Compilation.h"
15 #include "clang/Driver/Distro.h"
16 #include "clang/Driver/Driver.h"
17 #include "clang/Driver/DriverDiagnostic.h"
18 #include "clang/Driver/Options.h"
19 #include "llvm/Option/ArgList.h"
20 #include "llvm/Support/FileSystem.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/Support/Process.h"
23 #include "llvm/Support/Program.h"
24 #include "llvm/Support/VirtualFileSystem.h"
25 #include <system_error>
26 
27 using namespace clang::driver;
28 using namespace clang::driver::toolchains;
29 using namespace clang::driver::tools;
30 using namespace clang;
31 using namespace llvm::opt;
32 
33 // Parses the contents of version.txt in a CUDA installation. It should
34 // contain one line of the form, e.g. "CUDA Version 7.5.2".
35 static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
36  if (!V.startswith("CUDA Version "))
37  return CudaVersion::UNKNOWN;
38  V = V.substr(strlen("CUDA Version "));
39  int Major = -1, Minor = -1;
40  auto First = V.split('.');
41  auto Second = First.second.split('.');
42  if (First.first.getAsInteger(10, Major) ||
43  Second.first.getAsInteger(10, Minor))
44  return CudaVersion::UNKNOWN;
45 
46  if (Major == 7 && Minor == 0) {
47  // This doesn't appear to ever happen -- version.txt doesn't exist in the
48  // CUDA 7 installs I've seen. But no harm in checking.
49  return CudaVersion::CUDA_70;
50  }
51  if (Major == 7 && Minor == 5)
52  return CudaVersion::CUDA_75;
53  if (Major == 8 && Minor == 0)
54  return CudaVersion::CUDA_80;
55  if (Major == 9 && Minor == 0)
56  return CudaVersion::CUDA_90;
57  if (Major == 9 && Minor == 1)
58  return CudaVersion::CUDA_91;
59  if (Major == 9 && Minor == 2)
60  return CudaVersion::CUDA_92;
61  if (Major == 10 && Minor == 0)
62  return CudaVersion::CUDA_100;
63  if (Major == 10 && Minor == 1)
64  return CudaVersion::CUDA_101;
65  return CudaVersion::UNKNOWN;
66 }
67 
68 CudaInstallationDetector::CudaInstallationDetector(
69  const Driver &D, const llvm::Triple &HostTriple,
70  const llvm::opt::ArgList &Args)
71  : D(D) {
72  struct Candidate {
73  std::string Path;
74  bool StrictChecking;
75 
76  Candidate(std::string Path, bool StrictChecking = false)
77  : Path(Path), StrictChecking(StrictChecking) {}
78  };
79  SmallVector<Candidate, 4> Candidates;
80 
81  // In decreasing order so we prefer newer versions to older versions.
82  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
83 
84  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
85  Candidates.emplace_back(
86  Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
87  } else if (HostTriple.isOSWindows()) {
88  for (const char *Ver : Versions)
89  Candidates.emplace_back(
90  D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
91  Ver);
92  } else {
93  if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
94  // Try to find ptxas binary. If the executable is located in a directory
95  // called 'bin/', its parent directory might be a good guess for a valid
96  // CUDA installation.
97  // However, some distributions might install 'ptxas' to /usr/bin. In that
98  // case the candidate would be '/usr' which passes the following checks
99  // because '/usr/include' exists as well. To avoid this case, we always
100  // check for the directory potentially containing files for libdevice,
101  // even if the user passes -nocudalib.
102  if (llvm::ErrorOr<std::string> ptxas =
103  llvm::sys::findProgramByName("ptxas")) {
104  SmallString<256> ptxasAbsolutePath;
105  llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
106 
107  StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
108  if (llvm::sys::path::filename(ptxasDir) == "bin")
109  Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
110  /*StrictChecking=*/true);
111  }
112  }
113 
114  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
115  for (const char *Ver : Versions)
116  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
117 
118  if (Distro(D.getVFS()).IsDebian() || Distro(D.getVFS()).IsUbuntu())
119  // Special case for Debian to have nvidia-cuda-toolkit work
120  // out of the box. More info on http://bugs.debian.org/882505
121  Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
122  }
123 
124  bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
125 
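 // Probe each candidate for a usable layout: bin/ and include/ must exist,
 // and nvvm/libdevice/ must also be present unless -nocudalib was given
 // (candidates found via ptxas on PATH are checked strictly and always
 // require libdevice).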
126  for (const auto &Candidate : Candidates) {
127  InstallPath = Candidate.Path;
128  if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
129  continue;
130 
131  BinPath = InstallPath + "/bin";
132  IncludePath = InstallPath + "/include";
133  LibDevicePath = InstallPath + "/nvvm/libdevice";
134 
135  auto &FS = D.getVFS();
136  if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
137  continue;
138  bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
139  if (CheckLibDevice && !FS.exists(LibDevicePath))
140  continue;
141 
142  // On Linux, we have both lib and lib64 directories, and we need to choose
143  // based on our triple. On MacOS, we have only a lib directory.
144  //
145  // It's sufficient for our purposes to be flexible: If both lib and lib64
146  // exist, we choose whichever one matches our triple. Otherwise, if only
147  // lib exists, we use it.
148  if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
149  LibPath = InstallPath + "/lib64";
150  else if (FS.exists(InstallPath + "/lib"))
151  LibPath = InstallPath + "/lib";
152  else
153  continue;
154 
155  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
156  FS.getBufferForFile(InstallPath + "/version.txt");
157  if (!VersionFile) {
158  // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
159  // version.txt isn't present.
160  Version = CudaVersion::CUDA_70;
161  } else {
162  Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
163  }
164 
165  if (Version >= CudaVersion::CUDA_90) {
166  // CUDA-9+ uses a single libdevice file for all GPU variants.
167  std::string FilePath = LibDevicePath + "/libdevice.10.bc";
168  if (FS.exists(FilePath)) {
169  for (const char *GpuArchName :
170  {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
171  "sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) {
172  const CudaArch GpuArch = StringToCudaArch(GpuArchName);
173  if (Version >= MinVersionForCudaArch(GpuArch) &&
174  Version <= MaxVersionForCudaArch(GpuArch))
175  LibDeviceMap[GpuArchName] = FilePath;
176  }
177  }
178  } else {
179  std::error_code EC;
180  for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
181  !EC && LI != LE; LI = LI.increment(EC)) {
182  StringRef FilePath = LI->path();
183  StringRef FileName = llvm::sys::path::filename(FilePath);
184  // Process all bitcode filenames that look like
185  // libdevice.compute_XX.YY.bc
186  const StringRef LibDeviceName = "libdevice.";
187  if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
188  continue;
189  StringRef GpuArch = FileName.slice(
190  LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
191  LibDeviceMap[GpuArch] = FilePath.str();
192  // Insert map entries for specific devices with this compute
193  // capability. NVCC's choice of the libdevice library version is
194  // rather peculiar and depends on the CUDA version.
195  if (GpuArch == "compute_20") {
196  LibDeviceMap["sm_20"] = FilePath;
197  LibDeviceMap["sm_21"] = FilePath;
198  LibDeviceMap["sm_32"] = FilePath;
199  } else if (GpuArch == "compute_30") {
200  LibDeviceMap["sm_30"] = FilePath;
201  if (Version < CudaVersion::CUDA_80) {
202  LibDeviceMap["sm_50"] = FilePath;
203  LibDeviceMap["sm_52"] = FilePath;
204  LibDeviceMap["sm_53"] = FilePath;
205  }
206  LibDeviceMap["sm_60"] = FilePath;
207  LibDeviceMap["sm_61"] = FilePath;
208  LibDeviceMap["sm_62"] = FilePath;
209  } else if (GpuArch == "compute_35") {
210  LibDeviceMap["sm_35"] = FilePath;
211  LibDeviceMap["sm_37"] = FilePath;
212  } else if (GpuArch == "compute_50") {
213  if (Version >= CudaVersion::CUDA_80) {
214  LibDeviceMap["sm_50"] = FilePath;
215  LibDeviceMap["sm_52"] = FilePath;
216  LibDeviceMap["sm_53"] = FilePath;
217  }
218  }
219  }
220  }
221 
222  // Check that we have found at least one libdevice that we can link in if
223  // -nocudalib hasn't been specified.
224  if (LibDeviceMap.empty() && !NoCudaLib)
225  continue;
226 
227  IsValid = true;
228  break;
229  }
230 }
231 
232 void CudaInstallationDetector::AddCudaIncludeArgs(
233  const ArgList &DriverArgs, ArgStringList &CC1Args) const {
234  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
235  // Add cuda_wrappers/* to our system include path. This lets us wrap
236  // standard library headers.
237  SmallString<128> P(D.ResourceDir);
238  llvm::sys::path::append(P, "include");
239  llvm::sys::path::append(P, "cuda_wrappers");
240  CC1Args.push_back("-internal-isystem");
241  CC1Args.push_back(DriverArgs.MakeArgString(P));
242  }
243 
244  if (DriverArgs.hasArg(options::OPT_nocudainc))
245  return;
246 
247  if (!isValid()) {
248  D.Diag(diag::err_drv_no_cuda_installation);
249  return;
250  }
251 
252  CC1Args.push_back("-internal-isystem");
253  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
254  CC1Args.push_back("-include");
255  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
256 }
257 
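// Emit a driver error if the detected CUDA version does not support the given
// arch; each offending arch is diagnosed only once.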
258 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
259  CudaArch Arch) const {
260  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
261  ArchsWithBadVersion.count(Arch) > 0)
262  return;
263 
264  auto MinVersion = MinVersionForCudaArch(Arch);
265  auto MaxVersion = MaxVersionForCudaArch(Arch);
266  if (Version < MinVersion || Version > MaxVersion) {
267  ArchsWithBadVersion.insert(Arch);
268  D.Diag(diag::err_drv_cuda_version_unsupported)
269  << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
270  << CudaVersionToString(MaxVersion) << InstallPath
271  << CudaVersionToString(Version);
272  }
273 }
274 
275 void CudaInstallationDetector::print(raw_ostream &OS) const {
276  if (isValid())
277  OS << "Found CUDA installation: " << InstallPath << ", version "
278  << CudaVersionToString(Version) << "\n";
279 }
280 
281 namespace {
282 /// Debug info level for the NVPTX devices. We may need to emit a different
283 /// debug info level for the host and for the device itself. This type controls
284 /// emission of debug info for the devices: it either disables debug info
285 /// emission completely, emits debug directives only, or emits the same debug
286 /// info as for the host.
287 enum DeviceDebugInfoLevel {
288  DisableDebugInfo, /// Do not emit debug info for the devices.
289  DebugDirectivesOnly, /// Emit only debug directives.
290  EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
291  /// host.
292 };
293 } // anonymous namespace
294 
295 /// Define the debug info level for the NVPTX devices. If debug info for both
296 /// the host and the device is disabled (-g0/-ggdb0 or no debug options at
297 /// all), no debug info is emitted. If only debug directives are requested for
298 /// both host and device (-gline-directives-only), or device-side debug info is
299 /// disabled (optimization is on and --cuda-noopt-device-debug was not
300 /// specified), only debug directives are emitted for the device. Otherwise,
301 /// use the same debug info level as for the host (limited to the DWARF2
302 /// standard, which is all that is supported).
303 static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
304  const Arg *A = Args.getLastArg(options::OPT_O_Group);
305  bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
306  Args.hasFlag(options::OPT_cuda_noopt_device_debug,
307  options::OPT_no_cuda_noopt_device_debug,
308  /*Default=*/false);
309  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
310  const Option &Opt = A->getOption();
311  if (Opt.matches(options::OPT_gN_Group)) {
312  if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
313  return DisableDebugInfo;
314  if (Opt.matches(options::OPT_gline_directives_only))
315  return DebugDirectivesOnly;
316  }
317  return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
318  }
319  return DisableDebugInfo;
320 }
321 
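// Build the ptxas invocation: assemble the PTX for a single GPU arch into a
// GPU object (cubin), adding -c when relocatable device code is required
// (OpenMP offloading or -fgpu-rdc).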
322 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
323  const InputInfo &Output,
324  const InputInfoList &Inputs,
325  const ArgList &Args,
326  const char *LinkingOutput) const {
327  const auto &TC =
328  static_cast<const toolchains::CudaToolChain &>(getToolChain());
329  assert(TC.getTriple().isNVPTX() && "Wrong platform");
330 
331  StringRef GPUArchName;
332  // If this is an OpenMP action we need to extract the device architecture
333  // from the -march=arch option. This option may come from -Xopenmp-target
334  // flag or the default value.
335  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
336  GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
337  assert(!GPUArchName.empty() && "Must have an architecture passed in.");
338  } else
339  GPUArchName = JA.getOffloadingArch();
340 
341  // Obtain architecture from the action.
342  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
343  assert(gpu_arch != CudaArch::UNKNOWN &&
344  "Device action expected to have an architecture.");
345 
346  // Check that our installation's ptxas supports gpu_arch.
347  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
348  TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
349  }
350 
351  ArgStringList CmdArgs;
352  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
353  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
354  if (DIKind == EmitSameDebugInfoAsHost) {
355  // ptxas does not accept -g option if optimization is enabled, so
356  // we ignore the compiler's -O* options if we want debug info.
357  CmdArgs.push_back("-g");
358  CmdArgs.push_back("--dont-merge-basicblocks");
359  CmdArgs.push_back("--return-at-end");
360  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
361  // Map the -O we received to -O{0,1,2,3}.
362  //
363  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
364  // default, so it may correspond more closely to the spirit of clang -O2.
365 
366  // -O3 seems like the least-bad option when -Osomething is specified to
367  // clang but it isn't handled below.
368  StringRef OOpt = "3";
369  if (A->getOption().matches(options::OPT_O4) ||
370  A->getOption().matches(options::OPT_Ofast))
371  OOpt = "3";
372  else if (A->getOption().matches(options::OPT_O0))
373  OOpt = "0";
374  else if (A->getOption().matches(options::OPT_O)) {
375  // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
376  OOpt = llvm::StringSwitch<const char *>(A->getValue())
377  .Case("1", "1")
378  .Case("2", "2")
379  .Case("3", "3")
380  .Case("s", "2")
381  .Case("z", "2")
382  .Default("2");
383  }
384  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
385  } else {
386  // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
387  // to no optimizations, but ptxas's default is -O3.
388  CmdArgs.push_back("-O0");
389  }
390  if (DIKind == DebugDirectivesOnly)
391  CmdArgs.push_back("-lineinfo");
392 
393  // Pass -v to ptxas if it was passed to the driver.
394  if (Args.hasArg(options::OPT_v))
395  CmdArgs.push_back("-v");
396 
397  CmdArgs.push_back("--gpu-name");
398  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
399  CmdArgs.push_back("--output-file");
400  CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
401  for (const auto& II : Inputs)
402  CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
403 
404  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
405  CmdArgs.push_back(Args.MakeArgString(A));
406 
407  bool Relocatable = false;
408  if (JA.isOffloading(Action::OFK_OpenMP))
409  // In OpenMP we need to generate relocatable code.
410  Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
411  options::OPT_fnoopenmp_relocatable_target,
412  /*Default=*/true);
413  else if (JA.isOffloading(Action::OFK_Cuda))
414  Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
415  options::OPT_fno_gpu_rdc, /*Default=*/false);
416 
417  if (Relocatable)
418  CmdArgs.push_back("-c");
419 
420  const char *Exec;
421  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
422  Exec = A->getValue();
423  else
424  Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
425  C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
426 }
427 
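// PTX for a given GPU arch is bundled into the fatbinary by default; the last
// --cuda-include-ptx/--no-cuda-include-ptx argument naming that arch (or
// 'all') decides whether it is kept.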
428 static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
429  bool includePTX = true;
430  for (Arg *A : Args) {
431  if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
432  A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
433  continue;
434  A->claim();
435  const StringRef ArchStr = A->getValue();
436  if (ArchStr == "all" || ArchStr == gpu_arch) {
437  includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
438  continue;
439  }
440  }
441  return includePTX;
442 }
443 
444 // All inputs to this linker must be from CudaDeviceActions, as we need to look
445 // at the Inputs' Actions in order to figure out which GPU architecture they
446 // correspond to.
447 void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
448  const InputInfo &Output,
449  const InputInfoList &Inputs,
450  const ArgList &Args,
451  const char *LinkingOutput) const {
452  const auto &TC =
453  static_cast<const toolchains::CudaToolChain &>(getToolChain());
454  assert(TC.getTriple().isNVPTX() && "Wrong platform");
455 
456  ArgStringList CmdArgs;
457  if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
458  CmdArgs.push_back("--cuda");
459  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
460  CmdArgs.push_back(Args.MakeArgString("--create"));
461  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
462  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
463  CmdArgs.push_back("-g");
464 
465  for (const auto& II : Inputs) {
466  auto *A = II.getAction();
467  assert(A->getInputs().size() == 1 &&
468  "Device offload action is expected to have a single input");
469  const char *gpu_arch_str = A->getOffloadingArch();
470  assert(gpu_arch_str &&
471  "Device action expected to have an associated GPU architecture!");
472  CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
473 
474  if (II.getType() == types::TY_PP_Asm &&
475  !shouldIncludePTX(Args, gpu_arch_str))
476  continue;
477  // We need to pass an Arch of the form "sm_XX" for cubin files and
478  // "compute_XX" for ptx.
479  const char *Arch =
480  (II.getType() == types::TY_PP_Asm)
481  ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
482  : gpu_arch_str;
483  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
484  Arch + ",file=" + II.getFilename()));
485  }
486 
487  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
488  CmdArgs.push_back(Args.MakeArgString(A));
489 
490  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
491  C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
492 }
493 
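// Build the nvlink invocation that links the per-arch cubins (together with
// libomptarget-nvptx) into a device image for OpenMP offloading.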
494 void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
495  const InputInfo &Output,
496  const InputInfoList &Inputs,
497  const ArgList &Args,
498  const char *LinkingOutput) const {
499  const auto &TC =
500  static_cast<const toolchains::CudaToolChain &>(getToolChain());
501  assert(TC.getTriple().isNVPTX() && "Wrong platform");
502 
503  ArgStringList CmdArgs;
504 
505  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
506  // host binary by the host linker.
507  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
508  "CUDA toolchain not expected for an OpenMP host device.");
509 
510  if (Output.isFilename()) {
511  CmdArgs.push_back("-o");
512  CmdArgs.push_back(Output.getFilename());
513  } else
514  assert(Output.isNothing() && "Invalid output.");
515  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
516  CmdArgs.push_back("-g");
517 
518  if (Args.hasArg(options::OPT_v))
519  CmdArgs.push_back("-v");
520 
521  StringRef GPUArch =
522  Args.getLastArgValue(options::OPT_march_EQ);
523  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
524 
525  CmdArgs.push_back("-arch");
526  CmdArgs.push_back(Args.MakeArgString(GPUArch));
527 
528  // Assume that the directory specified with --libomptarget_nvptx_path
529  // contains the static library libomptarget-nvptx.a.
530  if (const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
531  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + A->getValue()));
532 
533  // Add paths specified in LIBRARY_PATH environment variable as -L options.
534  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
535 
536  // Add paths for the default clang library path.
537  SmallString<256> DefaultLibPath =
538  llvm::sys::path::parent_path(TC.getDriver().Dir);
539  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
540  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
541 
542  // Add linking against library implementing OpenMP calls on NVPTX target.
543  CmdArgs.push_back("-lomptarget-nvptx");
544 
545  for (const auto &II : Inputs) {
546  if (II.getType() == types::TY_LLVM_IR ||
547  II.getType() == types::TY_LTO_IR ||
548  II.getType() == types::TY_LTO_BC ||
549  II.getType() == types::TY_LLVM_BC) {
550  C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
551  << getToolChain().getTripleString();
552  continue;
553  }
554 
555  // Currently, we only pass the input files to the linker; we do not pass
556  // any libraries that may be valid only for the host.
557  if (!II.isFilename())
558  continue;
559 
560  const char *CubinF = C.addTempFile(
561  C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
562 
563  CmdArgs.push_back(CubinF);
564  }
565 
566  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
567 
568  const char *Exec =
569  Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
570  C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
571 }
572 
573 /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
574 /// which isn't properly a linker but nonetheless performs the step of stitching
575 /// together object files from the assembler into a single blob.
576 
577 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
578  const ToolChain &HostTC, const ArgList &Args,
579  const Action::OffloadKind OK)
580  : ToolChain(D, Triple, Args), HostTC(HostTC),
581  CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
582  if (CudaInstallation.isValid())
583  getProgramPaths().push_back(CudaInstallation.getBinPath());
584  // Lookup binaries into the driver directory, this is used to
585  // discover the clang-offload-bundler executable.
586  getProgramPaths().push_back(getDriver().Dir);
587 }
588 
589 std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
590  // Only object files are changed, for example assembly files keep their .s
591  // extensions. CUDA also continues to use .o as they don't use nvlink but
592  // fatbinary.
593  if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
594  return ToolChain::getInputFilename(Input);
595 
596  // Replace extension for object files with cubin because nvlink relies on
597  // these particular file names.
598  SmallString<256> Filename(ToolChain::getInputFilename(Input));
599  llvm::sys::path::replace_extension(Filename, "cubin");
600  return Filename.str();
601 }
602 
603 void CudaToolChain::addClangTargetOptions(
604  const llvm::opt::ArgList &DriverArgs,
605  llvm::opt::ArgStringList &CC1Args,
606  Action::OffloadKind DeviceOffloadingKind) const {
607  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
608 
609  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
610  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
611  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
612  DeviceOffloadingKind == Action::OFK_Cuda) &&
613  "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
614 
615  if (DeviceOffloadingKind == Action::OFK_Cuda) {
616  CC1Args.push_back("-fcuda-is-device");
617 
618  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
619  options::OPT_fno_cuda_flush_denormals_to_zero, false))
620  CC1Args.push_back("-fcuda-flush-denormals-to-zero");
621 
622  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
623  options::OPT_fno_cuda_approx_transcendentals, false))
624  CC1Args.push_back("-fcuda-approx-transcendentals");
625 
626  if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
627  false))
628  CC1Args.push_back("-fgpu-rdc");
629  }
630 
631  if (DriverArgs.hasArg(options::OPT_nocudalib))
632  return;
633 
634  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
635 
636  if (LibDeviceFile.empty()) {
637  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
638  DriverArgs.hasArg(options::OPT_S))
639  return;
640 
641  getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
642  return;
643  }
644 
645  CC1Args.push_back("-mlink-builtin-bitcode");
646  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
647 
648  // New CUDA versions often introduce new instructions that are only supported
649  // by newer PTX versions, so we need to raise the PTX level to enable them in
650  // the NVPTX back-end.
651  const char *PtxFeature = nullptr;
652  switch(CudaInstallation.version()) {
653  case CudaVersion::CUDA_101:
654  PtxFeature = "+ptx64";
655  break;
656  case CudaVersion::CUDA_100:
657  PtxFeature = "+ptx63";
658  break;
659  case CudaVersion::CUDA_92:
660  PtxFeature = "+ptx61";
661  break;
662  case CudaVersion::CUDA_91:
663  PtxFeature = "+ptx61";
664  break;
665  case CudaVersion::CUDA_90:
666  PtxFeature = "+ptx60";
667  break;
668  default:
669  PtxFeature = "+ptx42";
670  }
671  CC1Args.append({"-target-feature", PtxFeature});
672  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
673  options::OPT_fno_cuda_short_ptr, false))
674  CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
675 
677  CC1Args.push_back(DriverArgs.MakeArgString(
678  Twine("-target-sdk-version=") +
679  CudaVersionToString(CudaInstallation.version())));
680 
681  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
682  SmallVector<StringRef, 8> LibraryPaths;
683  if (const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
684  LibraryPaths.push_back(A->getValue());
685 
686  // Add user defined library paths from LIBRARY_PATH.
687  llvm::Optional<std::string> LibPath =
688  llvm::sys::Process::GetEnv("LIBRARY_PATH");
689  if (LibPath) {
690  SmallVector<StringRef, 8> Frags;
691  const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
692  llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
693  for (StringRef Path : Frags)
694  LibraryPaths.emplace_back(Path.trim());
695  }
696 
697  // Add path to lib / lib64 folder.
698  SmallString<256> DefaultLibPath =
699  llvm::sys::path::parent_path(getDriver().Dir);
700  llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
701  LibraryPaths.emplace_back(DefaultLibPath.c_str());
702 
703  std::string LibOmpTargetName =
704  "libomptarget-nvptx-" + GpuArch.str() + ".bc";
705  bool FoundBCLibrary = false;
706  for (StringRef LibraryPath : LibraryPaths) {
707  SmallString<128> LibOmpTargetFile(LibraryPath);
708  llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
709  if (llvm::sys::fs::exists(LibOmpTargetFile)) {
710  CC1Args.push_back("-mlink-builtin-bitcode");
711  CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
712  FoundBCLibrary = true;
713  break;
714  }
715  }
716  if (!FoundBCLibrary)
717  getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
718  << LibOmpTargetName;
719  }
720 }
721 
722 bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
723  const Option &O = A->getOption();
724  return (O.matches(options::OPT_gN_Group) &&
725  !O.matches(options::OPT_gmodules)) ||
726  O.matches(options::OPT_g_Flag) ||
727  O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
728  O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
729  O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
730  O.matches(options::OPT_gdwarf_5) ||
731  O.matches(options::OPT_gcolumn_info);
732 }
733 
734 void CudaToolChain::adjustDebugInfoKind(
735  codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
736  switch (mustEmitDebugInfo(Args)) {
737  case DisableDebugInfo:
738  DebugInfoKind = codegenoptions::NoDebugInfo;
739  break;
740  case DebugDirectivesOnly:
741  DebugInfoKind = codegenoptions::DebugDirectivesOnly;
742  break;
743  case EmitSameDebugInfoAsHost:
744  // Use same debug info level as the host.
745  break;
746  }
747 }
748 
749 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
750  ArgStringList &CC1Args) const {
751  // Check our CUDA version if we're going to include the CUDA headers.
752  if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
753  !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
754  StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
755  assert(!Arch.empty() && "Must have an explicit GPU arch.");
756  CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
757  }
758  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
759 }
760 
761 llvm::opt::DerivedArgList *
762 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
763  StringRef BoundArch,
764  Action::OffloadKind DeviceOffloadKind) const {
765  DerivedArgList *DAL =
766  HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
767  if (!DAL)
768  DAL = new DerivedArgList(Args.getBaseArgs());
769 
770  const OptTable &Opts = getDriver().getOpts();
771 
772  // For OpenMP device offloading, append derived arguments. Make sure
773  // flags are not duplicated.
774  // Also append the compute capability.
775  if (DeviceOffloadKind == Action::OFK_OpenMP) {
776  for (Arg *A : Args) {
777  bool IsDuplicate = false;
778  for (Arg *DALArg : *DAL) {
779  if (A == DALArg) {
780  IsDuplicate = true;
781  break;
782  }
783  }
784  if (!IsDuplicate)
785  DAL->append(A);
786  }
787 
788  StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
789  if (Arch.empty())
790  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
791  CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
792 
793  return DAL;
794  }
795 
796  for (Arg *A : Args) {
797  if (A->getOption().matches(options::OPT_Xarch__)) {
798  // Skip this argument unless the architecture matches BoundArch
799  if (BoundArch.empty() || A->getValue(0) != BoundArch)
800  continue;
801 
802  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
803  unsigned Prev = Index;
804  std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
805 
806  // If the argument parsing failed or more than one argument was
807  // consumed, the -Xarch_ argument's parameter tried to consume
808  // extra arguments. Emit an error and ignore.
809  //
810  // We also want to disallow any options which would alter the
811  // driver behavior; that isn't going to work in our model. We
812  // use isDriverOption() as an approximation, although things
813  // like -O4 are going to slip through.
814  if (!XarchArg || Index > Prev + 1) {
815  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
816  << A->getAsString(Args);
817  continue;
818  } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
819  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
820  << A->getAsString(Args);
821  continue;
822  }
823  XarchArg->setBaseArg(A);
824  A = XarchArg.release();
825  DAL->AddSynthesizedArg(A);
826  }
827  DAL->append(A);
828  }
829 
830  if (!BoundArch.empty()) {
831  DAL->eraseArg(options::OPT_march_EQ);
832  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
833  }
834  return DAL;
835 }
836 
837 Tool *CudaToolChain::buildAssembler() const {
838  return new tools::NVPTX::Assembler(*this);
839 }
840 
841 Tool *CudaToolChain::buildLinker() const {
842  if (OK == Action::OFK_OpenMP)
843  return new tools::NVPTX::OpenMPLinker(*this);
844  return new tools::NVPTX::Linker(*this);
845 }
846 
847 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
848  HostTC.addClangWarningOptions(CC1Args);
849 }
850 
851 ToolChain::CXXStdlibType
852 CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
853  return HostTC.GetCXXStdlibType(Args);
854 }
855 
856 void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
857  ArgStringList &CC1Args) const {
858  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
859 }
860 
861 void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
862  ArgStringList &CC1Args) const {
863  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
864 }
865 
866 void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
867  ArgStringList &CC1Args) const {
868  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
869 }
870 
871 SanitizerMask CudaToolChain::getSupportedSanitizers() const {
872  // The CudaToolChain only supports sanitizers in the sense that it allows
873  // sanitizer arguments on the command line if they are supported by the host
874  // toolchain. The CudaToolChain will actually ignore any command line
875  // arguments for any of these "supported" sanitizers. That means that no
876  // sanitization of device code is actually supported at this time.
877  //
878  // This behavior is necessary because the host and device toolchains
879  // invocations often share the command line, so the device toolchain must
880  // tolerate flags meant only for the host toolchain.
881  return HostTC.getSupportedSanitizers();
882 }
883 
884 VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
885  const ArgList &Args) const {
886  return HostTC.computeMSVCVersion(D, Args);
887 }