clang  6.0.0svn
Cuda.cpp
1 //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Cuda.h"
11 #include "InputInfo.h"
12 #include "CommonArgs.h"
13 #include "clang/Basic/Cuda.h"
14 #include "clang/Config/config.h"
16 #include "clang/Driver/Distro.h"
18 #include "clang/Driver/Driver.h"
20 #include "clang/Driver/Options.h"
21 #include "llvm/Option/ArgList.h"
22 #include "llvm/Support/Path.h"
23 #include <system_error>
24 
25 using namespace clang::driver;
26 using namespace clang::driver::toolchains;
27 using namespace clang::driver::tools;
28 using namespace clang;
29 using namespace llvm::opt;
30 
31 // Parses the contents of version.txt in a CUDA installation. It should
32 // contain one line of the form, e.g. "CUDA Version 7.5.2".
33 static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
34  if (!V.startswith("CUDA Version "))
35  return CudaVersion::UNKNOWN;
36  V = V.substr(strlen("CUDA Version "));
37  int Major = -1, Minor = -1;
38  auto First = V.split('.');
39  auto Second = First.second.split('.');
40  if (First.first.getAsInteger(10, Major) ||
41  Second.first.getAsInteger(10, Minor))
42  return CudaVersion::UNKNOWN;
43 
44  if (Major == 7 && Minor == 0) {
45  // This doesn't appear to ever happen -- version.txt doesn't exist in the
46  // CUDA 7 installs I've seen. But no harm in checking.
47  return CudaVersion::CUDA_70;
48  }
49  if (Major == 7 && Minor == 5)
50  return CudaVersion::CUDA_75;
51  if (Major == 8 && Minor == 0)
52  return CudaVersion::CUDA_80;
53  if (Major == 9 && Minor == 0)
54  return CudaVersion::CUDA_90;
55  return CudaVersion::UNKNOWN;
56 }
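// Illustrative note: version.txt normally holds a single line such as
// "CUDA Version 9.0.176"; with that input the parser above yields
// CudaVersion::CUDA_90 -- the component after the second '.' is ignored,
// and any unrecognized major/minor pair maps to CudaVersion::UNKNOWN.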
57 
58 CudaInstallationDetector::CudaInstallationDetector(
59  const Driver &D, const llvm::Triple &HostTriple,
60  const llvm::opt::ArgList &Args)
61  : D(D) {
62  SmallVector<std::string, 4> CudaPathCandidates;
63 
64  // In decreasing order so we prefer newer versions to older versions.
65  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
66 
67  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
68  CudaPathCandidates.push_back(
69  Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
70  } else if (HostTriple.isOSWindows()) {
71  for (const char *Ver : Versions)
72  CudaPathCandidates.push_back(
73  D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
74  Ver);
75  } else {
76  CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
77  for (const char *Ver : Versions)
78  CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
79 
80  if (Distro(D.getVFS()).IsDebian())
81  // Special case for Debian to have nvidia-cuda-toolkit work
82  // out of the box. More info on http://bugs.debian.org/882505
83  CudaPathCandidates.push_back(D.SysRoot + "/usr/lib/cuda");
84  }
85 
86  for (const auto &CudaPath : CudaPathCandidates) {
87  if (CudaPath.empty() || !D.getVFS().exists(CudaPath))
88  continue;
89 
90  InstallPath = CudaPath;
91  BinPath = CudaPath + "/bin";
92  IncludePath = InstallPath + "/include";
93  LibDevicePath = InstallPath + "/nvvm/libdevice";
94 
95  auto &FS = D.getVFS();
96  if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
97  continue;
98 
99  // On Linux, we have both lib and lib64 directories, and we need to choose
100  // based on our triple. On MacOS, we have only a lib directory.
101  //
102  // It's sufficient for our purposes to be flexible: If both lib and lib64
103  // exist, we choose whichever one matches our triple. Otherwise, if only
104  // lib exists, we use it.
105  if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
106  LibPath = InstallPath + "/lib64";
107  else if (FS.exists(InstallPath + "/lib"))
108  LibPath = InstallPath + "/lib";
109  else
110  continue;
111 
112  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
113  FS.getBufferForFile(InstallPath + "/version.txt");
114  if (!VersionFile) {
115  // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
116  // version.txt isn't present.
117  Version = CudaVersion::CUDA_70;
118  } else {
119  Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
120  }
121 
122  if (Version == CudaVersion::CUDA_90) {
123  // CUDA-9 uses a single libdevice file for all GPU variants.
124  std::string FilePath = LibDevicePath + "/libdevice.10.bc";
125  if (FS.exists(FilePath)) {
126  for (const char *GpuArch :
127  {"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
128  "sm_60", "sm_61", "sm_62", "sm_70"})
129  LibDeviceMap[GpuArch] = FilePath;
130  }
131  } else {
132  std::error_code EC;
133  for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
134  !EC && LI != LE; LI = LI.increment(EC)) {
135  StringRef FilePath = LI->path();
136  StringRef FileName = llvm::sys::path::filename(FilePath);
137  // Process all bitcode filenames that look like
138  // libdevice.compute_XX.YY.bc
139  const StringRef LibDeviceName = "libdevice.";
140  if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
141  continue;
142  StringRef GpuArch = FileName.slice(
143  LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
144  LibDeviceMap[GpuArch] = FilePath.str();
145  // Insert map entries for specific devices with this compute
146  // capability. NVCC's choice of the libdevice library version is
147  // rather peculiar and depends on the CUDA version.
148  if (GpuArch == "compute_20") {
149  LibDeviceMap["sm_20"] = FilePath;
150  LibDeviceMap["sm_21"] = FilePath;
151  LibDeviceMap["sm_32"] = FilePath;
152  } else if (GpuArch == "compute_30") {
153  LibDeviceMap["sm_30"] = FilePath;
154  if (Version < CudaVersion::CUDA_80) {
155  LibDeviceMap["sm_50"] = FilePath;
156  LibDeviceMap["sm_52"] = FilePath;
157  LibDeviceMap["sm_53"] = FilePath;
158  }
159  LibDeviceMap["sm_60"] = FilePath;
160  LibDeviceMap["sm_61"] = FilePath;
161  LibDeviceMap["sm_62"] = FilePath;
162  } else if (GpuArch == "compute_35") {
163  LibDeviceMap["sm_35"] = FilePath;
164  LibDeviceMap["sm_37"] = FilePath;
165  } else if (GpuArch == "compute_50") {
166  if (Version >= CudaVersion::CUDA_80) {
167  LibDeviceMap["sm_50"] = FilePath;
168  LibDeviceMap["sm_52"] = FilePath;
169  LibDeviceMap["sm_53"] = FilePath;
170  }
171  }
172  }
173  }
174 
175  // Check that we have found at least one libdevice that we can link in if
176  // -nocudalib hasn't been specified.
177  if (LibDeviceMap.empty() && !Args.hasArg(options::OPT_nocudalib))
178  continue;
179 
180  IsValid = true;
181  break;
182  }
183 }
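// Illustrative note: the search order above is --cuda-path, then the
// platform defaults (Program Files on Windows; /usr/local/cuda and its
// versioned variants elsewhere, plus /usr/lib/cuda on Debian). A candidate
// is accepted only if bin/, include/, a matching lib directory, and at
// least one usable libdevice file are present (unless -nocudalib is given).
// An install in a non-standard location can be selected explicitly, e.g.
//   clang++ --cuda-path=/opt/cuda-8.0 ...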
184 
185 void CudaInstallationDetector::AddCudaIncludeArgs(
186  const ArgList &DriverArgs, ArgStringList &CC1Args) const {
187  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
188  // Add cuda_wrappers/* to our system include path. This lets us wrap
189  // standard library headers.
190  SmallString<128> P(D.ResourceDir);
191  llvm::sys::path::append(P, "include");
192  llvm::sys::path::append(P, "cuda_wrappers");
193  CC1Args.push_back("-internal-isystem");
194  CC1Args.push_back(DriverArgs.MakeArgString(P));
195  }
196 
197  if (DriverArgs.hasArg(options::OPT_nocudainc))
198  return;
199 
200  if (!isValid()) {
201  D.Diag(diag::err_drv_no_cuda_installation);
202  return;
203  }
204 
205  CC1Args.push_back("-internal-isystem");
206  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
207  CC1Args.push_back("-include");
208  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
209 }
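// For illustration, a detected install ends up adding, in order, roughly:
//   -internal-isystem <resource-dir>/include/cuda_wrappers
//   -internal-isystem <cuda-install>/include
//   -include __clang_cuda_runtime_wrapper.h
// where <resource-dir> and <cuda-install> stand for the clang resource
// directory and the detected InstallPath; -nobuiltininc and -nocudainc
// suppress the respective pieces.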
210 
211 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
212  CudaArch Arch) const {
213  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
214  ArchsWithBadVersion.count(Arch) > 0)
215  return;
216 
217  auto MinVersion = MinVersionForCudaArch(Arch);
218  auto MaxVersion = MaxVersionForCudaArch(Arch);
219  if (Version < MinVersion || Version > MaxVersion) {
220  ArchsWithBadVersion.insert(Arch);
221  D.Diag(diag::err_drv_cuda_version_unsupported)
222  << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
223  << CudaVersionToString(MaxVersion) << InstallPath
224  << CudaVersionToString(Version);
225  }
226 }
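// Illustrative note: this is what rejects, for example, sm_70 when only a
// CUDA 7.5 or 8.0 install was found, since MinVersionForCudaArch(sm_70) is
// CUDA 9.0. The ArchsWithBadVersion set ensures the diagnostic is emitted
// only once per architecture.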
227 
228 void CudaInstallationDetector::print(raw_ostream &OS) const {
229  if (isValid())
230  OS << "Found CUDA installation: " << InstallPath << ", version "
231  << CudaVersionToString(Version) << "\n";
232 }
233 
234 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
235  const InputInfo &Output,
236  const InputInfoList &Inputs,
237  const ArgList &Args,
238  const char *LinkingOutput) const {
239  const auto &TC =
240  static_cast<const toolchains::CudaToolChain &>(getToolChain());
241  assert(TC.getTriple().isNVPTX() && "Wrong platform");
242 
243  StringRef GPUArchName;
244  // If this is an OpenMP action, we need to extract the device architecture
245  // from the -march=arch option. This option may come from the
246  // -Xopenmp-target flag or the default value.
247  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
248  GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
249  assert(!GPUArchName.empty() && "Must have an architecture passed in.");
250  } else
251  GPUArchName = JA.getOffloadingArch();
252 
253  // Obtain architecture from the action.
254  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
255  assert(gpu_arch != CudaArch::UNKNOWN &&
256  "Device action expected to have an architecture.");
257 
258  // Check that our installation's ptxas supports gpu_arch.
259  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
260  TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
261  }
262 
263  ArgStringList CmdArgs;
264  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
265  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
266  options::OPT_no_cuda_noopt_device_debug, false)) {
267  // ptxas does not accept the -g option if optimization is enabled, so
268  // we ignore the compiler's -O* options if we want debug info.
269  CmdArgs.push_back("-g");
270  CmdArgs.push_back("--dont-merge-basicblocks");
271  CmdArgs.push_back("--return-at-end");
272  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
273  // Map the -O we received to -O{0,1,2,3}.
274  //
275  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
276  // default, so it may correspond more closely to the spirit of clang -O2.
277 
278  // -O3 seems like the least-bad option when -Osomething is specified to
279  // clang but it isn't handled below.
280  StringRef OOpt = "3";
281  if (A->getOption().matches(options::OPT_O4) ||
282  A->getOption().matches(options::OPT_Ofast))
283  OOpt = "3";
284  else if (A->getOption().matches(options::OPT_O0))
285  OOpt = "0";
286  else if (A->getOption().matches(options::OPT_O)) {
287  // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
288  OOpt = llvm::StringSwitch<const char *>(A->getValue())
289  .Case("1", "1")
290  .Case("2", "2")
291  .Case("3", "3")
292  .Case("s", "2")
293  .Case("z", "2")
294  .Default("2");
295  }
296  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
297  } else {
298  // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
299  // to no optimizations, but ptxas's default is -O3.
300  CmdArgs.push_back("-O0");
301  }
302 
303  // Pass -v to ptxas if it was passed to the driver.
304  if (Args.hasArg(options::OPT_v))
305  CmdArgs.push_back("-v");
306 
307  CmdArgs.push_back("--gpu-name");
308  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
309  CmdArgs.push_back("--output-file");
310  CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
311  for (const auto& II : Inputs)
312  CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
313 
314  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
315  CmdArgs.push_back(Args.MakeArgString(A));
316 
317  // In OpenMP we need to generate relocatable code.
318  if (JA.isOffloading(Action::OFK_OpenMP) &&
319  Args.hasFlag(options::OPT_fopenmp_relocatable_target,
320  options::OPT_fnoopenmp_relocatable_target,
321  /*Default=*/ true))
322  CmdArgs.push_back("-c");
323 
324  const char *Exec;
325  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
326  Exec = A->getValue();
327  else
328  Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
329  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
330 }
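// As a sketch (file names are placeholders), a default -O0 device compile
// for sm_35 produces a command roughly like
//   ptxas -m64 -O0 --gpu-name sm_35 --output-file <output> <input>.s
// with the debug trio (-g --dont-merge-basicblocks --return-at-end)
// substituted when device debugging is requested, any -Xcuda-ptxas values
// appended verbatim, and -c added for relocatable OpenMP device code.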
331 
332 // All inputs to this linker must be from CudaDeviceActions, as we need to look
333 // at the Inputs' Actions in order to figure out which GPU architecture they
334 // correspond to.
335 void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
336  const InputInfo &Output,
337  const InputInfoList &Inputs,
338  const ArgList &Args,
339  const char *LinkingOutput) const {
340  const auto &TC =
341  static_cast<const toolchains::CudaToolChain &>(getToolChain());
342  assert(TC.getTriple().isNVPTX() && "Wrong platform");
343 
344  ArgStringList CmdArgs;
345  CmdArgs.push_back("--cuda");
346  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
347  CmdArgs.push_back(Args.MakeArgString("--create"));
348  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
349 
350  for (const auto& II : Inputs) {
351  auto *A = II.getAction();
352  assert(A->getInputs().size() == 1 &&
353  "Device offload action is expected to have a single input");
354  const char *gpu_arch_str = A->getOffloadingArch();
355  assert(gpu_arch_str &&
356  "Device action expected to have associated a GPU architecture!");
357  CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
358 
359  // We need to pass an Arch of the form "sm_XX" for cubin files and
360  // "compute_XX" for ptx.
361  const char *Arch =
362  (II.getType() == types::TY_PP_Asm)
363  ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
364  : gpu_arch_str;
365  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
366  Arch + ",file=" + II.getFilename()));
367  }
368 
369  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
370  CmdArgs.push_back(Args.MakeArgString(A));
371 
372  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
373  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
374 }
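// As a sketch (placeholders for file names), a single sm_35 input pair
// yields a fatbinary invocation roughly like
//   fatbinary --cuda -64 --create <output>
//     --image=profile=sm_35,file=<input>.cubin
//     --image=profile=compute_35,file=<input>.s
// i.e. cubin inputs are registered under their sm_XX name and PTX inputs
// under the corresponding compute_XX virtual architecture.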
375 
376 void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
377  const InputInfo &Output,
378  const InputInfoList &Inputs,
379  const ArgList &Args,
380  const char *LinkingOutput) const {
381  const auto &TC =
382  static_cast<const toolchains::CudaToolChain &>(getToolChain());
383  assert(TC.getTriple().isNVPTX() && "Wrong platform");
384 
385  ArgStringList CmdArgs;
386 
387  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
388  // host binary by the host linker.
389  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
390  "CUDA toolchain not expected for an OpenMP host device.");
391 
392  if (Output.isFilename()) {
393  CmdArgs.push_back("-o");
394  CmdArgs.push_back(Output.getFilename());
395  } else
396  assert(Output.isNothing() && "Invalid output.");
397  if (Args.hasArg(options::OPT_g_Flag))
398  CmdArgs.push_back("-g");
399 
400  if (Args.hasArg(options::OPT_v))
401  CmdArgs.push_back("-v");
402 
403  StringRef GPUArch =
404  Args.getLastArgValue(options::OPT_march_EQ);
405  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
406 
407  CmdArgs.push_back("-arch");
408  CmdArgs.push_back(Args.MakeArgString(GPUArch));
409 
410  // Add paths specified in LIBRARY_PATH environment variable as -L options.
411  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
412 
413  // Add the default clang library path.
414  SmallString<256> DefaultLibPath =
415  llvm::sys::path::parent_path(TC.getDriver().Dir);
416  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
417  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
418 
419  // Link against the library implementing OpenMP calls on the NVPTX target.
420  CmdArgs.push_back("-lomptarget-nvptx");
421 
422  for (const auto &II : Inputs) {
423  if (II.getType() == types::TY_LLVM_IR ||
424  II.getType() == types::TY_LTO_IR ||
425  II.getType() == types::TY_LTO_BC ||
426  II.getType() == types::TY_LLVM_BC) {
427  C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
428  << getToolChain().getTripleString();
429  continue;
430  }
431 
432  // Currently, we only pass the input files to the linker; we do not pass
433  // any libraries that may be valid only for the host.
434  if (!II.isFilename())
435  continue;
436 
437  const char *CubinF = C.addTempFile(
438  C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
439 
440  CmdArgs.push_back(CubinF);
441  }
442 
443  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
444 
445  const char *Exec =
446  Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
447  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
448 }
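// As a sketch (placeholders in angle brackets), the device link step looks
// roughly like
//   nvlink -o <output> -arch sm_35 -L<LIBRARY_PATH dirs>
//     -L<clang-prefix>/lib -lomptarget-nvptx <input>.cubin
// LLVM IR/bitcode inputs are rejected with err_drv_no_linker_llvm_support,
// and object inputs are renamed to .cubin (see getInputFilename below) so
// that nvlink accepts them.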
449 
450 /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
451 /// which isn't properly a linker but nonetheless performs the step of stitching
452 /// together object files from the assembler into a single blob.
453 
454 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
455  const ToolChain &HostTC, const ArgList &Args,
456  const Action::OffloadKind OK)
457  : ToolChain(D, Triple, Args), HostTC(HostTC),
458  CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
459  if (CudaInstallation.isValid())
460  getProgramPaths().push_back(CudaInstallation.getBinPath());
461  // Look up binaries in the driver directory; this is used to
462  // discover the clang-offload-bundler executable.
463  getProgramPaths().push_back(getDriver().Dir);
464 }
465 
466 std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
467  // Only object files are changed; for example, assembly files keep their .s
468  // extension. CUDA also continues to use .o, since it uses fatbinary rather
469  // than nvlink.
470  if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
471  return ToolChain::getInputFilename(Input);
472 
473  // Replace extension for object files with cubin because nvlink relies on
474  // these particular file names.
475  SmallString<256> Filename(ToolChain::getInputFilename(Input));
476  llvm::sys::path::replace_extension(Filename, "cubin");
477  return Filename.str();
478 }
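// Illustrative note: with the OpenMP offload kind, an input named foo.o is
// presented to nvlink as foo.cubin; all other combinations fall through to
// the default ToolChain behavior.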
479 
480 void CudaToolChain::addClangTargetOptions(
481  const llvm::opt::ArgList &DriverArgs,
482  llvm::opt::ArgStringList &CC1Args,
483  Action::OffloadKind DeviceOffloadingKind) const {
484  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
485 
486  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
487  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
488  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
489  DeviceOffloadingKind == Action::OFK_Cuda) &&
490  "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
491 
492  if (DeviceOffloadingKind == Action::OFK_Cuda) {
493  CC1Args.push_back("-fcuda-is-device");
494 
495  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
496  options::OPT_fno_cuda_flush_denormals_to_zero, false))
497  CC1Args.push_back("-fcuda-flush-denormals-to-zero");
498 
499  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
500  options::OPT_fno_cuda_approx_transcendentals, false))
501  CC1Args.push_back("-fcuda-approx-transcendentals");
502  }
503 
504  if (DriverArgs.hasArg(options::OPT_nocudalib))
505  return;
506 
507  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
508 
509  if (LibDeviceFile.empty()) {
510  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
511  DriverArgs.hasArg(options::OPT_S))
512  return;
513 
514  getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
515  return;
516  }
517 
518  CC1Args.push_back("-mlink-cuda-bitcode");
519  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
520 
521  if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
522  // CUDA-9 uses new instructions that are only available in PTX 6.0.
523  CC1Args.push_back("-target-feature");
524  CC1Args.push_back("+ptx60");
525  } else {
526  // Libdevice in CUDA-7.0 requires a PTX version that's more recent than
527  // LLVM's default. Use PTX 4.2, which is the PTX version that came with
528  // CUDA-7.0.
529  CC1Args.push_back("-target-feature");
530  CC1Args.push_back("+ptx42");
531  }
532 }
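// For illustration, a CUDA device compilation against a CUDA 9.0 install
// therefore ends up with cc1 flags roughly like
//   -fcuda-is-device -mlink-cuda-bitcode <.../libdevice.10.bc>
//   -target-feature +ptx60
// while older installs link the per-architecture libdevice.compute_XX.YY.bc
// file and request +ptx42 instead; -nocudalib skips the libdevice step.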
533 
534 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
535  ArgStringList &CC1Args) const {
536  // Check our CUDA version if we're going to include the CUDA headers.
537  if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
538  !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
539  StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
540  assert(!Arch.empty() && "Must have an explicit GPU arch.");
541  CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
542  }
543  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
544 }
545 
546 llvm::opt::DerivedArgList *
547 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
548  StringRef BoundArch,
549  Action::OffloadKind DeviceOffloadKind) const {
550  DerivedArgList *DAL =
551  HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
552  if (!DAL)
553  DAL = new DerivedArgList(Args.getBaseArgs());
554 
555  const OptTable &Opts = getDriver().getOpts();
556 
557  // For OpenMP device offloading, append derived arguments. Make sure
558  // flags are not duplicated.
559  // Also append the compute capability.
560  if (DeviceOffloadKind == Action::OFK_OpenMP) {
561  for (Arg *A : Args) {
562  bool IsDuplicate = false;
563  for (Arg *DALArg : *DAL) {
564  if (A == DALArg) {
565  IsDuplicate = true;
566  break;
567  }
568  }
569  if (!IsDuplicate)
570  DAL->append(A);
571  }
572 
573  StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
574  if (Arch.empty())
575  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
576  CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
577 
578  return DAL;
579  }
580 
581  for (Arg *A : Args) {
582  if (A->getOption().matches(options::OPT_Xarch__)) {
583  // Skip this argument unless the architecture matches BoundArch
584  if (BoundArch.empty() || A->getValue(0) != BoundArch)
585  continue;
586 
587  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
588  unsigned Prev = Index;
589  std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
590 
591  // If the argument parsing failed or more than one argument was
592  // consumed, the -Xarch_ argument's parameter tried to consume
593  // extra arguments. Emit an error and ignore.
594  //
595  // We also want to disallow any options which would alter the
596  // driver behavior; that isn't going to work in our model. We
597  // use isDriverOption() as an approximation, although things
598  // like -O4 are going to slip through.
599  if (!XarchArg || Index > Prev + 1) {
600  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
601  << A->getAsString(Args);
602  continue;
603  } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
604  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
605  << A->getAsString(Args);
606  continue;
607  }
608  XarchArg->setBaseArg(A);
609  A = XarchArg.release();
610  DAL->AddSynthesizedArg(A);
611  }
612  DAL->append(A);
613  }
614 
615  if (!BoundArch.empty()) {
616  DAL->eraseArg(options::OPT_march_EQ);
617  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
618  }
619  return DAL;
620 }
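// Illustrative note: for CUDA offloading, BoundArch is the GPU arch of the
// current device compilation, so an argument such as "-Xarch_sm_60 -O2" is
// forwarded only to the sm_60 device compile, and -march is rewritten to the
// bound architecture. -Xarch_ options that parse as driver options (or that
// consume extra arguments) are diagnosed and dropped. For OpenMP, the host
// arguments are reused and -march defaults to CLANG_OPENMP_NVPTX_DEFAULT_ARCH.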
621 
622 Tool *CudaToolChain::buildAssembler() const {
623  return new tools::NVPTX::Assembler(*this);
624 }
625 
626 Tool *CudaToolChain::buildLinker() const {
627  if (OK == Action::OFK_OpenMP)
628  return new tools::NVPTX::OpenMPLinker(*this);
629  return new tools::NVPTX::Linker(*this);
630 }
631 
632 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
633  HostTC.addClangWarningOptions(CC1Args);
634 }
635 
636 ToolChain::CXXStdlibType
637 CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
638  return HostTC.GetCXXStdlibType(Args);
639 }
640 
641 void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
642  ArgStringList &CC1Args) const {
643  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
644 }
645 
646 void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
647  ArgStringList &CC1Args) const {
648  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
649 }
650 
651 void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
652  ArgStringList &CC1Args) const {
653  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
654 }
655 
656 SanitizerMask CudaToolChain::getSupportedSanitizers() const {
657  // The CudaToolChain only supports sanitizers in the sense that it allows
658  // sanitizer arguments on the command line if they are supported by the host
659  // toolchain. The CudaToolChain will actually ignore any command line
660  // arguments for any of these "supported" sanitizers. That means that no
661  // sanitization of device code is actually supported at this time.
662  //
663  // This behavior is necessary because the host and device toolchains
664  // invocations often share the command line, so the device toolchain must
665  // tolerate flags meant only for the host toolchain.
666  return HostTC.getSupportedSanitizers();
667 }
668 
669 VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
670  const ArgList &Args) const {
671  return HostTC.computeMSVCVersion(D, Args);
672 }
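// Illustrative note: putting the pieces together, a compilation such as
//   clang++ -c --cuda-gpu-arch=sm_35 foo.cu
// runs a device-side cc1 (with the target options added above), then ptxas
// (NVPTX::Assembler), then fatbinary (NVPTX::Linker), and finally the host
// cc1, which embeds the resulting fat binary. With OpenMP offloading to
// nvptx64, the device objects are instead linked by nvlink
// (NVPTX::OpenMPLinker).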