clang  6.0.0svn
Cuda.cpp
Go to the documentation of this file.
1 //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Cuda.h"
11 #include "InputInfo.h"
12 #include "CommonArgs.h"
13 #include "clang/Basic/Cuda.h"
14 #include "clang/Config/config.h"
17 #include "clang/Driver/Driver.h"
19 #include "clang/Driver/Options.h"
20 #include "llvm/Option/ArgList.h"
21 #include "llvm/Support/Path.h"
22 #include <system_error>
23 
24 using namespace clang::driver;
25 using namespace clang::driver::toolchains;
26 using namespace clang::driver::tools;
27 using namespace clang;
28 using namespace llvm::opt;
29 
30 // Parses the contents of version.txt in an CUDA installation. It should
31 // contain one line of the from e.g. "CUDA Version 7.5.2".
32 static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
33  if (!V.startswith("CUDA Version "))
34  return CudaVersion::UNKNOWN;
35  V = V.substr(strlen("CUDA Version "));
36  int Major = -1, Minor = -1;
37  auto First = V.split('.');
38  auto Second = First.second.split('.');
39  if (First.first.getAsInteger(10, Major) ||
40  Second.first.getAsInteger(10, Minor))
41  return CudaVersion::UNKNOWN;
42 
43  if (Major == 7 && Minor == 0) {
44  // This doesn't appear to ever happen -- version.txt doesn't exist in the
45  // CUDA 7 installs I've seen. But no harm in checking.
46  return CudaVersion::CUDA_70;
47  }
48  if (Major == 7 && Minor == 5)
49  return CudaVersion::CUDA_75;
50  if (Major == 8 && Minor == 0)
51  return CudaVersion::CUDA_80;
52  if (Major == 9 && Minor == 0)
53  return CudaVersion::CUDA_90;
54  return CudaVersion::UNKNOWN;
55 }
56 
58  const Driver &D, const llvm::Triple &HostTriple,
59  const llvm::opt::ArgList &Args)
60  : D(D) {
61  SmallVector<std::string, 4> CudaPathCandidates;
62 
63  // In decreasing order so we prefer newer versions to older versions.
64  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
65 
66  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
67  CudaPathCandidates.push_back(
68  Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
69  } else if (HostTriple.isOSWindows()) {
70  for (const char *Ver : Versions)
71  CudaPathCandidates.push_back(
72  D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
73  Ver);
74  } else {
75  CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
76  for (const char *Ver : Versions)
77  CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
78  }
79 
80  for (const auto &CudaPath : CudaPathCandidates) {
81  if (CudaPath.empty() || !D.getVFS().exists(CudaPath))
82  continue;
83 
84  InstallPath = CudaPath;
85  BinPath = CudaPath + "/bin";
86  IncludePath = InstallPath + "/include";
87  LibDevicePath = InstallPath + "/nvvm/libdevice";
88 
89  auto &FS = D.getVFS();
90  if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
91  continue;
92 
93  // On Linux, we have both lib and lib64 directories, and we need to choose
94  // based on our triple. On MacOS, we have only a lib directory.
95  //
96  // It's sufficient for our purposes to be flexible: If both lib and lib64
97  // exist, we choose whichever one matches our triple. Otherwise, if only
98  // lib exists, we use it.
99  if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
100  LibPath = InstallPath + "/lib64";
101  else if (FS.exists(InstallPath + "/lib"))
102  LibPath = InstallPath + "/lib";
103  else
104  continue;
105 
106  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
107  FS.getBufferForFile(InstallPath + "/version.txt");
108  if (!VersionFile) {
109  // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
110  // version.txt isn't present.
111  Version = CudaVersion::CUDA_70;
112  } else {
113  Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
114  }
115 
116  if (Version == CudaVersion::CUDA_90) {
117  // CUDA-9 uses single libdevice file for all GPU variants.
118  std::string FilePath = LibDevicePath + "/libdevice.10.bc";
119  if (FS.exists(FilePath)) {
120  for (const char *GpuArch :
121  {"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
122  "sm_60", "sm_61", "sm_62", "sm_70"})
123  LibDeviceMap[GpuArch] = FilePath;
124  }
125  } else {
126  std::error_code EC;
127  for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
128  !EC && LI != LE; LI = LI.increment(EC)) {
129  StringRef FilePath = LI->path();
130  StringRef FileName = llvm::sys::path::filename(FilePath);
131  // Process all bitcode filenames that look like
132  // libdevice.compute_XX.YY.bc
133  const StringRef LibDeviceName = "libdevice.";
134  if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
135  continue;
136  StringRef GpuArch = FileName.slice(
137  LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
138  LibDeviceMap[GpuArch] = FilePath.str();
139  // Insert map entries for specifc devices with this compute
140  // capability. NVCC's choice of the libdevice library version is
141  // rather peculiar and depends on the CUDA version.
142  if (GpuArch == "compute_20") {
143  LibDeviceMap["sm_20"] = FilePath;
144  LibDeviceMap["sm_21"] = FilePath;
145  LibDeviceMap["sm_32"] = FilePath;
146  } else if (GpuArch == "compute_30") {
147  LibDeviceMap["sm_30"] = FilePath;
148  if (Version < CudaVersion::CUDA_80) {
149  LibDeviceMap["sm_50"] = FilePath;
150  LibDeviceMap["sm_52"] = FilePath;
151  LibDeviceMap["sm_53"] = FilePath;
152  }
153  LibDeviceMap["sm_60"] = FilePath;
154  LibDeviceMap["sm_61"] = FilePath;
155  LibDeviceMap["sm_62"] = FilePath;
156  } else if (GpuArch == "compute_35") {
157  LibDeviceMap["sm_35"] = FilePath;
158  LibDeviceMap["sm_37"] = FilePath;
159  } else if (GpuArch == "compute_50") {
160  if (Version >= CudaVersion::CUDA_80) {
161  LibDeviceMap["sm_50"] = FilePath;
162  LibDeviceMap["sm_52"] = FilePath;
163  LibDeviceMap["sm_53"] = FilePath;
164  }
165  }
166  }
167  }
168 
169  // Check that we have found at least one libdevice that we can link in if
170  // -nocudalib hasn't been specified.
171  if (LibDeviceMap.empty() && !Args.hasArg(options::OPT_nocudalib))
172  continue;
173 
174  IsValid = true;
175  break;
176  }
177 }
178 
180  const ArgList &DriverArgs, ArgStringList &CC1Args) const {
181  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
182  // Add cuda_wrappers/* to our system include path. This lets us wrap
183  // standard library headers.
185  llvm::sys::path::append(P, "include");
186  llvm::sys::path::append(P, "cuda_wrappers");
187  CC1Args.push_back("-internal-isystem");
188  CC1Args.push_back(DriverArgs.MakeArgString(P));
189  }
190 
191  if (DriverArgs.hasArg(options::OPT_nocudainc))
192  return;
193 
194  if (!isValid()) {
195  D.Diag(diag::err_drv_no_cuda_installation);
196  return;
197  }
198 
199  CC1Args.push_back("-internal-isystem");
200  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
201  CC1Args.push_back("-include");
202  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
203 }
204 
206  CudaArch Arch) const {
207  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
208  ArchsWithVersionTooLowErrors.count(Arch) > 0)
209  return;
210 
211  auto RequiredVersion = MinVersionForCudaArch(Arch);
212  if (Version < RequiredVersion) {
213  ArchsWithVersionTooLowErrors.insert(Arch);
214  D.Diag(diag::err_drv_cuda_version_too_low)
215  << InstallPath << CudaArchToString(Arch) << CudaVersionToString(Version)
216  << CudaVersionToString(RequiredVersion);
217  }
218 }
219 
220 void CudaInstallationDetector::print(raw_ostream &OS) const {
221  if (isValid())
222  OS << "Found CUDA installation: " << InstallPath << ", version "
223  << CudaVersionToString(Version) << "\n";
224 }
225 
226 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
227  const InputInfo &Output,
228  const InputInfoList &Inputs,
229  const ArgList &Args,
230  const char *LinkingOutput) const {
231  const auto &TC =
232  static_cast<const toolchains::CudaToolChain &>(getToolChain());
233  assert(TC.getTriple().isNVPTX() && "Wrong platform");
234 
235  StringRef GPUArchName;
236  // If this is an OpenMP action we need to extract the device architecture
237  // from the -march=arch option. This option may come from -Xopenmp-target
238  // flag or the default value.
240  GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
241  assert(!GPUArchName.empty() && "Must have an architecture passed in.");
242  } else
243  GPUArchName = JA.getOffloadingArch();
244 
245  // Obtain architecture from the action.
246  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
247  assert(gpu_arch != CudaArch::UNKNOWN &&
248  "Device action expected to have an architecture.");
249 
250  // Check that our installation's ptxas supports gpu_arch.
251  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
252  TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
253  }
254 
255  ArgStringList CmdArgs;
256  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
257  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
258  options::OPT_no_cuda_noopt_device_debug, false)) {
259  // ptxas does not accept -g option if optimization is enabled, so
260  // we ignore the compiler's -O* options if we want debug info.
261  CmdArgs.push_back("-g");
262  CmdArgs.push_back("--dont-merge-basicblocks");
263  CmdArgs.push_back("--return-at-end");
264  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
265  // Map the -O we received to -O{0,1,2,3}.
266  //
267  // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
268  // default, so it may correspond more closely to the spirit of clang -O2.
269 
270  // -O3 seems like the least-bad option when -Osomething is specified to
271  // clang but it isn't handled below.
272  StringRef OOpt = "3";
273  if (A->getOption().matches(options::OPT_O4) ||
274  A->getOption().matches(options::OPT_Ofast))
275  OOpt = "3";
276  else if (A->getOption().matches(options::OPT_O0))
277  OOpt = "0";
278  else if (A->getOption().matches(options::OPT_O)) {
279  // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
280  OOpt = llvm::StringSwitch<const char *>(A->getValue())
281  .Case("1", "1")
282  .Case("2", "2")
283  .Case("3", "3")
284  .Case("s", "2")
285  .Case("z", "2")
286  .Default("2");
287  }
288  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
289  } else {
290  // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
291  // to no optimizations, but ptxas's default is -O3.
292  CmdArgs.push_back("-O0");
293  }
294 
295  // Pass -v to ptxas if it was passed to the driver.
296  if (Args.hasArg(options::OPT_v))
297  CmdArgs.push_back("-v");
298 
299  CmdArgs.push_back("--gpu-name");
300  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
301  CmdArgs.push_back("--output-file");
302  SmallString<256> OutputFileName(Output.getFilename());
304  llvm::sys::path::replace_extension(OutputFileName, "cubin");
305  CmdArgs.push_back(Args.MakeArgString(OutputFileName));
306  for (const auto& II : Inputs)
307  CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
308 
309  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
310  CmdArgs.push_back(Args.MakeArgString(A));
311 
312  // In OpenMP we need to generate relocatable code.
314  Args.hasFlag(options::OPT_fopenmp_relocatable_target,
315  options::OPT_fnoopenmp_relocatable_target,
316  /*Default=*/ true))
317  CmdArgs.push_back("-c");
318 
319  const char *Exec;
320  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
321  Exec = A->getValue();
322  else
323  Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
324  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
325 }
326 
327 // All inputs to this linker must be from CudaDeviceActions, as we need to look
328 // at the Inputs' Actions in order to figure out which GPU architecture they
329 // correspond to.
330 void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
331  const InputInfo &Output,
332  const InputInfoList &Inputs,
333  const ArgList &Args,
334  const char *LinkingOutput) const {
335  const auto &TC =
336  static_cast<const toolchains::CudaToolChain &>(getToolChain());
337  assert(TC.getTriple().isNVPTX() && "Wrong platform");
338 
339  ArgStringList CmdArgs;
340  CmdArgs.push_back("--cuda");
341  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
342  CmdArgs.push_back(Args.MakeArgString("--create"));
343  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
344 
345  for (const auto& II : Inputs) {
346  auto *A = II.getAction();
347  assert(A->getInputs().size() == 1 &&
348  "Device offload action is expected to have a single input");
349  const char *gpu_arch_str = A->getOffloadingArch();
350  assert(gpu_arch_str &&
351  "Device action expected to have associated a GPU architecture!");
352  CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
353 
354  // We need to pass an Arch of the form "sm_XX" for cubin files and
355  // "compute_XX" for ptx.
356  const char *Arch =
357  (II.getType() == types::TY_PP_Asm)
359  : gpu_arch_str;
360  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
361  Arch + ",file=" + II.getFilename()));
362  }
363 
364  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
365  CmdArgs.push_back(Args.MakeArgString(A));
366 
367  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
368  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
369 }
370 
371 void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
372  const InputInfo &Output,
373  const InputInfoList &Inputs,
374  const ArgList &Args,
375  const char *LinkingOutput) const {
376  const auto &TC =
377  static_cast<const toolchains::CudaToolChain &>(getToolChain());
378  assert(TC.getTriple().isNVPTX() && "Wrong platform");
379 
380  ArgStringList CmdArgs;
381 
382  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
383  // host binary by the host linker.
384  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
385  "CUDA toolchain not expected for an OpenMP host device.");
386 
387  if (Output.isFilename()) {
388  CmdArgs.push_back("-o");
389  CmdArgs.push_back(Output.getFilename());
390  } else
391  assert(Output.isNothing() && "Invalid output.");
392  if (Args.hasArg(options::OPT_g_Flag))
393  CmdArgs.push_back("-g");
394 
395  if (Args.hasArg(options::OPT_v))
396  CmdArgs.push_back("-v");
397 
398  StringRef GPUArch =
399  Args.getLastArgValue(options::OPT_march_EQ);
400  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
401 
402  CmdArgs.push_back("-arch");
403  CmdArgs.push_back(Args.MakeArgString(GPUArch));
404 
405  // Add paths specified in LIBRARY_PATH environment variable as -L options.
406  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
407 
408  // Add paths for the default clang library path.
409  SmallString<256> DefaultLibPath =
410  llvm::sys::path::parent_path(TC.getDriver().Dir);
411  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
412  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
413 
414  // Add linking against library implementing OpenMP calls on NVPTX target.
415  CmdArgs.push_back("-lomptarget-nvptx");
416 
417  for (const auto &II : Inputs) {
418  if (II.getType() == types::TY_LLVM_IR ||
419  II.getType() == types::TY_LTO_IR ||
420  II.getType() == types::TY_LTO_BC ||
421  II.getType() == types::TY_LLVM_BC) {
422  C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
423  << getToolChain().getTripleString();
424  continue;
425  }
426 
427  // Currently, we only pass the input files to the linker, we do not pass
428  // any libraries that may be valid only for the host.
429  if (!II.isFilename())
430  continue;
431 
432  SmallString<256> Name(II.getFilename());
433  llvm::sys::path::replace_extension(Name, "cubin");
434 
435  const char *CubinF =
436  C.addTempFile(C.getArgs().MakeArgString(Name));
437 
438  CmdArgs.push_back(CubinF);
439  }
440 
441  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
442 
443  const char *Exec =
444  Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
445  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
446 }
447 
448 /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
449 /// which isn't properly a linker but nonetheless performs the step of stitching
450 /// together object files from the assembler into a single blob.
451 
452 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
453  const ToolChain &HostTC, const ArgList &Args,
454  const Action::OffloadKind OK)
455  : ToolChain(D, Triple, Args), HostTC(HostTC),
456  CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
459  // Lookup binaries into the driver directory, this is used to
460  // discover the clang-offload-bundler executable.
461  getProgramPaths().push_back(getDriver().Dir);
462 }
463 
465  const llvm::opt::ArgList &DriverArgs,
466  llvm::opt::ArgStringList &CC1Args,
467  Action::OffloadKind DeviceOffloadingKind) const {
468  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
469 
470  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
471  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
472  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
473  DeviceOffloadingKind == Action::OFK_Cuda) &&
474  "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
475 
476  if (DeviceOffloadingKind == Action::OFK_Cuda) {
477  CC1Args.push_back("-fcuda-is-device");
478 
479  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
480  options::OPT_fno_cuda_flush_denormals_to_zero, false))
481  CC1Args.push_back("-fcuda-flush-denormals-to-zero");
482 
483  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
484  options::OPT_fno_cuda_approx_transcendentals, false))
485  CC1Args.push_back("-fcuda-approx-transcendentals");
486  }
487 
488  if (DriverArgs.hasArg(options::OPT_nocudalib))
489  return;
490 
491  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
492 
493  if (LibDeviceFile.empty()) {
494  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
495  DriverArgs.hasArg(options::OPT_S))
496  return;
497 
498  getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
499  return;
500  }
501 
502  CC1Args.push_back("-mlink-cuda-bitcode");
503  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
504 
506  // CUDA-9 uses new instructions that are only available in PTX6.0
507  CC1Args.push_back("-target-feature");
508  CC1Args.push_back("+ptx60");
509  } else {
510  // Libdevice in CUDA-7.0 requires PTX version that's more recent
511  // than LLVM defaults to. Use PTX4.2 which is the PTX version that
512  // came with CUDA-7.0.
513  CC1Args.push_back("-target-feature");
514  CC1Args.push_back("+ptx42");
515  }
516 }
517 
518 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
519  ArgStringList &CC1Args) const {
520  // Check our CUDA version if we're going to include the CUDA headers.
521  if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
522  !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
523  StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
524  assert(!Arch.empty() && "Must have an explicit GPU arch.");
526  }
527  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
528 }
529 
530 llvm::opt::DerivedArgList *
531 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
532  StringRef BoundArch,
533  Action::OffloadKind DeviceOffloadKind) const {
534  DerivedArgList *DAL =
535  HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
536  if (!DAL)
537  DAL = new DerivedArgList(Args.getBaseArgs());
538 
539  const OptTable &Opts = getDriver().getOpts();
540 
541  // For OpenMP device offloading, append derived arguments. Make sure
542  // flags are not duplicated.
543  // Also append the compute capability.
544  if (DeviceOffloadKind == Action::OFK_OpenMP) {
545  for (Arg *A : Args) {
546  bool IsDuplicate = false;
547  for (Arg *DALArg : *DAL) {
548  if (A == DALArg) {
549  IsDuplicate = true;
550  break;
551  }
552  }
553  if (!IsDuplicate)
554  DAL->append(A);
555  }
556 
557  StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
558  if (Arch.empty())
559  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
560  CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
561 
562  return DAL;
563  }
564 
565  for (Arg *A : Args) {
566  if (A->getOption().matches(options::OPT_Xarch__)) {
567  // Skip this argument unless the architecture matches BoundArch
568  if (BoundArch.empty() || A->getValue(0) != BoundArch)
569  continue;
570 
571  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
572  unsigned Prev = Index;
573  std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
574 
575  // If the argument parsing failed or more than one argument was
576  // consumed, the -Xarch_ argument's parameter tried to consume
577  // extra arguments. Emit an error and ignore.
578  //
579  // We also want to disallow any options which would alter the
580  // driver behavior; that isn't going to work in our model. We
581  // use isDriverOption() as an approximation, although things
582  // like -O4 are going to slip through.
583  if (!XarchArg || Index > Prev + 1) {
584  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
585  << A->getAsString(Args);
586  continue;
587  } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
588  getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
589  << A->getAsString(Args);
590  continue;
591  }
592  XarchArg->setBaseArg(A);
593  A = XarchArg.release();
594  DAL->AddSynthesizedArg(A);
595  }
596  DAL->append(A);
597  }
598 
599  if (!BoundArch.empty()) {
600  DAL->eraseArg(options::OPT_march_EQ);
601  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
602  }
603  return DAL;
604 }
605 
607  return new tools::NVPTX::Assembler(*this);
608 }
609 
611  if (OK == Action::OFK_OpenMP)
612  return new tools::NVPTX::OpenMPLinker(*this);
613  return new tools::NVPTX::Linker(*this);
614 }
615 
616 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
618 }
619 
621 CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
622  return HostTC.GetCXXStdlibType(Args);
623 }
624 
625 void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
626  ArgStringList &CC1Args) const {
627  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
628 }
629 
631  ArgStringList &CC1Args) const {
632  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
633 }
634 
635 void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
636  ArgStringList &CC1Args) const {
637  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
638 }
639 
641  // The CudaToolChain only supports sanitizers in the sense that it allows
642  // sanitizer arguments on the command line if they are supported by the host
643  // toolchain. The CudaToolChain will actually ignore any command line
644  // arguments for any of these "supported" sanitizers. That means that no
645  // sanitization of device code is actually supported at this time.
646  //
647  // This behavior is necessary because the host and device toolchains
648  // invocations often share the command line, so the device toolchain must
649  // tolerate flags meant only for the host toolchain.
651 }
652 
654  const ArgList &Args) const {
655  return HostTC.computeMSVCVersion(D, Args);
656 }
virtual void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const
Add warning options that need to be passed to cc1 for this target.
Definition: ToolChain.cpp:571
CudaArch
Definition: Cuda.h:31
Represents a version number in the form major[.minor[.subminor[.build]]].
Definition: VersionTuple.h:26
const char * CudaArchToString(CudaArch A)
Definition: Cuda.cpp:25
llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override
TranslateArgs - Create a new derived argument list for any argument translations this ToolChain may w...
Definition: Cuda.cpp:531
SanitizerMask getSupportedSanitizers() const override
Return sanitizers which are available in this toolchain.
Definition: Cuda.cpp:640
StringRef P
virtual void AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
AddClangCXXStdlibIncludeArgs - Add the clang -cc1 level arguments to set the include paths to use for...
Definition: ToolChain.cpp:657
VersionTuple computeMSVCVersion(const Driver *D, const llvm::opt::ArgList &Args) const override
On Windows, returns the MSVC compatibility version.
Definition: Cuda.cpp:653
bool isHostOffloading(OffloadKind OKind) const
Check if this action has any offload kinds.
Definition: Action.h:187
DiagnosticBuilder Diag(unsigned DiagID) const
Definition: Driver.h:116
CudaArch StringToCudaArch(llvm::StringRef S)
Definition: Cuda.cpp:59
CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args)
Definition: Cuda.cpp:57
static CudaVersion ParseCudaVersionFile(llvm::StringRef V)
Definition: Cuda.cpp:32
void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override
Add warning options that need to be passed to cc1 for this target.
Definition: Cuda.cpp:616
void print(raw_ostream &OS) const
Print information about the detected CUDA installation.
Definition: Cuda.cpp:220
Tool * buildAssembler() const override
Definition: Cuda.cpp:606
const char * getFilename() const
Definition: InputInfo.h:84
path_list & getProgramPaths()
Definition: ToolChain.h:197
virtual void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
Add the clang cc1 arguments for system include paths.
Definition: ToolChain.cpp:562
bool isOffloading(OffloadKind OKind) const
Definition: Action.h:193
void CheckCudaVersionSupportsArch(CudaArch Arch) const
Emit an error if Version does not support the given Arch.
Definition: Cuda.cpp:205
InputInfo - Wrapper for information about an input source.
Definition: InputInfo.h:23
bool isDeviceOffloading(OffloadKind OKind) const
Definition: Action.h:190
std::string getLibDeviceFile(StringRef Gpu) const
Get libdevice file for given architecture.
Definition: Cuda.h:76
Driver - Encapsulate logic for constructing compilation processes from a set of gcc-driver-like comma...
Definition: Driver.h:65
CudaInstallationDetector CudaInstallation
Definition: Cuda.h:182
void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, const InputInfo &Output, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, const JobAction &JA)
void addDirectoryList(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, const char *ArgName, const char *EnvVar)
const char * CudaVersionToString(CudaVersion V)
Definition: Cuda.cpp:9
void AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CC1Args) const override
AddClangCXXStdlibIncludeArgs - Add the clang -cc1 level arguments to set the include paths to use for...
Definition: Cuda.cpp:630
virtual VersionTuple computeMSVCVersion(const Driver *D, const llvm::opt::ArgList &Args) const
On Windows, returns the MSVC compatibility version.
Definition: ToolChain.cpp:767
void addCommand(std::unique_ptr< Command > C)
Definition: Compilation.h:189
const char * CudaVirtualArchToString(CudaVirtualArch A)
Definition: Cuda.cpp:77
Tool * buildLinker() const override
Definition: Cuda.cpp:610
void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const override
Add options that need to be passed to cc1 for this target.
Definition: Cuda.cpp:464
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
Definition: Cuda.cpp:179
CudaVersion version() const
Get the detected Cuda install's version.
Definition: Cuda.h:64
const llvm::opt::DerivedArgList & getArgs() const
Definition: Compilation.h:170
vfs::FileSystem & getVFS() const
Definition: Driver.h:284
void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override
Add the clang cc1 arguments for system include paths.
Definition: Cuda.cpp:625
const Driver & getDriver() const
Definition: ToolChain.h:164
CudaVersion
Definition: Cuda.h:19
bool isValid() const
Check whether we detected a valid Cuda install.
Definition: Cuda.h:59
StringRef getIncludePath() const
Get the detected Cuda Include path.
Definition: Cuda.h:70
CudaVersion MinVersionForCudaArch(CudaArch A)
Get the earliest CudaVersion that supports the given CudaArch.
Definition: Cuda.cpp:159
Dataflow Directional Tag Classes.
uint64_t SanitizerMask
Definition: Sanitizers.h:24
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override
Add arguments to use system-specific CUDA includes.
Definition: Cuda.cpp:518
CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override
Definition: Cuda.cpp:621
std::string SysRoot
sysroot, if present
Definition: Driver.h:149
Tool - Information on a specific compilation tool.
Definition: Tool.h:34
Defines the virtual file system interface vfs::FileSystem.
CudaVirtualArch VirtualArchForCudaArch(CudaArch A)
Get the compute_xx corresponding to an sm_yy.
Definition: Cuda.cpp:126
bool exists(const Twine &Path)
Check whether a file exists. Provided for convenience.
Compilation - A set of tasks to perform for a single driver invocation.
Definition: Compilation.h:34
const Driver & getDriver() const
Definition: Compilation.h:117
virtual SanitizerMask getSupportedSanitizers() const
Return sanitizers which are available in this toolchain.
Definition: ToolChain.cpp:730
bool isNothing() const
Definition: InputInfo.h:75
virtual llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const
TranslateArgs - Create a new derived argument list for any argument translations this ToolChain may w...
Definition: ToolChain.h:240
bool isFilename() const
Definition: InputInfo.h:76
const llvm::opt::OptTable & getOpts() const
Definition: Driver.h:280
virtual CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const
Definition: ToolChain.cpp:599
const char * addTempFile(const char *Name)
addTempFile - Add a file to remove on exit, and returns its argument.
Definition: Compilation.h:216
virtual void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
Add arguments to use MCU GCC toolchain includes.
Definition: ToolChain.cpp:750
virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const
Add options that need to be passed to cc1 for this target.
Definition: ToolChain.cpp:567
StringRef getBinPath() const
Get the detected path to Cuda's bin directory.
Definition: Cuda.h:68
const char * getOffloadingArch() const
Definition: Action.h:183
void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override
Add arguments to use MCU GCC toolchain includes.
Definition: Cuda.cpp:635
ToolChain - Access to tools for a single platform.
Definition: ToolChain.h:72
std::string ResourceDir
The path to the compiler resource directory.
Definition: Driver.h:139