clang 23.0.0git
Cuda.cpp
Go to the documentation of this file.
1//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Cuda.h"
10#include "clang/Basic/Cuda.h"
11#include "clang/Config/config.h"
14#include "clang/Driver/Distro.h"
15#include "clang/Driver/Driver.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/Config/llvm-config.h" // for LLVM_HOST_TRIPLE
21#include "llvm/Option/ArgList.h"
22#include "llvm/Support/FileSystem.h"
23#include "llvm/Support/Path.h"
24#include "llvm/Support/Process.h"
25#include "llvm/Support/Program.h"
26#include "llvm/Support/VirtualFileSystem.h"
27#include "llvm/TargetParser/Host.h"
28#include "llvm/TargetParser/TargetParser.h"
29#include <system_error>
30
31using namespace clang::driver;
32using namespace clang::driver::toolchains;
33using namespace clang::driver::tools;
34using namespace clang;
35using namespace llvm::opt;
36
37namespace {
38
39CudaVersion getCudaVersion(uint32_t raw_version) {
40 if (raw_version < 7050)
42 if (raw_version < 8000)
44 if (raw_version < 9000)
46 if (raw_version < 9010)
48 if (raw_version < 9020)
50 if (raw_version < 10000)
52 if (raw_version < 10010)
54 if (raw_version < 10020)
56 if (raw_version < 11000)
58 if (raw_version < 11010)
60 if (raw_version < 11020)
62 if (raw_version < 11030)
64 if (raw_version < 11040)
66 if (raw_version < 11050)
68 if (raw_version < 11060)
70 if (raw_version < 11070)
72 if (raw_version < 11080)
74 if (raw_version < 11090)
76 if (raw_version < 12010)
78 if (raw_version < 12020)
80 if (raw_version < 12030)
82 if (raw_version < 12040)
84 if (raw_version < 12050)
86 if (raw_version < 12060)
88 if (raw_version < 12070)
90 if (raw_version < 12090)
92 if (raw_version < 13000)
94 if (raw_version < 13010)
96 if (raw_version < 13020)
98 if (raw_version < 13030)
100 return CudaVersion::NEW;
101}
102
103CudaVersion parseCudaHFile(llvm::StringRef Input) {
104 // Helper lambda which skips the words if the line starts with them or returns
105 // std::nullopt otherwise.
106 auto StartsWithWords =
107 [](llvm::StringRef Line,
108 const SmallVector<StringRef, 3> words) -> std::optional<StringRef> {
109 for (StringRef word : words) {
110 if (!Line.consume_front(word))
111 return {};
112 Line = Line.ltrim();
113 }
114 return Line;
115 };
116
117 Input = Input.ltrim();
118 while (!Input.empty()) {
119 if (auto Line =
120 StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
121 uint32_t RawVersion;
122 Line->consumeInteger(10, RawVersion);
123 return getCudaVersion(RawVersion);
124 }
125 // Find next non-empty line.
126 Input = Input.drop_front(Input.find_first_of("\n\r")).ltrim();
127 }
129}
130} // namespace
131
133 if (Version > CudaVersion::PARTIALLY_SUPPORTED) {
134 std::string VersionString = CudaVersionToString(Version);
135 if (!VersionString.empty())
136 VersionString.insert(0, " ");
137 D.Diag(diag::warn_drv_new_cuda_version)
138 << VersionString
141 } else if (Version > CudaVersion::FULLY_SUPPORTED)
142 D.Diag(diag::warn_drv_partially_supported_cuda_version)
143 << CudaVersionToString(Version);
144}
145
147 const Driver &D, const llvm::Triple &HostTriple,
148 const llvm::opt::ArgList &Args)
149 : D(D) {
150 struct Candidate {
151 std::string Path;
152 bool StrictChecking;
153
154 Candidate(std::string Path, bool StrictChecking = false)
155 : Path(Path), StrictChecking(StrictChecking) {}
156 };
157 SmallVector<Candidate, 4> Candidates;
158
159 // In decreasing order so we prefer newer versions to older versions.
160 std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
161 auto &FS = D.getVFS();
162
163 if (Args.hasArg(options::OPT_cuda_path_EQ)) {
164 Candidates.emplace_back(
165 Args.getLastArgValue(options::OPT_cuda_path_EQ).str());
166 } else if (HostTriple.isOSWindows()) {
167 for (const char *Ver : Versions)
168 Candidates.emplace_back(
169 D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
170 Ver);
171 } else {
172 if (!Args.hasArg(options::OPT_cuda_path_ignore_env)) {
173 // Try to find ptxas binary. If the executable is located in a directory
174 // called 'bin/', its parent directory might be a good guess for a valid
175 // CUDA installation.
176 // However, some distributions might install 'ptxas' in /usr/bin. In that
177 // case the candidate would be '/usr' which passes the following checks
178 // because '/usr/include' exists as well. To avoid this case, we always
179 // check for the directory potentially containing files for libdevice,
180 // even if the user passes -nocudalib.
181 if (llvm::ErrorOr<std::string> ptxas =
182 llvm::sys::findProgramByName("ptxas")) {
183 SmallString<256> ptxasAbsolutePath;
184 llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
185
186 StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
187 if (llvm::sys::path::filename(ptxasDir) == "bin")
188 Candidates.emplace_back(
189 std::string(llvm::sys::path::parent_path(ptxasDir)),
190 /*StrictChecking=*/true);
191 }
192 }
193
194 Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
195 for (const char *Ver : Versions)
196 Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
197
198 Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
199 if (Dist.IsDebian() || Dist.IsUbuntu())
200 // Special case for Debian to have nvidia-cuda-toolkit work
201 // out of the box. More info on http://bugs.debian.org/882505
202 Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
203 }
204
205 bool NoCudaLib =
206 !Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true);
207
208 for (const auto &Candidate : Candidates) {
209 InstallPath = Candidate.Path;
210 if (InstallPath.empty() || !FS.exists(InstallPath))
211 continue;
212
213 BinPath = InstallPath + "/bin";
214 IncludePath = InstallPath + "/include";
215 LibDevicePath = InstallPath + "/nvvm/libdevice";
216
217 if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
218 continue;
219 bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
220 if (CheckLibDevice && !FS.exists(LibDevicePath))
221 continue;
222
223 Version = CudaVersion::UNKNOWN;
224 if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
225 Version = parseCudaHFile((*CudaHFile)->getBuffer());
226 // As the last resort, make an educated guess between CUDA-7.0, which had
227 // old-style libdevice bitcode, and an unknown recent CUDA version.
228 if (Version == CudaVersion::UNKNOWN) {
229 Version = FS.exists(LibDevicePath + "/libdevice.10.bc")
232 }
233
234 if (Version >= CudaVersion::CUDA_90) {
235 // CUDA-9+ uses single libdevice file for all GPU variants.
236 std::string FilePath = LibDevicePath + "/libdevice.10.bc";
237 if (FS.exists(FilePath)) {
238 for (int Arch = (int)OffloadArch::SM_30, E = (int)OffloadArch::LAST;
239 Arch < E; ++Arch) {
240 OffloadArch OA = static_cast<OffloadArch>(Arch);
241 if (!IsNVIDIAOffloadArch(OA))
242 continue;
243 std::string OffloadArchName(OffloadArchToString(OA));
244 LibDeviceMap[OffloadArchName] = FilePath;
245 }
246 }
247 } else {
248 std::error_code EC;
249 for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC),
250 LE;
251 !EC && LI != LE; LI = LI.increment(EC)) {
252 StringRef FilePath = LI->path();
253 StringRef FileName = llvm::sys::path::filename(FilePath);
254 // Process all bitcode filenames that look like
255 // libdevice.compute_XX.YY.bc
256 const StringRef LibDeviceName = "libdevice.";
257 if (!(FileName.starts_with(LibDeviceName) && FileName.ends_with(".bc")))
258 continue;
259 StringRef GpuArch = FileName.slice(
260 LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
261 LibDeviceMap[GpuArch] = FilePath.str();
262 // Insert map entries for specific devices with this compute
263 // capability. NVCC's choice of the libdevice library version is
264 // rather peculiar and depends on the CUDA version.
265 if (GpuArch == "compute_20") {
266 LibDeviceMap["sm_20"] = std::string(FilePath);
267 LibDeviceMap["sm_21"] = std::string(FilePath);
268 LibDeviceMap["sm_32"] = std::string(FilePath);
269 } else if (GpuArch == "compute_30") {
270 LibDeviceMap["sm_30"] = std::string(FilePath);
271 if (Version < CudaVersion::CUDA_80) {
272 LibDeviceMap["sm_50"] = std::string(FilePath);
273 LibDeviceMap["sm_52"] = std::string(FilePath);
274 LibDeviceMap["sm_53"] = std::string(FilePath);
275 }
276 LibDeviceMap["sm_60"] = std::string(FilePath);
277 LibDeviceMap["sm_61"] = std::string(FilePath);
278 LibDeviceMap["sm_62"] = std::string(FilePath);
279 } else if (GpuArch == "compute_35") {
280 LibDeviceMap["sm_35"] = std::string(FilePath);
281 LibDeviceMap["sm_37"] = std::string(FilePath);
282 } else if (GpuArch == "compute_50") {
283 if (Version >= CudaVersion::CUDA_80) {
284 LibDeviceMap["sm_50"] = std::string(FilePath);
285 LibDeviceMap["sm_52"] = std::string(FilePath);
286 LibDeviceMap["sm_53"] = std::string(FilePath);
287 }
288 }
289 }
290 }
291
292 // Check that we have found at least one libdevice that we can link in if
293 // -nocudalib hasn't been specified.
294 if (LibDeviceMap.empty() && !NoCudaLib)
295 continue;
296
297 IsValid = true;
298 break;
299 }
300}
301
303 const ArgList &DriverArgs, ArgStringList &CC1Args) const {
304 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
305 // Add cuda_wrappers/* to our system include path. This lets us wrap
306 // standard library headers.
307 SmallString<128> P(D.ResourceDir);
308 llvm::sys::path::append(P, "include");
309 llvm::sys::path::append(P, "cuda_wrappers");
310 CC1Args.push_back("-internal-isystem");
311 CC1Args.push_back(DriverArgs.MakeArgString(P));
312 }
313
314 if (!DriverArgs.hasFlag(options::OPT_offload_inc, options::OPT_no_offload_inc,
315 true))
316 return;
317
318 if (!isValid()) {
319 D.Diag(diag::err_drv_no_cuda_installation);
320 return;
321 }
322
323 CC1Args.push_back("-include");
324 CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
325}
326
328 OffloadArch Arch) const {
329 if (Arch == OffloadArch::Unknown || Version == CudaVersion::UNKNOWN ||
330 ArchsWithBadVersion[(int)Arch])
331 return;
332
333 auto MinVersion = MinVersionForOffloadArch(Arch);
334 auto MaxVersion = MaxVersionForOffloadArch(Arch);
335 if (Version < MinVersion || Version > MaxVersion) {
336 ArchsWithBadVersion[(int)Arch] = true;
337 D.Diag(diag::err_drv_cuda_version_unsupported)
339 << CudaVersionToString(MaxVersion) << InstallPath
340 << CudaVersionToString(Version);
341 }
342}
343
344void CudaInstallationDetector::print(raw_ostream &OS) const {
345 if (isValid())
346 OS << "Found CUDA installation: " << InstallPath << ", version "
347 << CudaVersionToString(Version) << "\n";
348}
349
namespace {
/// Debug info level for the NVPTX devices. The device may need a different
/// debug info level than the host. This type controls emission of debug info
/// for the devices: it is either disabled completely, limited to debug
/// directives, or the same as for the host.
enum DeviceDebugInfoLevel {
  /// Do not emit debug info for the devices.
  DisableDebugInfo,
  /// Emit only debug directives.
  DebugDirectivesOnly,
  /// Use the same debug info level as for the host.
  EmitSameDebugInfoAsHost,
};
} // anonymous namespace
363
364/// Define debug info level for the NVPTX devices. If the debug info for both
365/// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If
366/// only debug directives are requested for the both host and device
367/// (-gline-directvies-only), or the debug info only for the device is disabled
368/// (optimization is on and --cuda-noopt-device-debug was not specified), the
369/// debug directves only must be emitted for the device. Otherwise, use the same
370/// debug info level just like for the host (with the limitations of only
371/// supported DWARF2 standard).
372static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
373 const Arg *A = Args.getLastArg(options::OPT_O_Group);
374 bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
375 Args.hasFlag(options::OPT_cuda_noopt_device_debug,
376 options::OPT_no_cuda_noopt_device_debug,
377 /*Default=*/false);
378 if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
379 const Option &Opt = A->getOption();
380 if (Opt.matches(options::OPT_gN_Group)) {
381 if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
382 return DisableDebugInfo;
383 if (Opt.matches(options::OPT_gline_directives_only))
384 return DebugDirectivesOnly;
385 }
386 return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
387 }
388 return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo;
389}
390
392 const InputInfo &Output,
393 const InputInfoList &Inputs,
394 const ArgList &Args,
395 const char *LinkingOutput) const {
396 const auto &TC =
397 static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
398 assert(TC.getTriple().isNVPTX() && "Wrong platform");
399
400 StringRef GPUArchName;
401 // If this is a CUDA action we need to extract the device architecture
402 // from the Job's associated architecture, otherwise use the -march=arch
403 // option. This option may come from -Xopenmp-target flag or the default
404 // value.
406 GPUArchName = JA.getOffloadingArch();
407 } else {
408 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
409 if (GPUArchName.empty()) {
410 C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
411 << getToolChain().getArchName() << getShortName();
412 return;
413 }
414 }
415
416 // Obtain architecture from the action.
417 OffloadArch gpu_arch = StringToOffloadArch(GPUArchName);
418 assert(gpu_arch != OffloadArch::Unknown &&
419 "Device action expected to have an architecture.");
420
421 // Check that our installation's ptxas supports gpu_arch.
422 if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
423 TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
424 }
425
426 ArgStringList CmdArgs;
427 CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
428 DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
429 if (DIKind == EmitSameDebugInfoAsHost) {
430 // ptxas does not accept -g option if optimization is enabled, so
431 // we ignore the compiler's -O* options if we want debug info.
432 CmdArgs.push_back("-g");
433 CmdArgs.push_back("--dont-merge-basicblocks");
434 CmdArgs.push_back("--return-at-end");
435 } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
436 // Map the -O we received to -O{0,1,2,3}.
437 //
438 // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
439 // default, so it may correspond more closely to the spirit of clang -O2.
440
441 // -O3 seems like the least-bad option when -Osomething is specified to
442 // clang but it isn't handled below.
443 StringRef OOpt = "3";
444 if (A->getOption().matches(options::OPT_O4) ||
445 A->getOption().matches(options::OPT_Ofast))
446 OOpt = "3";
447 else if (A->getOption().matches(options::OPT_O0))
448 OOpt = "0";
449 else if (A->getOption().matches(options::OPT_O)) {
450 // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
451 OOpt = llvm::StringSwitch<const char *>(A->getValue())
452 .Case("1", "1")
453 .Case("2", "2")
454 .Case("3", "3")
455 .Case("s", "2")
456 .Case("z", "2")
457 .Default("2");
458 }
459 CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
460 } else {
461 // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
462 // to no optimizations, but ptxas's default is -O3.
463 CmdArgs.push_back("-O0");
464 }
465 if (DIKind == DebugDirectivesOnly)
466 CmdArgs.push_back("-lineinfo");
467
468 // Pass -v to ptxas if it was passed to the driver.
469 if (Args.hasArg(options::OPT_v))
470 CmdArgs.push_back("-v");
471
472 CmdArgs.push_back("--gpu-name");
473 CmdArgs.push_back(Args.MakeArgString(OffloadArchToString(gpu_arch)));
474 CmdArgs.push_back("--output-file");
475 std::string OutputFileName = TC.getInputFilename(Output);
476
477 if (Output.isFilename() && OutputFileName != Output.getFilename())
478 C.addTempFile(Args.MakeArgString(OutputFileName));
479
480 CmdArgs.push_back(Args.MakeArgString(OutputFileName));
481 for (const auto &II : Inputs)
482 CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
483
484 for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
485 CmdArgs.push_back(Args.MakeArgString(A));
486
487 bool Relocatable;
489 // In OpenMP we need to generate relocatable code.
490 Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
491 options::OPT_fnoopenmp_relocatable_target,
492 /*Default=*/true);
493 else if (JA.isOffloading(Action::OFK_Cuda))
494 // In CUDA we generate relocatable code by default.
495 Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
496 /*Default=*/false);
497 else
498 // Otherwise, we are compiling directly and should create linkable output.
499 Relocatable = true;
500
501 if (Relocatable)
502 CmdArgs.push_back("-c");
503
504 const char *Exec;
505 if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
506 Exec = A->getValue();
507 else
508 Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
509 C.addCommand(std::make_unique<Command>(
510 JA, *this,
512 "--options-file"},
513 Exec, CmdArgs, Inputs, Output));
514}
515
516static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) {
517 // The new driver does not include PTX by default to avoid overhead.
518 bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver,
519 options::OPT_no_offload_new_driver, true);
520 for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ,
521 options::OPT_no_cuda_include_ptx_EQ)) {
522 A->claim();
523 const StringRef ArchStr = A->getValue();
524 if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) &&
525 (ArchStr == "all" || ArchStr == InputArch))
526 includePTX = true;
527 else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) &&
528 (ArchStr == "all" || ArchStr == InputArch))
529 includePTX = false;
530 }
531 return includePTX;
532}
533
534// All inputs to this linker must be from CudaDeviceActions, as we need to look
535// at the Inputs' Actions in order to figure out which GPU architecture they
536// correspond to.
538 const InputInfo &Output,
539 const InputInfoList &Inputs,
540 const ArgList &Args,
541 const char *LinkingOutput) const {
542 const auto &TC =
543 static_cast<const toolchains::CudaToolChain &>(getToolChain());
544 assert(TC.getTriple().isNVPTX() && "Wrong platform");
545
546 ArgStringList CmdArgs;
547 if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
548 CmdArgs.push_back("--cuda");
549 CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
550 CmdArgs.push_back(Args.MakeArgString("--create"));
551 CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
552 if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
553 CmdArgs.push_back("-g");
554
555 for (const auto &II : Inputs) {
556 auto *A = II.getAction();
557 assert(A->getInputs().size() == 1 &&
558 "Device offload action is expected to have a single input");
559 StringRef GpuArch = A->getOffloadingArch();
560 assert(!GpuArch.empty() &&
561 "Device action expected to have associated a GPU architecture!");
562
563 if (II.getType() == types::TY_PP_Asm && !shouldIncludePTX(Args, GpuArch))
564 continue;
565 StringRef Kind = (II.getType() == types::TY_PP_Asm) ? "ptx" : "elf";
566 CmdArgs.push_back(Args.MakeArgString(
567 "--image3=kind=" + Kind + ",sm=" + GpuArch.drop_front(3) +
568 ",file=" + getToolChain().getInputFilename(II)));
569 }
570
571 for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
572 CmdArgs.push_back(Args.MakeArgString(A));
573
574 const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
575 C.addCommand(std::make_unique<Command>(
576 JA, *this,
578 "--options-file"},
579 Exec, CmdArgs, Inputs, Output));
580}
581
583 const InputInfo &Output,
584 const InputInfoList &Inputs,
585 const ArgList &Args,
586 const char *LinkingOutput) const {
587 const auto &TC =
588 static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
589 ArgStringList CmdArgs;
590
591 assert(TC.getTriple().isNVPTX() && "Wrong platform");
592
593 assert((Output.isFilename() || Output.isNothing()) && "Invalid output.");
594 if (Output.isFilename()) {
595 CmdArgs.push_back("-o");
596 CmdArgs.push_back(Output.getFilename());
597 }
598
599 if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
600 CmdArgs.push_back("-g");
601
602 if (Args.hasArg(options::OPT_v))
603 CmdArgs.push_back("-v");
604
605 StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
606 if (GPUArch.empty() && !C.getDriver().isUsingLTO()) {
607 C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
608 << getToolChain().getArchName() << getShortName();
609 return;
610 }
611
612 if (!GPUArch.empty()) {
613 CmdArgs.push_back("-arch");
614 CmdArgs.push_back(Args.MakeArgString(GPUArch));
615 }
616
617 if (Args.hasArg(options::OPT_ptxas_path_EQ))
618 CmdArgs.push_back(Args.MakeArgString(
619 "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ)));
620
621 if (Args.hasArg(options::OPT_cuda_path_EQ) || TC.CudaInstallation.isValid()) {
622 StringRef CudaPath = Args.getLastArgValue(
623 options::OPT_cuda_path_EQ,
624 llvm::sys::path::parent_path(TC.CudaInstallation.getBinPath()));
625 CmdArgs.push_back(Args.MakeArgString("--cuda-path=" + CudaPath));
626 }
627
628 // Add paths specified in LIBRARY_PATH environment variable as -L options.
629 addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
630
631 // Add standard library search paths passed on the command line.
632 Args.AddAllArgs(CmdArgs, options::OPT_L);
633 getToolChain().AddFilePathLibArgs(Args, CmdArgs);
634 AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
635
636 if (C.getDriver().isUsingLTO())
637 addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs,
638 C.getDriver().getLTOMode() == LTOK_Thin);
639
640 // Forward the PTX features if the nvlink-wrapper needs it.
641 std::vector<StringRef> Features;
642 getNVPTXTargetFeatures(C.getDriver(), getToolChain().getTriple(), Args,
643 Features);
644 CmdArgs.push_back(
645 Args.MakeArgString("--plugin-opt=-mattr=" + llvm::join(Features, ",")));
646
647 // Add paths for the default clang library path.
648 SmallString<256> DefaultLibPath =
649 llvm::sys::path::parent_path(TC.getDriver().Dir);
650 llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
651 CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
652
653 getToolChain().addProfileRTLibs(Args, CmdArgs);
654 addSanitizerRuntimes(getToolChain(), Args, CmdArgs);
655
656 if (Args.hasArg(options::OPT_stdlib))
657 CmdArgs.append({"-lc", "-lm"});
658 if (Args.hasArg(options::OPT_startfiles)) {
659 std::optional<std::string> IncludePath = getToolChain().getStdlibPath();
660 if (!IncludePath)
661 IncludePath = "/lib";
662 SmallString<128> P(*IncludePath);
663 llvm::sys::path::append(P, "crt1.o");
664 CmdArgs.push_back(Args.MakeArgString(P));
665 }
666
667 C.addCommand(std::make_unique<Command>(
668 JA, *this,
670 "--options-file"},
671 Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper")),
672 CmdArgs, Inputs, Output));
673}
674
675void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
676 const llvm::opt::ArgList &Args,
677 std::vector<StringRef> &Features) {
678 if (Args.hasArg(options::OPT_cuda_feature_EQ)) {
679 StringRef PtxFeature = Args.getLastArgValue(options::OPT_cuda_feature_EQ);
680 Features.push_back(Args.MakeArgString(PtxFeature));
681 return;
682 }
683 CudaInstallationDetector CudaInstallation(D, Triple, Args);
684
685 // New CUDA versions often introduce new instructions that are only supported
686 // by new PTX version, so we need to raise PTX level to enable them in NVPTX
687 // back-end.
688 const char *PtxFeature = nullptr;
689 switch (CudaInstallation.version()) {
690#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \
691 case CudaVersion::CUDA_##CUDA_VER: \
692 PtxFeature = "+ptx" #PTX_VER; \
693 break;
694 CASE_CUDA_VERSION(132, 92);
695 CASE_CUDA_VERSION(131, 91);
696 CASE_CUDA_VERSION(130, 90);
697 CASE_CUDA_VERSION(129, 88);
698 CASE_CUDA_VERSION(128, 87);
699 CASE_CUDA_VERSION(126, 85);
700 CASE_CUDA_VERSION(125, 85);
701 CASE_CUDA_VERSION(124, 84);
702 CASE_CUDA_VERSION(123, 83);
703 CASE_CUDA_VERSION(122, 82);
704 CASE_CUDA_VERSION(121, 81);
705 CASE_CUDA_VERSION(120, 80);
706 CASE_CUDA_VERSION(118, 78);
707 CASE_CUDA_VERSION(117, 77);
708 CASE_CUDA_VERSION(116, 76);
709 CASE_CUDA_VERSION(115, 75);
710 CASE_CUDA_VERSION(114, 74);
711 CASE_CUDA_VERSION(113, 73);
712 CASE_CUDA_VERSION(112, 72);
713 CASE_CUDA_VERSION(111, 71);
714 CASE_CUDA_VERSION(110, 70);
715 CASE_CUDA_VERSION(102, 65);
716 CASE_CUDA_VERSION(101, 64);
717 CASE_CUDA_VERSION(100, 63);
718 CASE_CUDA_VERSION(92, 61);
719 CASE_CUDA_VERSION(91, 61);
720 CASE_CUDA_VERSION(90, 60);
721 CASE_CUDA_VERSION(80, 50);
722 CASE_CUDA_VERSION(75, 43);
723 CASE_CUDA_VERSION(70, 42);
724#undef CASE_CUDA_VERSION
725 // TODO: Use specific CUDA version once it's public.
727 PtxFeature = "+ptx86";
728 break;
729 default:
730 // No PTX feature specified; let the backend choose based on the target SM.
731 break;
732 }
733 if (PtxFeature)
734 Features.push_back(PtxFeature);
735}
736
737/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
738/// operates as a stand-alone version of the NVPTX tools without the host
739/// toolchain.
740NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
741 const llvm::Triple &HostTriple,
742 const ArgList &Args)
743 : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
744 if (CudaInstallation.isValid())
745 getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
746 // Lookup binaries into the driver directory, this is used to
747 // discover the 'nvptx-arch' executable.
748 getProgramPaths().push_back(getDriver().Dir);
749}
750
751/// We only need the host triple to locate the CUDA binary utilities, use the
752/// system's default triple if not provided.
753NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
754 const ArgList &Args)
755 : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args) {
756 loadMultilibsFromYAML(Args, D);
757}
758
759llvm::opt::DerivedArgList *
760NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
761 StringRef BoundArch,
762 Action::OffloadKind OffloadKind) const {
763 DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BoundArch, OffloadKind);
764 if (!DAL)
765 DAL = new DerivedArgList(Args.getBaseArgs());
766
767 const OptTable &Opts = getDriver().getOpts();
768
769 for (Arg *A : Args)
770 if (!llvm::is_contained(*DAL, A))
771 DAL->append(A);
772
773 if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) {
774 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
776 } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "generic" &&
777 OffloadKind == Action::OFK_None) {
778 DAL->eraseArg(options::OPT_march_EQ);
779 } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") {
780 auto GPUsOrErr = getSystemGPUArchs(Args);
781 if (!GPUsOrErr) {
782 getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
783 << getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march";
784 } else {
785 auto &GPUs = *GPUsOrErr;
786 if (llvm::SmallSet<std::string, 1>(GPUs.begin(), GPUs.end()).size() > 1)
787 getDriver().Diag(diag::warn_drv_multi_gpu_arch)
788 << getArchName() << llvm::join(GPUs, ", ") << "-march";
789 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
790 Args.MakeArgString(GPUs.front()));
791 }
792 }
793
794 return DAL;
795}
796
798 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
799 Action::OffloadKind DeviceOffloadingKind) const {}
800
801void NVPTXToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
802 ArgStringList &CC1Args) const {
803 if (DriverArgs.hasArg(options::OPT_nostdinc) ||
804 DriverArgs.hasArg(options::OPT_nostdlibinc))
805 return;
806
807 // Add multilib variant include paths in priority order.
808 for (const Multilib &M : getOrderedMultilibs()) {
809 if (M.isDefault())
810 continue;
811 if (std::optional<std::string> StdlibIncDir = getStdlibIncludePath()) {
812 SmallString<128> Dir(*StdlibIncDir);
813 llvm::sys::path::append(Dir, M.includeSuffix());
814 if (getDriver().getVFS().exists(Dir))
815 addSystemInclude(DriverArgs, CC1Args, Dir);
816 }
817 }
818
819 if (std::optional<std::string> Path = getStdlibIncludePath())
820 addSystemInclude(DriverArgs, CC1Args, *Path);
821}
822
823bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
824 const Option &O = A->getOption();
825 return (O.matches(options::OPT_gN_Group) &&
826 !O.matches(options::OPT_gmodules)) ||
827 O.matches(options::OPT_g_Flag) ||
828 O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
829 O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
830 O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
831 O.matches(options::OPT_gdwarf_5) ||
832 O.matches(options::OPT_gcolumn_info);
833}
834
836 llvm::codegenoptions::DebugInfoKind &DebugInfoKind,
837 const ArgList &Args) const {
838 switch (mustEmitDebugInfo(Args)) {
839 case DisableDebugInfo:
840 DebugInfoKind = llvm::codegenoptions::NoDebugInfo;
841 break;
842 case DebugDirectivesOnly:
843 DebugInfoKind = llvm::codegenoptions::DebugDirectivesOnly;
844 break;
845 case EmitSameDebugInfoAsHost:
846 // Use same debug info level as the host.
847 break;
848 }
849}
850
852NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
853 // Detect NVIDIA GPUs available on the system.
854 std::string Program;
855 if (Arg *A = Args.getLastArg(options::OPT_offload_arch_tool_EQ))
856 Program = A->getValue();
857 else
858 Program = GetProgramPath("nvptx-arch");
859
860 auto StdoutOrErr = getDriver().executeProgram({Program});
861 if (!StdoutOrErr)
862 return StdoutOrErr.takeError();
863
865 for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
866 if (!Arch.empty())
867 GPUArchs.push_back(Arch.str());
868
869 if (GPUArchs.empty())
870 return llvm::createStringError(std::error_code(),
871 "No NVIDIA GPU detected in the system");
872
873 return std::move(GPUArchs);
874}
875
876/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
877/// which isn't properly a linker but nonetheless performs the step of stitching
878/// together object files from the assembler into a single blob.
879
880CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
881 const ToolChain &HostTC, const ArgList &Args)
882 : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {}
883
// Adds the cc1 target options for device compilation: the host toolchain's
// options first, then fixed device-side flags, then (unless disabled) the
// libdevice bitcode, the SDK version and the OpenMP device runtime.
//
// NOTE(review): the line carrying the function name is not present in this
// listing -- presumably CudaToolChain::addClangTargetOptions; confirm against
// the original file.
885 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
886 Action::OffloadKind DeviceOffloadingKind) const {
887 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
888
// The GPU architecture is the value of the last OPT_march_EQ argument.
889 StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
890 assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
891 DeviceOffloadingKind == Action::OFK_Cuda) &&
892 "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
893
// Flags that are passed for every device compilation.
894 CC1Args.append({"-fcuda-is-device", "-mllvm",
895 "-enable-memcpyopt-without-libcalls",
896 "-fno-threadsafe-statics"});
897
// Short pointers are opt-in (the flag defaults to off).
898 if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
899 options::OPT_fno_cuda_short_ptr, false))
900 CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
901
// Everything below deals with device libraries; stop here when the offload
// library is disabled (it is enabled by default).
902 if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
903 true))
904 return;
905
// OpenMP offloading combined with OPT_S also skips the device libraries.
906 if (DeviceOffloadingKind == Action::OFK_OpenMP &&
907 DriverArgs.hasArg(options::OPT_S))
908 return;
909
// libdevice is looked up per GPU architecture; a missing file is diagnosed as
// an error and nothing further is added.
910 std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
911 if (LibDeviceFile.empty()) {
912 getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
913 return;
914 }
915
916 CC1Args.push_back("-mlink-builtin-bitcode");
917 CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
918
919 // For now, we don't use any Offload/OpenMP device runtime when we offload
920 // CUDA via LLVM/Offload. We should split the Offload/OpenMP device runtime
921 // and include the "generic" (or CUDA-specific) parts.
922 if (DriverArgs.hasFlag(options::OPT_foffload_via_llvm,
923 options::OPT_fno_offload_via_llvm, false))
924 return;
925
926 clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
927
// NOTE(review): if UNKNOWN is the lowest CudaVersion enumerator this
// comparison is always true -- confirm that is the intent.
928 if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
929 CC1Args.push_back(
930 DriverArgs.MakeArgString(Twine("-target-sdk-version=") +
931 CudaVersionToString(CudaInstallationVersion)));
932
933 if (DeviceOffloadingKind == Action::OFK_OpenMP) {
// OpenMP offloading is rejected for installations older than
// CudaVersion::CUDA_92.
934 if (CudaInstallationVersion < CudaVersion::CUDA_92) {
935 getDriver().Diag(
936 diag::err_drv_omp_offload_target_cuda_version_not_support)
937 << CudaVersionToString(CudaInstallationVersion);
938 return;
939 }
940
941 // Link the bitcode library late if we're using device LTO.
942 if (getDriver().isUsingOffloadLTO())
943 return;
944
// The GPU architecture is passed in the BitcodeSuffix position of
// addOpenMPDeviceRTL.
945 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(),
946 getTriple(), HostTC);
947 }
948}
949
// NOTE(review): this listing is missing the line with the function name, the
// line that opens the block closed at "958 }", and one line before the final
// return -- confirm against the original file before relying on these notes.
951 const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
952 const llvm::fltSemantics *FPType) const {
// The preserve-sign denormal mode is chosen only when FPType is IEEE single
// precision and the flush-denormals-to-zero flag is on (it defaults to off).
954 if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
955 DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
956 options::OPT_fno_gpu_flush_denormals_to_zero, false))
957 return llvm::DenormalMode::getPreserveSign();
958 }
959
// Every path that did not return above gets the IEEE denormal mode.
961 return llvm::DenormalMode::getIEEE();
962}
963
964void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
965 ArgStringList &CC1Args) const {
966 // Check our CUDA version if we're going to include the CUDA headers.
967 if (DriverArgs.hasFlag(options::OPT_offload_inc, options::OPT_no_offload_inc,
968 true) &&
969 !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
970 StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
971 assert(!Arch.empty() && "Must have an explicit GPU arch.");
972 CudaInstallation.CheckCudaVersionSupportsArch(StringToOffloadArch(Arch));
973 }
974 CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
975}
976
977std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
978 // Only object files are changed, for example assembly files keep their .s
979 // extensions. If the user requested device-only compilation don't change it.
980 if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly())
981 return ToolChain::getInputFilename(Input);
982
983 return ToolChain::getInputFilename(Input);
984}
985
986llvm::opt::DerivedArgList *
987CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
988 StringRef BoundArch,
989 Action::OffloadKind DeviceOffloadKind) const {
990 DerivedArgList *DAL =
991 HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
992 if (!DAL)
993 DAL = new DerivedArgList(Args.getBaseArgs());
994
995 const OptTable &Opts = getDriver().getOpts();
996
997 for (Arg *A : Args) {
998 // Make sure flags are not duplicated.
999 if (!llvm::is_contained(*DAL, A)) {
1000 DAL->append(A);
1001 }
1002 }
1003
1004 if (!BoundArch.empty()) {
1005 DAL->eraseArg(options::OPT_march_EQ);
1006 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
1007 BoundArch);
1008 }
1009 return DAL;
1010}
1011
// NOTE(review): the signature line of this function is missing from the
// listing -- confirm against the original file.
// Returns a newly allocated NVPTX assembler tool constructed from this
// toolchain.
1013 return new tools::NVPTX::Assembler(*this);
1014}
1015
// NOTE(review): the signature line of this function is missing from the
// listing -- confirm against the original file.
// Returns a newly allocated NVPTX linker tool constructed from this toolchain.
1017 return new tools::NVPTX::Linker(*this);
1018}
1019
// NOTE(review): the signature line of this function is missing from the
// listing -- confirm against the original file.
// Returns a newly allocated NVPTX assembler tool constructed from this
// toolchain.
1021 return new tools::NVPTX::Assembler(*this);
1022}
1023
// NOTE(review): the signature line of this function is missing from the
// listing -- confirm against the original file.
// Returns a newly allocated fatbinary tool constructed from this toolchain;
// here the "linker" step is tools::NVPTX::FatBinary rather than
// tools::NVPTX::Linker.
1025 return new tools::NVPTX::FatBinary(*this);
1026}
1027
1028void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
1029 HostTC.addClangWarningOptions(CC1Args);
1030}
1031
// Reports the C++ standard library type chosen by the host toolchain for
// these arguments; no device-specific choice is made here.
// NOTE(review): the return-type line is missing from this listing -- confirm
// against the original file.
1033CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
1034 return HostTC.GetCXXStdlibType(Args);
1035}
1036
1037void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
1038 ArgStringList &CC1Args) const {
1039 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
1040
1041 if (DriverArgs.hasFlag(options::OPT_offload_inc, options::OPT_no_offload_inc,
1042 true) &&
1043 CudaInstallation.isValid())
1044 CC1Args.append(
1045 {"-internal-isystem",
1046 DriverArgs.MakeArgString(CudaInstallation.getIncludePath())});
1047}
1048
// Forwards to the host toolchain, which adds its C++ standard library include
// arguments to CC1Args.
// NOTE(review): the line carrying the function name and first parameter is
// missing from this listing -- confirm against the original file.
1050 ArgStringList &CC1Args) const {
1051 HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
1052}
1053
1054void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
1055 ArgStringList &CC1Args) const {
1056 HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
1057}
1058
// Returns exactly the sanitizer set the host toolchain reports for the same
// BoundArch and DeviceOffloadKind.
// NOTE(review): the line carrying the return type and function name is
// missing from this listing -- confirm against the original file.
1060 StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const {
1061 // The CudaToolChain only supports sanitizers in the sense that it allows
1062 // sanitizer arguments on the command line if they are supported by the host
1063 // toolchain. The CudaToolChain will actually ignore any command line
1064 // arguments for any of these "supported" sanitizers. That means that no
1065 // sanitization of device code is actually supported at this time.
1066 //
1067 // This behavior is necessary because the host and device toolchains
1068 // invocations often share the command line, so the device toolchain must
1069 // tolerate flags meant only for the host toolchain.
1070
1071 // FIXME: Be accurate and use DeviceOffloadKind.
1072 return HostTC.getSupportedSanitizers(BoundArch, DeviceOffloadKind);
1073}
1074
// Forwards the MSVC version computation to the host toolchain and returns its
// result unchanged.
// NOTE(review): the line carrying the return type, function name and the
// declaration of D is missing from this listing -- confirm against the
// original file.
1076 const ArgList &Args) const {
1077 return HostTC.computeMSVCVersion(D, Args);
1078}
static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args)
Define debug info level for the NVPTX devices.
Definition Cuda.cpp:372
static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch)
Definition Cuda.cpp:516
#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER)
static StringRef getTriple(const Command &Job)
const char * getOffloadingArch() const
Definition Action.h:216
OffloadKind getOffloadingDeviceKind() const
Definition Action.h:215
bool isDeviceOffloading(OffloadKind OKind) const
Definition Action.h:226
bool isOffloading(OffloadKind OKind) const
Definition Action.h:229
Compilation - A set of tasks to perform for a single driver invocation.
Definition Compilation.h:45
A class to find a viable CUDA installation.
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
Definition Cuda.cpp:302
CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args)
Definition Cuda.cpp:146
CudaVersion version() const
Get the detected Cuda install's version.
void CheckCudaVersionSupportsArch(OffloadArch Arch) const
Emit an error if Version does not support the given Arch.
Definition Cuda.cpp:327
void print(raw_ostream &OS) const
Print information about the detected CUDA installation.
Definition Cuda.cpp:344
bool isValid() const
Check whether we detected a valid Cuda install.
Distro - Helper class for detecting and classifying Linux distributions.
Definition Distro.h:23
bool IsDebian() const
Definition Distro.h:124
bool IsUbuntu() const
Definition Distro.h:128
Driver - Encapsulate logic for constructing compilation processes from a set of gcc-driver-like comma...
Definition Driver.h:99
DiagnosticBuilder Diag(unsigned DiagID) const
Definition Driver.h:169
const llvm::opt::OptTable & getOpts() const
Definition Driver.h:417
InputInfo - Wrapper for information about an input source.
Definition InputInfo.h:22
const char * getFilename() const
Definition InputInfo.h:83
bool isNothing() const
Definition InputInfo.h:74
bool isFilename() const
Definition InputInfo.h:75
types::ID getType() const
Definition InputInfo.h:77
This corresponds to a single GCC Multilib, or a segment of one controlled by a command line flag.
Definition Multilib.h:35
ToolChain - Access to tools for a single platform.
Definition ToolChain.h:93
static void addSystemInclude(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, const Twine &Path)
Utility function to add a system include directory to CC1 arguments.
virtual std::string getInputFilename(const InputInfo &Input) const
Some toolchains need to modify the file name, for example to replace the extension for object files w...
const Driver & getDriver() const
Definition ToolChain.h:283
llvm::vfs::FileSystem & getVFS() const
ToolChain(const Driver &D, const llvm::Triple &T, const llvm::opt::ArgList &Args)
Definition ToolChain.cpp:91
virtual llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const
TranslateArgs - Create a new derived argument list for any argument translations this ToolChain may w...
Definition ToolChain.h:398
const llvm::Triple & getTriple() const
Definition ToolChain.h:285
OrderedMultilibs getOrderedMultilibs() const
Get selected multilibs in priority order with default fallback.
std::string GetProgramPath(const char *Name) const
std::optional< std::string > getStdlibIncludePath() const
StringRef getArchName() const
Definition ToolChain.h:300
Tool - Information on a specific compilation tool.
Definition Tool.h:32
const ToolChain & getToolChain() const
Definition Tool.h:52
const char * getShortName() const
Definition Tool.h:50
SanitizerMask getSupportedSanitizers(StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override
Return sanitizers which are available in this toolchain.
Definition Cuda.cpp:1059
std::string getInputFilename(const InputInfo &Input) const override
Some toolchains need to modify the file name, for example to replace the extension for object files w...
Definition Cuda.cpp:977
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override
Add arguments to use system-specific CUDA includes.
Definition Cuda.cpp:964
void AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CC1Args) const override
AddClangCXXStdlibIncludeArgs - Add the clang -cc1 level arguments to set the include paths to use for...
Definition Cuda.cpp:1049
void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override
Add warning options that need to be passed to cc1 for this target.
Definition Cuda.cpp:1028
void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override
Add arguments to use MCU GCC toolchain includes.
Definition Cuda.cpp:1054
void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const override
Add options that need to be passed to cc1 for this target.
Definition Cuda.cpp:884
VersionTuple computeMSVCVersion(const Driver *D, const llvm::opt::ArgList &Args) const override
On Windows, returns the MSVC compatibility version.
Definition Cuda.cpp:1075
CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override
Definition Cuda.cpp:1033
CudaToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const llvm::opt::ArgList &Args)
CUDA toolchain.
Definition Cuda.cpp:880
Tool * buildLinker() const override
Definition Cuda.cpp:1024
void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override
Add the clang cc1 arguments for system include paths.
Definition Cuda.cpp:1037
Tool * buildAssembler() const override
Definition Cuda.cpp:1020
llvm::DenormalMode getDefaultDenormalModeForType(const llvm::opt::ArgList &DriverArgs, const JobAction &JA, const llvm::fltSemantics *FPType=nullptr) const override
Returns the output denormal handling type in the default floating point environment for the given FPT...
Definition Cuda.cpp:950
llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override
TranslateArgs - Create a new derived argument list for any argument translations this ToolChain may w...
Definition Cuda.cpp:987
CudaInstallationDetector CudaInstallation
Definition Cuda.h:126
void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override
Add the clang cc1 arguments for system include paths.
Definition Cuda.cpp:801
Tool * buildAssembler() const override
Definition Cuda.cpp:1012
Tool * buildLinker() const override
Definition Cuda.cpp:1016
llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override
TranslateArgs - Create a new derived argument list for any argument translations this ToolChain may w...
Definition Cuda.cpp:760
bool supportsDebugInfoOption(const llvm::opt::Arg *A) const override
Does this toolchain support the given debug info option or not.
Definition Cuda.cpp:823
virtual Expected< SmallVector< std::string > > getSystemGPUArchs(const llvm::opt::ArgList &Args) const override
Uses nvptx-arch tool to get arch of the system GPU.
Definition Cuda.cpp:852
void adjustDebugInfoKind(llvm::codegenoptions::DebugInfoKind &DebugInfoKind, const llvm::opt::ArgList &Args) const override
Adjust debug information kind considering all passed options.
Definition Cuda.cpp:835
NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args)
void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const override
Add options that need to be passed to cc1 for this target.
Definition Cuda.cpp:797
void ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs, const char *LinkingOutput) const override
ConstructJob - Construct jobs to perform the action JA, writing to Output and with Inputs,...
Definition Cuda.cpp:391
void ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs, const char *LinkingOutput) const override
ConstructJob - Construct jobs to perform the action JA, writing to Output and with Inputs,...
Definition Cuda.cpp:537
void ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs, const char *LinkingOutput) const override
ConstructJob - Construct jobs to perform the action JA, writing to Output and with Inputs,...
Definition Cuda.cpp:582
void getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args, std::vector< StringRef > &Features)
Definition Cuda.cpp:675
void addOpenMPDeviceRTL(const Driver &D, const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, StringRef BitcodeSuffix, const llvm::Triple &Triple, const ToolChain &HostTC)
void addLTOOptions(const ToolChain &ToolChain, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, const InputInfo &Output, const InputInfoList &Inputs, bool IsThinLTO)
void addDirectoryList(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, const char *ArgName, const char *EnvVar)
EnvVar is split by system delimiter for environment variables.
bool addSanitizerRuntimes(const ToolChain &TC, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs)
void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, const JobAction &JA)
SmallVector< InputInfo, 4 > InputInfoList
Definition Driver.h:50
bool willEmitRemarks(const llvm::opt::ArgList &Args)
The JSON file list parser is used to communicate input to InstallAPI.
CudaVersion MaxVersionForOffloadArch(OffloadArch A)
Get the latest CudaVersion that supports the given OffloadArch.
Definition Cuda.cpp:146
static bool IsNVIDIAOffloadArch(OffloadArch A)
const char * CudaVersionToString(CudaVersion V)
Definition Cuda.cpp:56
OffloadArch StringToOffloadArch(llvm::StringRef S)
CudaVersion
Definition Cuda.h:22
@ PARTIALLY_SUPPORTED
Definition Cuda.h:55
const char * OffloadArchToString(OffloadArch A)
CudaVersion MinVersionForOffloadArch(OffloadArch A)
Get the earliest CudaVersion that supports the given OffloadArch.
Definition Cuda.cpp:79
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30