12 #include "clang/Config/config.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/Option/ArgList.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/Host.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/Process.h"
26 #include "llvm/Support/Program.h"
27 #include "llvm/Support/TargetParser.h"
28 #include "llvm/Support/VirtualFileSystem.h"
29 #include <system_error>
34 using namespace clang;
// Fragment: ladder of upper-bound checks on `raw_version`, with cut-offs in
// ascending order (7050 up to 11060). What each branch yields is elided from
// this excerpt.
// NOTE(review): presumably each check returns the matching CUDA release and
// `raw_version` is the CUDA_VERSION value read from cuda.h — confirm against
// the elided lines.
40 if (raw_version < 7050)
42 if (raw_version < 8000)
44 if (raw_version < 9000)
46 if (raw_version < 9010)
48 if (raw_version < 9020)
50 if (raw_version < 10000)
52 if (raw_version < 10010)
54 if (raw_version < 10020)
56 if (raw_version < 11000)
58 if (raw_version < 11010)
60 if (raw_version < 11020)
62 if (raw_version < 11030)
64 if (raw_version < 11040)
66 if (raw_version < 11050)
68 if (raw_version < 11060)
// Fragment: scans `Input` for a line of the form `# define CUDA_VERSION <int>`
// and converts the integer with getCudaVersion().
//
// Helper lambda: strips each expected word from the front of `Line` in order;
// the handling of a mismatch and the return value are elided here.
76 auto StartsWithWords =
77 [](llvm::StringRef
Line,
79 for (StringRef word : words) {
80 if (!
Line.consume_front(word))
// Leading whitespace is dropped before every attempt, so the directive may be
// indented.
87 Input = Input.ltrim();
88 while (!Input.empty()) {
90 StartsWithWords(Input.ltrim(), {
"#",
"define",
"CUDA_VERSION"})) {
// Parse the remainder as a base-10 integer.
// NOTE(review): the result of consumeInteger is discarded on this line, so a
// parse failure is not detected here — confirm RawVersion is initialised in
// the elided declaration.
92 Line->consumeInteger(10, RawVersion);
93 return getCudaVersion(RawVersion);
// No match: advance to the next line break and strip leading whitespace.
// NOTE(review): find_first_of can report "not found" when the remaining text
// has no '\n' or '\r'; verify drop_front accepts that value for a final line
// without a terminator.
96 Input = Input.drop_front(Input.find_first_of(
"\n\r")).ltrim();
// Fragment: a non-empty VersionString gets a single leading space before the
// diagnostics are emitted. The streamed arguments and the condition choosing
// between the two warnings are elided.
105 if (!VersionString.empty())
106 VersionString.insert(0,
" ");
107 D.Diag(diag::warn_drv_new_cuda_version)
112 D.Diag(diag::warn_drv_partially_supported_cuda_version)
// Fragment of a routine taking (D, HostTriple, Args) that gathers candidate
// CUDA installation directories and then probes each one through `FS`.
// The enclosing declaration and many interior lines are elided.
117 const Driver &D,
const llvm::Triple &HostTriple,
118 const llvm::opt::ArgList &Args)
// A candidate is a directory path plus a StrictChecking flag (default false).
124 Candidate(
std::string Path,
bool StrictChecking =
false)
125 : Path(Path), StrictChecking(StrictChecking) {}
// Version suffixes used below to build versioned directory names.
130 std::initializer_list<const char *> Versions = {
"8.0",
"7.5",
"7.0"};
// An explicit OPT_cuda_path_EQ value is taken as a candidate; the Windows
// default locations are only considered in the else branch.
133 if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
134 Candidates.emplace_back(
135 Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
136 }
else if (HostTriple.isOSWindows()) {
137 for (
const char *Ver : Versions)
138 Candidates.emplace_back(
139 D.
SysRoot +
"/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
142 if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
// Locate `ptxas` by program name; if it sits in a directory called "bin", the
// parent of that directory becomes a candidate.
151 if (llvm::ErrorOr<std::string> ptxas =
152 llvm::sys::findProgramByName(
"ptxas")) {
// NOTE(review): the status returned by real_path is not inspected on this
// line — confirm ptxasAbsolutePath is usable if resolution fails.
154 llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
156 StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
157 if (llvm::sys::path::filename(ptxasDir) ==
"bin")
158 Candidates.emplace_back(
159 std::string(llvm::sys::path::parent_path(ptxasDir)),
// Conventional locations under the sysroot: unversioned first, then one per
// entry of Versions.
164 Candidates.emplace_back(D.
SysRoot +
"/usr/local/cuda");
165 for (
const char *Ver : Versions)
166 Candidates.emplace_back(D.
SysRoot +
"/usr/local/cuda-" + Ver);
// NOTE(review): the condition on Dist guarding the /usr/lib/cuda candidate is
// elided — presumably distribution-specific; confirm.
168 Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
172 Candidates.emplace_back(D.
SysRoot +
"/usr/lib/cuda");
175 bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);
// Probe each candidate in insertion order. The statements executed when a
// check fails are elided.
177 for (
const auto &Candidate : Candidates) {
178 InstallPath = Candidate.Path;
179 if (InstallPath.empty() || !FS.exists(InstallPath))
182 BinPath = InstallPath +
"/bin";
183 IncludePath = InstallPath +
"/include";
184 LibDevicePath = InstallPath +
"/nvvm/libdevice";
// Both the include and bin directories must exist.
186 if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
// The libdevice directory is required unless OPT_nogpulib was given; a
// candidate with StrictChecking set requires it regardless.
188 bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
189 if (CheckLibDevice && !FS.exists(LibDevicePath))
// Prefer lib64 on a 64-bit host when it exists, otherwise fall back to lib.
198 if (HostTriple.isArch64Bit() && FS.exists(InstallPath +
"/lib64"))
199 LibPath = InstallPath +
"/lib64";
200 else if (FS.exists(InstallPath +
"/lib"))
201 LibPath = InstallPath +
"/lib";
// Derive the version from include/cuda.h when the file can be read.
206 if (
auto CudaHFile = FS.getBufferForFile(InstallPath +
"/include/cuda.h"))
207 Version = parseCudaHFile((*CudaHFile)->getBuffer());
// Otherwise the version depends on whether libdevice.10.bc is present; the
// two resulting values are elided.
211 Version = FS.exists(LibDevicePath +
"/libdevice.10.bc")
// Single-file layout: one libdevice.10.bc is recorded under GpuArchName keys
// (the loop producing GpuArchName is elided).
218 std::string FilePath = LibDevicePath +
"/libdevice.10.bc";
219 if (FS.exists(FilePath)) {
226 LibDeviceMap[GpuArchName] = FilePath;
// Per-architecture layout: walk the libdevice directory; iteration stops on
// the first error reported through EC.
231 for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC),
233 !EC && LI !=
LE; LI = LI.increment(EC)) {
234 StringRef FilePath = LI->path();
235 StringRef FileName = llvm::sys::path::filename(FilePath);
// Only files named libdevice.<arch>.<...>.bc are considered; <arch> is the
// text between the prefix and the next '.'.
238 const StringRef LibDeviceName =
"libdevice.";
239 if (!(FileName.startswith(LibDeviceName) && FileName.endswith(
".bc")))
241 StringRef GpuArch = FileName.slice(
242 LibDeviceName.size(), FileName.find(
'.', LibDeviceName.size()));
243 LibDeviceMap[GpuArch] = FilePath.str();
// NOTE(review): the bodies of the compute_* branches are elided — presumably
// they register the same file for related architectures; confirm.
247 if (GpuArch ==
"compute_20") {
251 }
else if (GpuArch ==
"compute_30") {
261 }
else if (GpuArch ==
"compute_35") {
264 }
else if (GpuArch ==
"compute_50") {
// An empty map is only a problem when device libraries were requested; the
// action taken is elided.
276 if (LibDeviceMap.empty() && !NoCudaLib)
// Fragment of a const member that appends CUDA include-related arguments to
// CC1Args.
285 const ArgList &DriverArgs, ArgStringList &CC1Args)
const {
// Unless OPT_nobuiltininc is given, add <P>/include/cuda_wrappers as an
// internal system include directory. The initial value of P is elided.
286 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
290 llvm::sys::path::append(
P,
"include");
291 llvm::sys::path::append(
P,
"cuda_wrappers");
292 CC1Args.push_back(
"-internal-isystem");
293 CC1Args.push_back(DriverArgs.MakeArgString(
P));
// OPT_nogpuinc is tested next; the statement it guards is elided.
296 if (DriverArgs.hasArg(options::OPT_nogpuinc))
// The condition under which this error is reported is elided.
300 D.
Diag(diag::err_drv_no_cuda_installation);
// Force-include the runtime wrapper header.
304 CC1Args.push_back(
"-include");
305 CC1Args.push_back(
"__clang_cuda_runtime_wrapper.h");
// Fragment: ArchsWithBadVersion is indexed by the integer value of Arch. When
// Version lies outside [MinVersion, MaxVersion] the entry is set to true and
// an unsupported-version error is emitted.
// NOTE(review): the leading lookup presumably suppresses repeated diagnostics
// for the same arch — the enclosing condition is elided; confirm.
311 ArchsWithBadVersion[(
int)Arch])
316 if (Version < MinVersion || Version > MaxVersion) {
317 ArchsWithBadVersion[(
int)Arch] =
true;
318 D.
Diag(diag::err_drv_cuda_version_unsupported)
// Fragment: writes the detected install path to OS, followed by a version
// label; the version value streamed afterwards is elided.
327 OS <<
"Found CUDA installation: " << InstallPath <<
", version "
// Fragment of the enumeration describing how much debug info is emitted for
// device code. Only one enumerator is visible here; others (e.g.
// DisableDebugInfo, returned further down) are elided.
337 enum DeviceDebugInfoLevel {
340 EmitSameDebugInfoAsHost,
// Fragment: chooses a device debug-info level from the optimisation and -g
// options in Args.
//
// Debug output counts as enabled when no -O option is present, when the last
// one is -O0, or per the cuda-noopt-device-debug flag pair (its default value
// is elided).
354 const Arg *A = Args.getLastArg(options::OPT_O_Group);
355 bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
356 Args.hasFlag(options::OPT_cuda_noopt_device_debug,
357 options::OPT_no_cuda_noopt_device_debug,
// The last -g option decides the level: -g0 and -ggdb0 disable debug info
// outright. The result for line-directives-only and later cases is elided.
359 if (
const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
360 const Option &Opt = A->getOption();
361 if (Opt.matches(options::OPT_gN_Group)) {
362 if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
363 return DisableDebugInfo;
364 if (Opt.matches(options::OPT_gline_directives_only))
// Fragment of a const job-construction member that assembles a `ptxas`
// command line in CmdArgs and registers it with the compilation C.
376 const char *LinkingOutput)
const {
379 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
// The architecture name comes from OPT_march_EQ on this path; an alternative
// source is elided (only its assertion message remains visible).
381 StringRef GPUArchName;
386 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
387 assert(!GPUArchName.empty() &&
"Must have an architecture passed in.");
394 "Device action expected to have an architecture.");
// The version/arch compatibility check can be switched off by the user.
397 if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
398 TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
401 ArgStringList CmdArgs;
402 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-m64" :
"-m32");
// Full debug info: -g plus two flags that keep basic blocks unmerged and the
// return at the end.
404 if (DIKind == EmitSameDebugInfoAsHost) {
407 CmdArgs.push_back(
"-g");
408 CmdArgs.push_back(
"--dont-merge-basicblocks");
409 CmdArgs.push_back(
"--return-at-end");
410 }
// Otherwise translate the last -O option into a ptxas -O<n>; the default
// level string is "3". The values assigned in the O4/Ofast and O0 branches
// and the StringSwitch cases are elided.
else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
418 StringRef OOpt =
"3";
419 if (A->getOption().matches(options::OPT_O4) ||
420 A->getOption().matches(options::OPT_Ofast))
422 else if (A->getOption().matches(options::OPT_O0))
424 else if (A->getOption().matches(options::OPT_O)) {
426 OOpt = llvm::StringSwitch<const char *>(A->getValue())
434 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"-O") + OOpt));
// The conditions guarding -O0 and -lineinfo are elided.
438 CmdArgs.push_back(
"-O0");
441 CmdArgs.push_back(
"-lineinfo");
444 if (Args.hasArg(options::OPT_v))
445 CmdArgs.push_back(
"-v");
447 CmdArgs.push_back(
"--gpu-name");
// The output name is obtained from TC.getInputFilename(Output) and is also
// registered as a temporary file on this visible path (any guard is elided).
449 CmdArgs.push_back(
"--output-file");
450 const char *OutputFileName = Args.MakeArgString(TC.getInputFilename(Output));
452 C.addTempFile(OutputFileName);
453 CmdArgs.push_back(OutputFileName);
454 for (
const auto& II : Inputs)
455 CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
// Pass through every value given via OPT_Xcuda_ptxas unchanged.
457 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
458 CmdArgs.push_back(Args.MakeArgString(A));
// Relocatable is set from one of two flag pairs; which pair applies, and the
// condition guarding "-c", are elided.
460 bool Relocatable =
false;
463 Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
464 options::OPT_fnoopenmp_relocatable_target,
467 Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
468 options::OPT_fno_gpu_rdc,
false);
471 CmdArgs.push_back(
"-c");
// An explicit OPT_ptxas_path_EQ value is used as the executable; the other
// visible assignment looks ptxas up through the toolchain.
474 if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
475 Exec = A->getValue();
477 Exec = Args.MakeArgString(TC.GetProgramPath(
"ptxas"));
478 C.addCommand(std::make_unique<Command>(
482 Exec, CmdArgs, Inputs, Output));
// Fragment: decides whether PTX is included for gpu_arch. Starts from true
// and walks every argument in order; each include/no-include option whose
// value is "all" or equals gpu_arch overwrites the decision, so the last
// matching option wins.
486 bool includePTX =
true;
487 for (Arg *A : Args) {
// Arguments that are neither of the two options are filtered by this test;
// the statement it guards is elided.
488 if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
489 A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
492 const StringRef ArchStr = A->getValue();
493 if (ArchStr ==
"all" || ArchStr == gpu_arch) {
494 includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
// Fragment of a const job-construction member that assembles a `fatbinary`
// command line in CmdArgs and registers it with the compilation C.
508 const char *LinkingOutput)
const {
511 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
513 ArgStringList CmdArgs;
515 CmdArgs.push_back(
"--cuda");
516 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-64" :
"-32");
// "--create" and the output file name are passed as two separate arguments.
517 CmdArgs.push_back(Args.MakeArgString(
"--create"));
518 CmdArgs.push_back(Args.MakeArgString(Output.
getFilename()));
// The condition guarding -g is elided.
520 CmdArgs.push_back(
"-g");
// One --image=profile=<arch>,file=<name> argument is built per input.
522 for (
const auto& II : Inputs) {
523 auto *A = II.getAction();
524 assert(A->getInputs().size() == 1 &&
525 "Device offload action is expected to have a single input");
526 const char *gpu_arch_str = A->getOffloadingArch();
527 assert(gpu_arch_str &&
528 "Device action expected to have associated a GPU architecture!");
// The rest of this condition and the statement it guards are elided.
531 if (II.getType() == types::TY_PP_Asm &&
// The profile name depends on whether the input is preprocessed assembly;
// both alternatives are elided.
536 const char *Arch = (II.getType() == types::TY_PP_Asm)
539 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"--image=profile=") +
540 Arch +
",file=" + II.getFilename()));
// Pass through every value given via OPT_Xcuda_fatbinary unchanged.
543 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
544 CmdArgs.push_back(Args.MakeArgString(A));
546 const char *Exec = Args.MakeArgString(TC.GetProgramPath(
"fatbinary"));
547 C.addCommand(std::make_unique<Command>(
551 Exec, CmdArgs, Inputs, Output));
// Fragment of a const job-construction member that assembles a command line
// for `clang-nvlink-wrapper` in CmdArgs and registers it with the
// compilation C.
558 const char *LinkingOutput)
const {
561 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
563 ArgStringList CmdArgs;
568 "CUDA toolchain not expected for an OpenMP host device.");
// The output operand following "-o" and the branch containing the isNothing
// assertion are elided.
571 CmdArgs.push_back(
"-o");
574 assert(Output.
isNothing() &&
"Invalid output.");
// The condition guarding -g is elided.
576 CmdArgs.push_back(
"-g");
578 if (Args.hasArg(options::OPT_v))
579 CmdArgs.push_back(
"-v");
// The target architecture is taken from OPT_march_EQ and must be non-empty.
// NOTE(review): the assertion text mentions ptxas although this job looks up
// nvlink and clang-nvlink-wrapper — possibly a copy/paste leftover; confirm.
582 Args.getLastArgValue(options::OPT_march_EQ);
583 assert(!GPUArch.empty() &&
"At least one GPU Arch required for ptxas.");
585 CmdArgs.push_back(
"-arch");
586 CmdArgs.push_back(Args.MakeArgString(GPUArch));
// Add a -L entry for the "lib" CLANG_LIBDIR_SUFFIX directory beside the
// parent of the driver's directory.
593 llvm::sys::path::parent_path(TC.getDriver().Dir);
594 llvm::sys::path::append(DefaultLibPath,
"lib" CLANG_LIBDIR_SUFFIX);
595 CmdArgs.push_back(Args.MakeArgString(Twine(
"-L") + DefaultLibPath));
// LLVM IR / bitcode inputs are diagnosed as unsupported by this linker; how
// the loop proceeds afterwards is elided.
597 for (
const auto &II : Inputs) {
598 if (II.getType() == types::TY_LLVM_IR ||
599 II.getType() == types::TY_LTO_IR ||
600 II.getType() == types::TY_LTO_BC ||
601 II.getType() == types::TY_LLVM_BC) {
602 C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
603 << getToolChain().getTripleString();
// Non-filename inputs are tested here (guarded statement elided); the others
// are passed under the name returned by getInputFilename.
609 if (!II.isFilename())
613 C.getArgs().MakeArgString(getToolChain().getInputFilename(II));
615 CmdArgs.push_back(CubinF);
// Tell the wrapper where the real nvlink lives.
624 CmdArgs.push_back(Args.MakeArgString(
625 Twine(
"--nvlink-path=" + getToolChain().GetProgramPath(
"nvlink"))));
628 Args.MakeArgString(getToolChain().GetProgramPath(
"clang-nvlink-wrapper"));
629 C.addCommand(std::make_unique<Command>(
633 Exec, CmdArgs, Inputs, Output));
// Fragment: appends a "+ptxNN" target feature to Features.
637 const llvm::opt::ArgList &Args,
638 std::vector<StringRef> &Features,
// An explicit OPT_cuda_feature_EQ value is pushed as given ("+ptx42" is the
// fallback passed to getLastArgValue).
640 if (Args.hasArg(options::OPT_cuda_feature_EQ)) {
641 StringRef PtxFeature =
642 Args.getLastArgValue(options::OPT_cuda_feature_EQ,
"+ptx42");
643 Features.push_back(Args.MakeArgString(PtxFeature));
645 }
// When no Version is set, fall back to the one reported by CudaInstallation.
else if (!Version) {
647 Version = CudaInstallation.
version();
653 const char *PtxFeature =
nullptr;
// Helper macro mapping a CudaVersion enumerator to its "+ptx<N>" string; the
// switch using it and the macro's final line are elided.
655 #define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \
656 case CudaVersion::CUDA_##CUDA_VER: \
657 PtxFeature = "+ptx" #PTX_VER; \
671 #undef CASE_CUDA_VERSION
// "+ptx42" is also assigned here; the case or branch it belongs to is elided.
673 PtxFeature =
"+ptx42";
675 Features.push_back(PtxFeature);
// Constructor head (one parameter line and the body are elided). It forwards
// (D, Triple, Args) to the ToolChain base, stores the host toolchain, and
// builds CudaInstallation from the HOST toolchain's triple rather than from
// Triple.
682 CudaToolChain::CudaToolChain(
const Driver &D,
const llvm::Triple &Triple,
683 const ToolChain &HostTC,
const ArgList &Args,
685 :
ToolChain(D, Triple, Args), HostTC(HostTC),
686 CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
706 llvm::sys::path::replace_extension(
Filename,
"cubin");
// Fragment: adds device-side cc1 options to CC1Args based on DriverArgs.
711 const llvm::opt::ArgList &DriverArgs,
712 llvm::opt::ArgStringList &CC1Args,
// An explicit OPT_march_EQ value is required.
716 StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
717 assert(!GpuArch.empty() &&
"Must have an explicit GPU arch.");
720 "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
// The call receiving this list and its guarding condition are elided.
724 {
"-fcuda-is-device",
"-mllvm",
"-enable-memcpyopt-without-libcalls"});
// Approximate transcendentals are off unless the positive flag wins.
726 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
727 options::OPT_fno_cuda_approx_transcendentals,
false))
728 CC1Args.push_back(
"-fcuda-approx-transcendentals");
// OPT_nogpulib and OPT_S are tested before libdevice is handled; the
// statements they guard are elided.
731 if (DriverArgs.hasArg(options::OPT_nogpulib))
735 DriverArgs.hasArg(options::OPT_S))
// A missing libdevice file for GpuArch is diagnosed; otherwise the file is
// linked in as builtin bitcode.
739 if (LibDeviceFile.empty()) {
740 getDriver().
Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
744 CC1Args.push_back(
"-mlink-builtin-bitcode");
745 CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
// Each collected feature becomes a -target-feature pair; the call that fills
// Features is only partly visible.
749 std::vector<StringRef> Features;
751 CudaInstallationVersion);
752 for (StringRef PtxFeature : Features)
753 CC1Args.append({
"-target-feature", DriverArgs.MakeArgString(PtxFeature)});
// Short pointers are off unless the positive flag wins.
754 if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
755 options::OPT_fno_cuda_short_ptr,
false))
756 CC1Args.append({
"-mllvm",
"--nvptx-short-ptr"});
// The value appended after "-target-sdk-version=" and the condition for the
// OpenMP version diagnostic are elided.
760 DriverArgs.MakeArgString(Twine(
"-target-sdk-version=") +
766 diag::err_drv_omp_offload_target_cuda_version_not_support)
// Fragment: selects the default denormal mode for a floating-point type.
// Preserve-sign is returned only for IEEE single precision when the
// flush-denormals flag pair resolves to enabled (default false); the other
// visible return is IEEE. Statements between the two returns are elided.
784 const llvm::opt::ArgList &DriverArgs,
const JobAction &JA,
785 const llvm::fltSemantics *FPType)
const {
// FPType is compared by address against the IEEEsingle semantics object.
787 if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
788 DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
789 options::OPT_fno_gpu_flush_denormals_to_zero,
false))
790 return llvm::DenormalMode::getPreserveSign();
794 return llvm::DenormalMode::getIEEE();
// Fragment: predicate over the option of argument A. It is true for options
// in the gN group other than -gmodules, and for the plain -g flag, the ggdb
// variants, the gdwarf variants (base and versions 2 through 5), and the
// column-info option.
798 const Option &O = A->getOption();
799 return (O.matches(options::OPT_gN_Group) &&
800 !O.matches(options::OPT_gmodules)) ||
801 O.matches(options::OPT_g_Flag) ||
802 O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
803 O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
804 O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
805 O.matches(options::OPT_gdwarf_5) ||
806 O.matches(options::OPT_gcolumn_info);
812 case DisableDebugInfo:
818 case EmitSameDebugInfoAsHost:
// Fragment of a const member taking CC1Args. This part runs only when
// neither OPT_nogpuinc nor OPT_no_cuda_version_check is given, and requires
// an explicit OPT_march_EQ value.
// NOTE(review): the use of Arch after the assertion is elided — presumably
// it feeds a CUDA version/arch compatibility check; confirm.
825 ArgStringList &CC1Args)
const {
827 if (!DriverArgs.hasArg(options::OPT_nogpuinc) &&
828 !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
829 StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
830 assert(!Arch.empty() &&
"Must have an explicit GPU arch.");
// Fragment of a routine returning a DerivedArgList pointer: it derives an
// argument list for the device toolchain and makes sure it carries an
// OPT_march_EQ entry.
836 llvm::opt::DerivedArgList *
840 DerivedArgList *DAL =
// A fresh list is allocated over the base arguments on this path; the
// condition selecting it and the owner of the allocation are elided.
843 DAL =
new DerivedArgList(Args.getBaseArgs());
// A is handled only when DAL does not already contain it (action elided).
852 if (!llvm::is_contained(*DAL, A))
// With no march present, add one: BoundArch when non-empty, otherwise the
// CLANG_OPENMP_NVPTX_DEFAULT_ARCH macro value.
855 if (!DAL->hasArg(options::OPT_march_EQ))
856 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ),
857 !BoundArch.empty() ? BoundArch
858 : CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
// Second visible path: arguments are walked (loop body elided), then any
// existing march is replaced by BoundArch when one is bound.
863 for (Arg *A : Args) {
867 if (!BoundArch.empty()) {
868 DAL->eraseArg(options::OPT_march_EQ);
869 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
894 ArgStringList &CC1Args)
const {
899 {
"-internal-isystem",
904 ArgStringList &CC1Args)
const {
909 ArgStringList &CC1Args)
const {
927 const ArgList &Args)
const {