clang 20.0.0git
DeviceOffload.cpp
Go to the documentation of this file.
1//===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements offloading to CUDA devices.
10//
11//===----------------------------------------------------------------------===//
12
13#include "DeviceOffload.h"
14
18
19#include "llvm/IR/LegacyPassManager.h"
20#include "llvm/IR/Module.h"
21#include "llvm/MC/TargetRegistry.h"
22#include "llvm/Target/TargetMachine.h"
23
24namespace clang {
25
27 Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,
28 IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,
30 llvm::Error &Err)
31 : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err),
32 HostParser(HostParser), VFS(FS) {
33 if (Err)
34 return;
35 StringRef Arch = CI->getTargetOpts().CPU;
36 if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
37 Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
38 "Invalid CUDA architecture",
39 llvm::inconvertibleErrorCode()));
40 return;
41 }
42}
43
// Parses one incremental input for the device side, lowers it to PTX, wraps
// the PTX into a fatbinary and registers that fatbinary as a file in VFS.
// Returns the partial translation unit produced by the base parser, or the
// first error raised by any of the steps.
45IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
46 auto PTU = IncrementalParser::Parse(Input);
47 if (!PTU)
48 return PTU.takeError();
49
// GeneratePTX() and GenerateFatbinary() take no arguments; the value held in
// PTX is only tested for failure here and not otherwise used.
50 auto PTX = GeneratePTX();
51 if (!PTX)
52 return PTX.takeError();
53
54 auto Err = GenerateFatbinary();
55 if (Err)
56 return std::move(Err);
57
// The file name embeds the current number of PTUs: "/incr_module_<N>.fatbin".
58 std::string FatbinFileName =
59 "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
// NOTE(review): getMemBuffer() is handed a StringRef over FatbinContent's own
// storage, and FatbinContent is cleared a few lines below. Presumably the
// buffer does not copy the bytes - confirm that the file is consumed before
// FatbinContent is refilled or reallocated.
60 VFS->addFile(FatbinFileName, 0,
61 llvm::MemoryBuffer::getMemBuffer(
62 llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
63 "", false));
64
// NOTE(review): listing line 65 is absent from this extract. Presumably it
// passes FatbinFileName on to the host side - confirm against the real file.
66
// Empty the staging buffer once the fatbinary has been registered.
67 FatbinContent.clear();
68
69 return PTU;
70}
71
73 auto &PTU = PTUs.back();
74 std::string Error;
75
76 const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
77 PTU.TheModule->getTargetTriple(), Error);
78 if (!Target)
79 return llvm::make_error<llvm::StringError>(std::move(Error),
80 std::error_code());
81 llvm::TargetOptions TO = llvm::TargetOptions();
82 llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
83 PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO,
84 llvm::Reloc::Model::PIC_);
85 PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
86
87 PTXCode.clear();
88 llvm::raw_svector_ostream dest(PTXCode);
89
90 llvm::legacy::PassManager PM;
91 if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
92 llvm::CodeGenFileType::AssemblyFile)) {
93 return llvm::make_error<llvm::StringError>(
94 "NVPTX backend cannot produce PTX code.",
95 llvm::inconvertibleErrorCode());
96 }
97
98 if (!PM.run(*PTU.TheModule))
99 return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
100 llvm::inconvertibleErrorCode());
101
102 PTXCode += '\0';
103 while (PTXCode.size() % 8)
104 PTXCode += '\0';
105 return PTXCode.str();
106}
107
109 enum FatBinFlags {
110 AddressSize64 = 0x01,
111 HasDebugInfo = 0x02,
112 ProducerCuda = 0x04,
113 HostLinux = 0x10,
114 HostMac = 0x20,
115 HostWindows = 0x40
116 };
117
118 struct FatBinInnerHeader {
119 uint16_t Kind; // 0x00
120 uint16_t unknown02; // 0x02
121 uint32_t HeaderSize; // 0x04
122 uint32_t DataSize; // 0x08
123 uint32_t unknown0c; // 0x0c
124 uint32_t CompressedSize; // 0x10
125 uint32_t SubHeaderSize; // 0x14
126 uint16_t VersionMinor; // 0x18
127 uint16_t VersionMajor; // 0x1a
128 uint32_t CudaArch; // 0x1c
129 uint32_t unknown20; // 0x20
130 uint32_t unknown24; // 0x24
131 uint32_t Flags; // 0x28
132 uint32_t unknown2c; // 0x2c
133 uint32_t unknown30; // 0x30
134 uint32_t unknown34; // 0x34
135 uint32_t UncompressedSize; // 0x38
136 uint32_t unknown3c; // 0x3c
137 uint32_t unknown40; // 0x40
138 uint32_t unknown44; // 0x44
139 FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
140 : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
141 DataSize(DataSize), unknown0c(0), CompressedSize(0),
142 SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
143 CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
144 unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
145 unknown3c(0), unknown40(0), unknown44(0) {}
146 };
147
148 struct FatBinHeader {
149 uint32_t Magic; // 0x00
150 uint16_t Version; // 0x04
151 uint16_t HeaderSize; // 0x06
152 uint32_t DataSize; // 0x08
153 uint32_t unknown0c; // 0x0c
154 public:
155 FatBinHeader(uint32_t DataSize)
156 : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
157 DataSize(DataSize), unknown0c(0) {}
158 };
159
160 FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
161 FatbinContent.append((char *)&OuterHeader,
162 ((char *)&OuterHeader) + OuterHeader.HeaderSize);
163
164 FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
165 FatBinFlags::AddressSize64 |
166 FatBinFlags::HostLinux);
167 FatbinContent.append((char *)&InnerHeader,
168 ((char *)&InnerHeader) + InnerHeader.HeaderSize);
169
170 FatbinContent.append(PTXCode.begin(), PTXCode.end());
171
172 return llvm::Error::success();
173}
174
176
177} // namespace clang
enum clang::sema::@1651::IndirectLocalPathEntry::EntryKind Kind
llvm::MachO::Target Target
Definition: MachO.h:51
Defines the clang::TargetOptions class.
std::string CudaGpuBinaryFileName
Name of file passed with -fcuda-include-gpubinary option to forward to CUDA runtime back-end for inco...
CodeGenOptions & getCodeGenOpts()
llvm::SmallVector< char, 1024 > FatbinContent
Definition: DeviceOffload.h:45
llvm::SmallString< 1024 > PTXCode
Definition: DeviceOffload.h:44
IncrementalCUDADeviceParser(Interpreter &Interp, std::unique_ptr< CompilerInstance > Instance, IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx, llvm::IntrusiveRefCntPtr< llvm::vfs::InMemoryFileSystem > VFS, llvm::Error &Err)
llvm::Expected< PartialTranslationUnit & > Parse(llvm::StringRef Input) override
Parses incremental input by creating an in-memory file.
llvm::IntrusiveRefCntPtr< llvm::vfs::InMemoryFileSystem > VFS
Definition: DeviceOffload.h:46
llvm::Expected< llvm::StringRef > GeneratePTX()
Provides support for incremental compilation.
std::list< PartialTranslationUnit > PTUs
List containing all information about every incrementally parsed piece of code.
virtual llvm::Expected< PartialTranslationUnit & > Parse(llvm::StringRef Input)
Parses incremental input by creating an in-memory file.
CompilerInstance * getCI()
std::unique_ptr< CompilerInstance > CI
Compiler instance performing the incremental compilation.
Provides top-level interfaces for incremental compilation and execution.
Definition: Interpreter.h:91
The JSON file list parser is used to communicate input to InstallAPI.