clang  7.0.0svn
CGCUDANV.cpp
Go to the documentation of this file.
1 //===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for CUDA code generation targeting the NVIDIA CUDA
11 // runtime library.
12 //
13 //===----------------------------------------------------------------------===//
14 
#include "CGCUDARuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/Decl.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Format.h"
25 
26 using namespace clang;
27 using namespace CodeGen;
28 
29 namespace {
30 constexpr unsigned CudaFatMagic = 0x466243b1;
31 constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF"
32 
33 class CGNVCUDARuntime : public CGCUDARuntime {
34 
35 private:
36  llvm::IntegerType *IntTy, *SizeTy;
37  llvm::Type *VoidTy;
38  llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
39 
40  /// Convenience reference to LLVM Context
41  llvm::LLVMContext &Context;
42  /// Convenience reference to the current module
43  llvm::Module &TheModule;
44  /// Keeps track of kernel launch stubs emitted in this module
47  /// Keeps track of variable containing handle of GPU binary. Populated by
48  /// ModuleCtorFunction() and used to create corresponding cleanup calls in
49  /// ModuleDtorFunction()
50  llvm::GlobalVariable *GpuBinaryHandle = nullptr;
51  /// Whether we generate relocatable device code.
52  bool RelocatableDeviceCode;
53 
54  llvm::Constant *getSetupArgumentFn() const;
55  llvm::Constant *getLaunchFn() const;
56 
57  llvm::FunctionType *getRegisterGlobalsFnTy() const;
58  llvm::FunctionType *getCallbackFnTy() const;
59  llvm::FunctionType *getRegisterLinkedBinaryFnTy() const;
60  std::string addPrefixToName(StringRef FuncName) const;
61  std::string addUnderscoredPrefixToName(StringRef FuncName) const;
62 
63  /// Creates a function to register all kernel stubs generated in this module.
64  llvm::Function *makeRegisterGlobalsFn();
65 
66  /// Helper function that generates a constant string and returns a pointer to
67  /// the start of the string. The result of this function can be used anywhere
68  /// where the C code specifies const char*.
69  llvm::Constant *makeConstantString(const std::string &Str,
70  const std::string &Name = "",
71  const std::string &SectionName = "",
72  unsigned Alignment = 0) {
73  llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
74  llvm::ConstantInt::get(SizeTy, 0)};
75  auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
76  llvm::GlobalVariable *GV =
77  cast<llvm::GlobalVariable>(ConstStr.getPointer());
78  if (!SectionName.empty()) {
79  GV->setSection(SectionName);
80  // Mark the address as used which make sure that this section isn't
81  // merged and we will really have it in the object file.
82  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
83  }
84  if (Alignment)
85  GV->setAlignment(Alignment);
86 
87  return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
88  ConstStr.getPointer(), Zeros);
89  }
90 
91  /// Helper function that generates an empty dummy function returning void.
92  llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
93  assert(FnTy->getReturnType()->isVoidTy() &&
94  "Can only generate dummy functions returning void!");
95  llvm::Function *DummyFunc = llvm::Function::Create(
96  FnTy, llvm::GlobalValue::InternalLinkage, "dummy", &TheModule);
97 
98  llvm::BasicBlock *DummyBlock =
99  llvm::BasicBlock::Create(Context, "", DummyFunc);
100  CGBuilderTy FuncBuilder(CGM, Context);
101  FuncBuilder.SetInsertPoint(DummyBlock);
102  FuncBuilder.CreateRetVoid();
103 
104  return DummyFunc;
105  }
106 
107  void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
108 
109 public:
110  CGNVCUDARuntime(CodeGenModule &CGM);
111 
112  void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
113  void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override {
114  DeviceVars.push_back(std::make_pair(&Var, Flags));
115  }
116 
117  /// Creates module constructor function
118  llvm::Function *makeModuleCtorFunction() override;
119  /// Creates module destructor function
120  llvm::Function *makeModuleDtorFunction() override;
121 };
122 
123 }
124 
125 std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const {
126  if (CGM.getLangOpts().HIP)
127  return ((Twine("hip") + Twine(FuncName)).str());
128  return ((Twine("cuda") + Twine(FuncName)).str());
129 }
130 std::string
131 CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
132  if (CGM.getLangOpts().HIP)
133  return ((Twine("__hip") + Twine(FuncName)).str());
134  return ((Twine("__cuda") + Twine(FuncName)).str());
135 }
136 
137 CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
138  : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
139  TheModule(CGM.getModule()),
140  RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) {
141  CodeGen::CodeGenTypes &Types = CGM.getTypes();
142  ASTContext &Ctx = CGM.getContext();
143 
144  IntTy = CGM.IntTy;
145  SizeTy = CGM.SizeTy;
146  VoidTy = CGM.VoidTy;
147 
148  CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
149  VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
150  VoidPtrPtrTy = VoidPtrTy->getPointerTo();
151 }
152 
153 llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
154  // cudaError_t cudaSetupArgument(void *, size_t, size_t)
155  llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
156  return CGM.CreateRuntimeFunction(
157  llvm::FunctionType::get(IntTy, Params, false),
158  addPrefixToName("SetupArgument"));
159 }
160 
161 llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
162  if (CGM.getLangOpts().HIP) {
163  // hipError_t hipLaunchByPtr(char *);
164  return CGM.CreateRuntimeFunction(
165  llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
166  } else {
167  // cudaError_t cudaLaunch(char *);
168  return CGM.CreateRuntimeFunction(
169  llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
170  }
171 }
172 
173 llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
174  return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
175 }
176 
177 llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
178  return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
179 }
180 
181 llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
182  auto CallbackFnTy = getCallbackFnTy();
183  auto RegisterGlobalsFnTy = getRegisterGlobalsFnTy();
184  llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy,
185  VoidPtrTy, CallbackFnTy->getPointerTo()};
186  return llvm::FunctionType::get(VoidTy, Params, false);
187 }
188 
189 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
190  FunctionArgList &Args) {
191  EmittedKernels.push_back(CGF.CurFn);
192  emitDeviceStubBody(CGF, Args);
193 }
194 
195 void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF,
196  FunctionArgList &Args) {
197  // Emit a call to cudaSetupArgument for each arg in Args.
198  llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
199  llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
201  for (const VarDecl *A : Args) {
202  CharUnits TyWidth, TyAlign;
203  std::tie(TyWidth, TyAlign) =
204  CGM.getContext().getTypeInfoInChars(A->getType());
205  Offset = Offset.alignTo(TyAlign);
206  llvm::Value *Args[] = {
207  CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
208  VoidPtrTy),
209  llvm::ConstantInt::get(SizeTy, TyWidth.getQuantity()),
210  llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
211  };
212  llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args);
213  llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
214  llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
215  llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
216  CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
217  CGF.EmitBlock(NextBlock);
218  Offset += TyWidth;
219  }
220 
221  // Emit the call to cudaLaunch
222  llvm::Constant *cudaLaunchFn = getLaunchFn();
223  llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
224  CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
225  CGF.EmitBranch(EndBlock);
226 
227  CGF.EmitBlock(EndBlock);
228 }
229 
230 /// Creates a function that sets up state on the host side for CUDA objects that
231 /// have a presence on both the host and device sides. Specifically, registers
232 /// the host side of kernel functions and device global variables with the CUDA
233 /// runtime.
234 /// \code
235 /// void __cuda_register_globals(void** GpuBinaryHandle) {
236 /// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...);
237 /// ...
238 /// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...);
239 /// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...);
240 /// ...
241 /// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...);
242 /// }
243 /// \endcode
244 llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
245  // No need to register anything
246  if (EmittedKernels.empty() && DeviceVars.empty())
247  return nullptr;
248 
249  llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
250  getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
251  addUnderscoredPrefixToName("_register_globals"), &TheModule);
252  llvm::BasicBlock *EntryBB =
253  llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
254  CGBuilderTy Builder(CGM, Context);
255  Builder.SetInsertPoint(EntryBB);
256 
257  // void __cudaRegisterFunction(void **, const char *, char *, const char *,
258  // int, uint3*, uint3*, dim3*, dim3*, int*)
259  llvm::Type *RegisterFuncParams[] = {
260  VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy,
261  VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
262  llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction(
263  llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
264  addUnderscoredPrefixToName("RegisterFunction"));
265 
266  // Extract GpuBinaryHandle passed as the first argument passed to
267  // __cuda_register_globals() and generate __cudaRegisterFunction() call for
268  // each emitted kernel.
269  llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
270  for (llvm::Function *Kernel : EmittedKernels) {
271  llvm::Constant *KernelName = makeConstantString(Kernel->getName());
272  llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
273  llvm::Value *Args[] = {
274  &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy),
275  KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr,
276  NullPtr, NullPtr, NullPtr,
277  llvm::ConstantPointerNull::get(IntTy->getPointerTo())};
278  Builder.CreateCall(RegisterFunc, Args);
279  }
280 
281  // void __cudaRegisterVar(void **, char *, char *, const char *,
282  // int, int, int, int)
283  llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
284  CharPtrTy, IntTy, IntTy,
285  IntTy, IntTy};
286  llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction(
287  llvm::FunctionType::get(IntTy, RegisterVarParams, false),
288  addUnderscoredPrefixToName("RegisterVar"));
289  for (auto &Pair : DeviceVars) {
290  llvm::GlobalVariable *Var = Pair.first;
291  unsigned Flags = Pair.second;
292  llvm::Constant *VarName = makeConstantString(Var->getName());
293  uint64_t VarSize =
294  CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
295  llvm::Value *Args[] = {
296  &GpuBinaryHandlePtr,
297  Builder.CreateBitCast(Var, VoidPtrTy),
298  VarName,
299  VarName,
300  llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0),
301  llvm::ConstantInt::get(IntTy, VarSize),
302  llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0),
303  llvm::ConstantInt::get(IntTy, 0)};
304  Builder.CreateCall(RegisterVar, Args);
305  }
306 
307  Builder.CreateRetVoid();
308  return RegisterKernelsFunc;
309 }
310 
311 /// Creates a global constructor function for the module:
312 /// \code
313 /// void __cuda_module_ctor(void*) {
314 /// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
315 /// __cuda_register_globals(Handle);
316 /// }
317 /// \endcode
318 llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
319  bool IsHIP = CGM.getLangOpts().HIP;
320  // No need to generate ctors/dtors if there is no GPU binary.
321  StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
322  if (CudaGpuBinaryFileName.empty() && !IsHIP)
323  return nullptr;
324 
325  // void __{cuda|hip}_register_globals(void* handle);
326  llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
327  // We always need a function to pass in as callback. Create a dummy
328  // implementation if we don't need to register anything.
329  if (RelocatableDeviceCode && !RegisterGlobalsFunc)
330  RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
331 
332  // void ** __{cuda|hip}RegisterFatBinary(void *);
333  llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction(
334  llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
335  addUnderscoredPrefixToName("RegisterFatBinary"));
336  // struct { int magic, int version, void * gpu_binary, void * dont_care };
337  llvm::StructType *FatbinWrapperTy =
338  llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
339 
340  // Register GPU binary with the CUDA runtime, store returned handle in a
341  // global variable and save a reference in GpuBinaryHandle to be cleaned up
342  // in destructor on exit. Then associate all known kernels with the GPU binary
343  // handle so CUDA runtime can figure out what to call on the GPU side.
344  std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary;
345  if (!IsHIP) {
346  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
347  llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
348  if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
349  CGM.getDiags().Report(diag::err_cannot_open_file)
350  << CudaGpuBinaryFileName << EC.message();
351  return nullptr;
352  }
353  CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
354  }
355 
356  llvm::Function *ModuleCtorFunc = llvm::Function::Create(
357  llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
359  addUnderscoredPrefixToName("_module_ctor"), &TheModule);
360  llvm::BasicBlock *CtorEntryBB =
361  llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
362  CGBuilderTy CtorBuilder(CGM, Context);
363 
364  CtorBuilder.SetInsertPoint(CtorEntryBB);
365 
366  const char *FatbinConstantName;
367  const char *FatbinSectionName;
368  const char *ModuleIDSectionName;
369  StringRef ModuleIDPrefix;
370  llvm::Constant *FatBinStr;
371  unsigned FatMagic;
372  if (IsHIP) {
373  FatbinConstantName = ".hip_fatbin";
374  FatbinSectionName = ".hipFatBinSegment";
375 
376  ModuleIDSectionName = "__hip_module_id";
377  ModuleIDPrefix = "__hip_";
378 
379  // For HIP, create an external symbol __hip_fatbin in section .hip_fatbin.
380  // The external symbol is supposed to contain the fat binary but will be
381  // populated somewhere else, e.g. by lld through link script.
382  FatBinStr = new llvm::GlobalVariable(
383  CGM.getModule(), CGM.Int8Ty,
384  /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
385  "__hip_fatbin", nullptr,
386  llvm::GlobalVariable::NotThreadLocal);
387  cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
388 
389  FatMagic = HIPFatMagic;
390  } else {
391  if (RelocatableDeviceCode)
392  // TODO: Figure out how this is called on mac OS!
393  FatbinConstantName = "__nv_relfatbin";
394  else
395  FatbinConstantName =
396  CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
397  // NVIDIA's cuobjdump looks for fatbins in this section.
398  FatbinSectionName =
399  CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
400 
401  // TODO: Figure out how this is called on mac OS!
402  ModuleIDSectionName = "__nv_module_id";
403  ModuleIDPrefix = "__nv_";
404 
405  // For CUDA, create a string literal containing the fat binary loaded from
406  // the given file.
407  FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
408  FatbinConstantName, 8);
409  FatMagic = CudaFatMagic;
410  }
411 
412  // Create initialized wrapper structure that points to the loaded GPU binary
414  auto Values = Builder.beginStruct(FatbinWrapperTy);
415  // Fatbin wrapper magic.
416  Values.addInt(IntTy, FatMagic);
417  // Fatbin version.
418  Values.addInt(IntTy, 1);
419  // Data.
420  Values.add(FatBinStr);
421  // Unused in fatbin v1.
422  Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
423  llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
424  addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
425  /*constant*/ true);
426  FatbinWrapper->setSection(FatbinSectionName);
427 
428  // Register binary with CUDA/HIP runtime. This is substantially different in
429  // default mode vs. separate compilation!
430  if (!RelocatableDeviceCode) {
431  // GpuBinaryHandle = __{cuda|hip}RegisterFatBinary(&FatbinWrapper);
432  llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
433  RegisterFatbinFunc,
434  CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
435  GpuBinaryHandle = new llvm::GlobalVariable(
436  TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
437  llvm::ConstantPointerNull::get(VoidPtrPtrTy),
438  addUnderscoredPrefixToName("_gpubin_handle"));
439 
440  CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
441  CGM.getPointerAlign());
442 
443  // Call __{cuda|hip}_register_globals(GpuBinaryHandle);
444  if (RegisterGlobalsFunc)
445  CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
446  } else {
447  // Generate a unique module ID.
448  SmallString<64> ModuleID;
449  llvm::raw_svector_ostream OS(ModuleID);
450  OS << ModuleIDPrefix << llvm::format("%x", FatbinWrapper->getGUID());
451  llvm::Constant *ModuleIDConstant =
452  makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32);
453 
454  // Create an alias for the FatbinWrapper that nvcc or hip backend will
455  // look for.
457  Twine("__fatbinwrap") + ModuleID, FatbinWrapper);
458 
459  // void __{cuda|hip}RegisterLinkedBinary%ModuleID%(void (*)(void *), void *,
460  // void *, void (*)(void **))
461  SmallString<128> RegisterLinkedBinaryName(
462  addUnderscoredPrefixToName("RegisterLinkedBinary"));
463  RegisterLinkedBinaryName += ModuleID;
464  llvm::Constant *RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction(
465  getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
466 
467  assert(RegisterGlobalsFunc && "Expecting at least dummy function!");
468  llvm::Value *Args[] = {RegisterGlobalsFunc,
469  CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy),
470  ModuleIDConstant,
471  makeDummyFunction(getCallbackFnTy())};
472  CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
473  }
474 
475  CtorBuilder.CreateRetVoid();
476  return ModuleCtorFunc;
477 }
478 
479 /// Creates a global destructor function that unregisters the GPU code blob
480 /// registered by constructor.
481 /// \code
482 /// void __cuda_module_dtor(void*) {
483 /// __cudaUnregisterFatBinary(Handle);
484 /// }
485 /// \endcode
486 llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
487  // No need for destructor if we don't have a handle to unregister.
488  if (!GpuBinaryHandle)
489  return nullptr;
490 
491  // void __cudaUnregisterFatBinary(void ** handle);
492  llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
493  llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
494  addUnderscoredPrefixToName("UnregisterFatBinary"));
495 
496  llvm::Function *ModuleDtorFunc = llvm::Function::Create(
497  llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
499  addUnderscoredPrefixToName("_module_dtor"), &TheModule);
500 
501  llvm::BasicBlock *DtorEntryBB =
502  llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
503  CGBuilderTy DtorBuilder(CGM, Context);
504  DtorBuilder.SetInsertPoint(DtorEntryBB);
505 
506  auto HandleValue =
507  DtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign());
508  DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
509 
510  DtorBuilder.CreateRetVoid();
511  return ModuleDtorFunc;
512 }
513 
515  return new CGNVCUDARuntime(CGM);
516 }
const llvm::DataLayout & getDataLayout() const
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:184
llvm::IntegerType * IntTy
int
External linkage, which indicates that the entity can be referred to from other translation units...
Definition: Linkage.h:60
CanQualType VoidPtrTy
Definition: ASTContext.h:1025
const CodeGenOptions & getCodeGenOpts() const
The standard implementation of ConstantInitBuilder used in Clang.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1294
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
Represents a variable declaration or definition.
Definition: Decl.h:812
DiagnosticsEngine & getDiags() const
llvm::Value * getPointer() const
Definition: Address.h:38
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:150
static CharUnits Zero()
Zero - Construct a CharUnits quantity of zero.
Definition: CharUnits.h:53
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
uint32_t Offset
Definition: CacheTokens.cpp:43
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Constant * CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false)
Create a new runtime function with the specified type and name.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:179
const LangOptions & getLangOpts() const
ASTContext & getContext() const
The l-value was considered opaque, so the alignment was determined from a type.
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:142
llvm::CallSite EmitRuntimeCallOrInvoke(llvm::Value *callee, ArrayRef< llvm::Value *> args, const Twine &name="")
Emits a call or invoke instruction to the given runtime function.
Definition: CGCall.cpp:3732
std::pair< CharUnits, CharUnits > getTypeInfoInChars(const Type *T) const
FunctionArgList - Type for representing both the decl and type of parameters to a function...
Definition: CGCall.h:356
CanQualType CharTy
Definition: ASTContext.h:999
This class organizes the cross-function state that is used while generating LLVM code.
std::string CudaGpuBinaryFileName
Name of file passed with -fcuda-include-gpubinary option to forward to CUDA runtime back-end for inco...
Dataflow Directional Tag Classes.
std::unique_ptr< DiagnosticConsumer > create(StringRef OutputFile, DiagnosticOptions *Diags, bool MergeChildRecords=false)
Returns a DiagnosticConsumer that serializes diagnostics to a bitcode file.
llvm::Module & getModule() const
This class organizes the cross-module state that is used while lowering AST types to LLVM types...
Definition: CodeGenTypes.h:120
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
Definition: Linkage.h:32
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition: CGStmt.cpp:445
CGCUDARuntime * CreateNVCUDARuntime(CodeGenModule &CGM)
Creates an instance of a CUDA runtime class.
Definition: CGCUDANV.cpp:514
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
Definition: CGStmt.cpp:465
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
const llvm::Triple & getTriple() const