clang  14.0.0git
CGCUDANV.cpp
Go to the documentation of this file.
1 //===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for CUDA code generation targeting the NVIDIA CUDA
10 // runtime library.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCUDARuntime.h"
15 #include "CGCXXABI.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "clang/AST/Decl.h"
19 #include "clang/Basic/Cuda.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/ReplaceConstant.h"
26 #include "llvm/Support/Format.h"
27 
28 using namespace clang;
29 using namespace CodeGen;
30 
31 namespace {
32 constexpr unsigned CudaFatMagic = 0x466243b1;
33 constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF"
34 
35 class CGNVCUDARuntime : public CGCUDARuntime {
36 
37 private:
38  llvm::IntegerType *IntTy, *SizeTy;
39  llvm::Type *VoidTy;
40  llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
41 
42  /// Convenience reference to LLVM Context
43  llvm::LLVMContext &Context;
44  /// Convenience reference to the current module
45  llvm::Module &TheModule;
46  /// Keeps track of kernel launch stubs and handles emitted in this module
47  struct KernelInfo {
48  llvm::Function *Kernel; // stub function to help launch kernel
49  const Decl *D;
50  };
51  llvm::SmallVector<KernelInfo, 16> EmittedKernels;
52  // Map a device stub function to a symbol for identifying kernel in host code.
53  // For CUDA, the symbol for identifying the kernel is the same as the device
54  // stub function. For HIP, they are different.
55  llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles;
56  // Map a kernel handle to the kernel stub.
57  llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
58  struct VarInfo {
59  llvm::GlobalVariable *Var;
60  const VarDecl *D;
61  DeviceVarFlags Flags;
62  };
64  /// Keeps track of variable containing handle of GPU binary. Populated by
65  /// ModuleCtorFunction() and used to create corresponding cleanup calls in
66  /// ModuleDtorFunction()
67  llvm::GlobalVariable *GpuBinaryHandle = nullptr;
68  /// Whether we generate relocatable device code.
69  bool RelocatableDeviceCode;
70  /// Mangle context for device.
71  std::unique_ptr<MangleContext> DeviceMC;
72 
73  llvm::FunctionCallee getSetupArgumentFn() const;
74  llvm::FunctionCallee getLaunchFn() const;
75 
76  llvm::FunctionType *getRegisterGlobalsFnTy() const;
77  llvm::FunctionType *getCallbackFnTy() const;
78  llvm::FunctionType *getRegisterLinkedBinaryFnTy() const;
79  std::string addPrefixToName(StringRef FuncName) const;
80  std::string addUnderscoredPrefixToName(StringRef FuncName) const;
81 
82  /// Creates a function to register all kernel stubs generated in this module.
83  llvm::Function *makeRegisterGlobalsFn();
84 
85  /// Helper function that generates a constant string and returns a pointer to
86  /// the start of the string. The result of this function can be used anywhere
87  /// where the C code specifies const char*.
88  llvm::Constant *makeConstantString(const std::string &Str,
89  const std::string &Name = "",
90  const std::string &SectionName = "",
91  unsigned Alignment = 0) {
92  llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
93  llvm::ConstantInt::get(SizeTy, 0)};
94  auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
95  llvm::GlobalVariable *GV =
96  cast<llvm::GlobalVariable>(ConstStr.getPointer());
97  if (!SectionName.empty()) {
98  GV->setSection(SectionName);
99  // Mark the address as used which make sure that this section isn't
100  // merged and we will really have it in the object file.
101  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
102  }
103  if (Alignment)
104  GV->setAlignment(llvm::Align(Alignment));
105 
106  return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
107  ConstStr.getPointer(), Zeros);
108  }
109 
110  /// Helper function that generates an empty dummy function returning void.
111  llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
112  assert(FnTy->getReturnType()->isVoidTy() &&
113  "Can only generate dummy functions returning void!");
114  llvm::Function *DummyFunc = llvm::Function::Create(
115  FnTy, llvm::GlobalValue::InternalLinkage, "dummy", &TheModule);
116 
117  llvm::BasicBlock *DummyBlock =
118  llvm::BasicBlock::Create(Context, "", DummyFunc);
119  CGBuilderTy FuncBuilder(CGM, Context);
120  FuncBuilder.SetInsertPoint(DummyBlock);
121  FuncBuilder.CreateRetVoid();
122 
123  return DummyFunc;
124  }
125 
126  void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
127  void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
128  std::string getDeviceSideName(const NamedDecl *ND) override;
129 
130  void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
131  bool Extern, bool Constant) {
132  DeviceVars.push_back({&Var,
133  VD,
134  {DeviceVarFlags::Variable, Extern, Constant,
135  VD->hasAttr<HIPManagedAttr>(),
136  /*Normalized*/ false, 0}});
137  }
138  void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
139  bool Extern, int Type) {
140  DeviceVars.push_back({&Var,
141  VD,
142  {DeviceVarFlags::Surface, Extern, /*Constant*/ false,
143  /*Managed*/ false,
144  /*Normalized*/ false, Type}});
145  }
146  void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
147  bool Extern, int Type, bool Normalized) {
148  DeviceVars.push_back({&Var,
149  VD,
150  {DeviceVarFlags::Texture, Extern, /*Constant*/ false,
151  /*Managed*/ false, Normalized, Type}});
152  }
153 
154  /// Creates module constructor function
155  llvm::Function *makeModuleCtorFunction();
156  /// Creates module destructor function
157  llvm::Function *makeModuleDtorFunction();
158  /// Transform managed variables for device compilation.
159  void transformManagedVars();
160 
161 public:
162  CGNVCUDARuntime(CodeGenModule &CGM);
163 
164  llvm::GlobalValue *getKernelHandle(llvm::Function *F, GlobalDecl GD) override;
165  llvm::Function *getKernelStub(llvm::GlobalValue *Handle) override {
166  auto Loc = KernelStubs.find(Handle);
167  assert(Loc != KernelStubs.end());
168  return Loc->second;
169  }
170  void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
171  void handleVarRegistration(const VarDecl *VD,
172  llvm::GlobalVariable &Var) override;
173  void
174  internalizeDeviceSideVar(const VarDecl *D,
175  llvm::GlobalValue::LinkageTypes &Linkage) override;
176 
177  llvm::Function *finalizeModule() override;
178 };
179 
180 }
181 
182 std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const {
183  if (CGM.getLangOpts().HIP)
184  return ((Twine("hip") + Twine(FuncName)).str());
185  return ((Twine("cuda") + Twine(FuncName)).str());
186 }
188 CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
189  if (CGM.getLangOpts().HIP)
190  return ((Twine("__hip") + Twine(FuncName)).str());
191  return ((Twine("__cuda") + Twine(FuncName)).str());
192 }
193 
194 static std::unique_ptr<MangleContext> InitDeviceMC(CodeGenModule &CGM) {
195  // If the host and device have different C++ ABIs, mark it as the device
196  // mangle context so that the mangling needs to retrieve the additional
197  // device lambda mangling number instead of the regular host one.
198  if (CGM.getContext().getAuxTargetInfo() &&
201  return std::unique_ptr<MangleContext>(
203  *CGM.getContext().getAuxTargetInfo()));
204  }
205 
206  return std::unique_ptr<MangleContext>(CGM.getContext().createMangleContext(
207  CGM.getContext().getAuxTargetInfo()));
208 }
209 
210 CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
211  : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
212  TheModule(CGM.getModule()),
213  RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
214  DeviceMC(InitDeviceMC(CGM)) {
215  CodeGen::CodeGenTypes &Types = CGM.getTypes();
216  ASTContext &Ctx = CGM.getContext();
217 
218  IntTy = CGM.IntTy;
219  SizeTy = CGM.SizeTy;
220  VoidTy = CGM.VoidTy;
221 
222  CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
223  VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
224  VoidPtrPtrTy = VoidPtrTy->getPointerTo();
225 }
226 
227 llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
228  // cudaError_t cudaSetupArgument(void *, size_t, size_t)
229  llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
230  return CGM.CreateRuntimeFunction(
231  llvm::FunctionType::get(IntTy, Params, false),
232  addPrefixToName("SetupArgument"));
233 }
234 
235 llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
236  if (CGM.getLangOpts().HIP) {
237  // hipError_t hipLaunchByPtr(char *);
238  return CGM.CreateRuntimeFunction(
239  llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
240  } else {
241  // cudaError_t cudaLaunch(char *);
242  return CGM.CreateRuntimeFunction(
243  llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
244  }
245 }
246 
247 llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
248  return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
249 }
250 
251 llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
252  return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
253 }
254 
255 llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
256  auto CallbackFnTy = getCallbackFnTy();
257  auto RegisterGlobalsFnTy = getRegisterGlobalsFnTy();
258  llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy,
259  VoidPtrTy, CallbackFnTy->getPointerTo()};
260  return llvm::FunctionType::get(VoidTy, Params, false);
261 }
262 
263 std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
264  GlobalDecl GD;
265  // D could be either a kernel or a variable.
266  if (auto *FD = dyn_cast<FunctionDecl>(ND))
267  GD = GlobalDecl(FD, KernelReferenceKind::Kernel);
268  else
269  GD = GlobalDecl(ND);
270  std::string DeviceSideName;
271  MangleContext *MC;
272  if (CGM.getLangOpts().CUDAIsDevice)
273  MC = &CGM.getCXXABI().getMangleContext();
274  else
275  MC = DeviceMC.get();
276  if (MC->shouldMangleDeclName(ND)) {
277  SmallString<256> Buffer;
278  llvm::raw_svector_ostream Out(Buffer);
279  MC->mangleName(GD, Out);
280  DeviceSideName = std::string(Out.str());
281  } else
282  DeviceSideName = std::string(ND->getIdentifier()->getName());
283 
284  // Make unique name for device side static file-scope variable for HIP.
285  if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
286  CGM.getLangOpts().GPURelocatableDeviceCode &&
287  !CGM.getLangOpts().CUID.empty()) {
288  SmallString<256> Buffer;
289  llvm::raw_svector_ostream Out(Buffer);
290  Out << DeviceSideName;
292  DeviceSideName = std::string(Out.str());
293  }
294  return DeviceSideName;
295 }
296 
297 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
298  FunctionArgList &Args) {
299  EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
300  if (auto *GV = dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn])) {
301  GV->setLinkage(CGF.CurFn->getLinkage());
302  GV->setInitializer(CGF.CurFn);
303  }
305  CudaFeature::CUDA_USES_NEW_LAUNCH) ||
306  (CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI))
307  emitDeviceStubBodyNew(CGF, Args);
308  else
309  emitDeviceStubBodyLegacy(CGF, Args);
310 }
311 
312 // CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local
313 // array and kernels are launched using cudaLaunchKernel().
314 void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
315  FunctionArgList &Args) {
316  // Build the shadow stack entry at the very start of the function.
317 
318  // Calculate amount of space we will need for all arguments. If we have no
319  // args, allocate a single pointer so we still have a valid pointer to the
320  // argument array that we can pass to runtime, even if it will be unused.
321  Address KernelArgs = CGF.CreateTempAlloca(
322  VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args",
323  llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
324  // Store pointers to the arguments in a locally allocated launch_args.
325  for (unsigned i = 0; i < Args.size(); ++i) {
326  llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer();
327  llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy);
329  VoidVarPtr,
330  CGF.Builder.CreateConstGEP1_32(VoidPtrTy, KernelArgs.getPointer(), i));
331  }
332 
333  llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
334 
335  // Lookup cudaLaunchKernel/hipLaunchKernel function.
336  // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
337  // void **args, size_t sharedMem,
338  // cudaStream_t stream);
339  // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
340  // void **args, size_t sharedMem,
341  // hipStream_t stream);
343  DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
344  auto LaunchKernelName = addPrefixToName("LaunchKernel");
345  IdentifierInfo &cudaLaunchKernelII =
346  CGM.getContext().Idents.get(LaunchKernelName);
347  FunctionDecl *cudaLaunchKernelFD = nullptr;
348  for (auto *Result : DC->lookup(&cudaLaunchKernelII)) {
349  if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
350  cudaLaunchKernelFD = FD;
351  }
352 
353  if (cudaLaunchKernelFD == nullptr) {
354  CGM.Error(CGF.CurFuncDecl->getLocation(),
355  "Can't find declaration for " + LaunchKernelName);
356  return;
357  }
358  // Create temporary dim3 grid_dim, block_dim.
359  ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1);
360  QualType Dim3Ty = GridDimParam->getType();
361  Address GridDim =
362  CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim");
363  Address BlockDim =
364  CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim");
365  Address ShmemSize =
366  CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size");
367  Address Stream =
368  CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream");
369  llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction(
370  llvm::FunctionType::get(IntTy,
371  {/*gridDim=*/GridDim.getType(),
372  /*blockDim=*/BlockDim.getType(),
373  /*ShmemSize=*/ShmemSize.getType(),
374  /*Stream=*/Stream.getType()},
375  /*isVarArg=*/false),
376  addUnderscoredPrefixToName("PopCallConfiguration"));
377 
378  CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
379  {GridDim.getPointer(), BlockDim.getPointer(),
380  ShmemSize.getPointer(), Stream.getPointer()});
381 
382  // Emit the call to cudaLaunch
384  CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy);
385  CallArgList LaunchKernelArgs;
386  LaunchKernelArgs.add(RValue::get(Kernel),
387  cudaLaunchKernelFD->getParamDecl(0)->getType());
388  LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty);
389  LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty);
390  LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()),
391  cudaLaunchKernelFD->getParamDecl(3)->getType());
392  LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)),
393  cudaLaunchKernelFD->getParamDecl(4)->getType());
394  LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)),
395  cudaLaunchKernelFD->getParamDecl(5)->getType());
396 
397  QualType QT = cudaLaunchKernelFD->getType();
398  QualType CQT = QT.getCanonicalType();
399  llvm::Type *Ty = CGM.getTypes().ConvertType(CQT);
400  llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty);
401 
402  const CGFunctionInfo &FI =
403  CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
404  llvm::FunctionCallee cudaLaunchKernelFn =
405  CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
406  CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
407  LaunchKernelArgs);
408  CGF.EmitBranch(EndBlock);
409 
410  CGF.EmitBlock(EndBlock);
411 }
412 
413 void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
414  FunctionArgList &Args) {
415  // Emit a call to cudaSetupArgument for each arg in Args.
416  llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn();
417  llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
419  for (const VarDecl *A : Args) {
420  auto TInfo = CGM.getContext().getTypeInfoInChars(A->getType());
421  Offset = Offset.alignTo(TInfo.Align);
422  llvm::Value *Args[] = {
423  CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
424  VoidPtrTy),
425  llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
426  llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
427  };
428  llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args);
429  llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
430  llvm::Value *CBZero = CGF.Builder.CreateICmpEQ(CB, Zero);
431  llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
432  CGF.Builder.CreateCondBr(CBZero, NextBlock, EndBlock);
433  CGF.EmitBlock(NextBlock);
434  Offset += TInfo.Width;
435  }
436 
437  // Emit the call to cudaLaunch
438  llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
439  llvm::Value *Arg =
440  CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy);
441  CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
442  CGF.EmitBranch(EndBlock);
443 
444  CGF.EmitBlock(EndBlock);
445 }
446 
447 // Replace the original variable Var with the address loaded from variable
448 // ManagedVar populated by HIP runtime.
449 static void replaceManagedVar(llvm::GlobalVariable *Var,
450  llvm::GlobalVariable *ManagedVar) {
452  for (auto &&VarUse : Var->uses()) {
453  WorkList.push_back({VarUse.getUser()});
454  }
455  while (!WorkList.empty()) {
456  auto &&WorkItem = WorkList.pop_back_val();
457  auto *U = WorkItem.back();
458  if (isa<llvm::ConstantExpr>(U)) {
459  for (auto &&UU : U->uses()) {
460  WorkItem.push_back(UU.getUser());
461  WorkList.push_back(WorkItem);
462  WorkItem.pop_back();
463  }
464  continue;
465  }
466  if (auto *I = dyn_cast<llvm::Instruction>(U)) {
467  llvm::Value *OldV = Var;
468  llvm::Instruction *NewV =
469  new llvm::LoadInst(Var->getType(), ManagedVar, "ld.managed", false,
470  llvm::Align(Var->getAlignment()), I);
471  WorkItem.pop_back();
472  // Replace constant expressions directly or indirectly using the managed
473  // variable with instructions.
474  for (auto &&Op : WorkItem) {
475  auto *CE = cast<llvm::ConstantExpr>(Op);
476  auto *NewInst = llvm::createReplacementInstr(CE, I);
477  NewInst->replaceUsesOfWith(OldV, NewV);
478  OldV = CE;
479  NewV = NewInst;
480  }
481  I->replaceUsesOfWith(OldV, NewV);
482  } else {
483  llvm_unreachable("Invalid use of managed variable");
484  }
485  }
486 }
487 
488 /// Creates a function that sets up state on the host side for CUDA objects that
489 /// have a presence on both the host and device sides. Specifically, registers
490 /// the host side of kernel functions and device global variables with the CUDA
491 /// runtime.
492 /// \code
493 /// void __cuda_register_globals(void** GpuBinaryHandle) {
494 /// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...);
495 /// ...
496 /// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...);
497 /// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...);
498 /// ...
499 /// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...);
500 /// }
501 /// \endcode
502 llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
503  // No need to register anything
504  if (EmittedKernels.empty() && DeviceVars.empty())
505  return nullptr;
506 
507  llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
508  getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
509  addUnderscoredPrefixToName("_register_globals"), &TheModule);
510  llvm::BasicBlock *EntryBB =
511  llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
512  CGBuilderTy Builder(CGM, Context);
513  Builder.SetInsertPoint(EntryBB);
514 
515  // void __cudaRegisterFunction(void **, const char *, char *, const char *,
516  // int, uint3*, uint3*, dim3*, dim3*, int*)
517  llvm::Type *RegisterFuncParams[] = {
518  VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy,
519  VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
520  llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction(
521  llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
522  addUnderscoredPrefixToName("RegisterFunction"));
523 
524  // Extract GpuBinaryHandle passed as the first argument passed to
525  // __cuda_register_globals() and generate __cudaRegisterFunction() call for
526  // each emitted kernel.
527  llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
528  for (auto &&I : EmittedKernels) {
529  llvm::Constant *KernelName =
530  makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
531  llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
532  llvm::Value *Args[] = {
533  &GpuBinaryHandlePtr,
534  Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy),
535  KernelName,
536  KernelName,
537  llvm::ConstantInt::get(IntTy, -1),
538  NullPtr,
539  NullPtr,
540  NullPtr,
541  NullPtr,
542  llvm::ConstantPointerNull::get(IntTy->getPointerTo())};
543  Builder.CreateCall(RegisterFunc, Args);
544  }
545 
546  llvm::Type *VarSizeTy = IntTy;
547  // For HIP or CUDA 9.0+, device variable size is type of `size_t`.
548  if (CGM.getLangOpts().HIP ||
549  ToCudaVersion(CGM.getTarget().getSDKVersion()) >= CudaVersion::CUDA_90)
550  VarSizeTy = SizeTy;
551 
552  // void __cudaRegisterVar(void **, char *, char *, const char *,
553  // int, int, int, int)
554  llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
555  CharPtrTy, IntTy, VarSizeTy,
556  IntTy, IntTy};
557  llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
558  llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
559  addUnderscoredPrefixToName("RegisterVar"));
560  // void __hipRegisterManagedVar(void **, char *, char *, const char *,
561  // size_t, unsigned)
562  llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
563  CharPtrTy, VarSizeTy, IntTy};
564  llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction(
565  llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
566  addUnderscoredPrefixToName("RegisterManagedVar"));
567  // void __cudaRegisterSurface(void **, const struct surfaceReference *,
568  // const void **, const char *, int, int);
569  llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
570  llvm::FunctionType::get(
571  VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
572  false),
573  addUnderscoredPrefixToName("RegisterSurface"));
574  // void __cudaRegisterTexture(void **, const struct textureReference *,
575  // const void **, const char *, int, int, int)
576  llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
577  llvm::FunctionType::get(
578  VoidTy,
579  {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
580  false),
581  addUnderscoredPrefixToName("RegisterTexture"));
582  for (auto &&Info : DeviceVars) {
583  llvm::GlobalVariable *Var = Info.Var;
584  assert((!Var->isDeclaration() || Info.Flags.isManaged()) &&
585  "External variables should not show up here, except HIP managed "
586  "variables");
587  llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
588  switch (Info.Flags.getKind()) {
589  case DeviceVarFlags::Variable: {
590  uint64_t VarSize =
591  CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
592  if (Info.Flags.isManaged()) {
593  auto ManagedVar = new llvm::GlobalVariable(
594  CGM.getModule(), Var->getType(),
595  /*isConstant=*/false, Var->getLinkage(),
596  /*Init=*/Var->isDeclaration()
597  ? nullptr
598  : llvm::ConstantPointerNull::get(Var->getType()),
599  /*Name=*/"", /*InsertBefore=*/nullptr,
600  llvm::GlobalVariable::NotThreadLocal);
601  ManagedVar->setDSOLocal(Var->isDSOLocal());
602  ManagedVar->setVisibility(Var->getVisibility());
603  ManagedVar->setExternallyInitialized(true);
604  ManagedVar->takeName(Var);
605  Var->setName(Twine(ManagedVar->getName() + ".managed"));
606  replaceManagedVar(Var, ManagedVar);
607  llvm::Value *Args[] = {
608  &GpuBinaryHandlePtr,
609  Builder.CreateBitCast(ManagedVar, VoidPtrTy),
610  Builder.CreateBitCast(Var, VoidPtrTy),
611  VarName,
612  llvm::ConstantInt::get(VarSizeTy, VarSize),
613  llvm::ConstantInt::get(IntTy, Var->getAlignment())};
614  if (!Var->isDeclaration())
615  Builder.CreateCall(RegisterManagedVar, Args);
616  } else {
617  llvm::Value *Args[] = {
618  &GpuBinaryHandlePtr,
619  Builder.CreateBitCast(Var, VoidPtrTy),
620  VarName,
621  VarName,
622  llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
623  llvm::ConstantInt::get(VarSizeTy, VarSize),
624  llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
625  llvm::ConstantInt::get(IntTy, 0)};
626  Builder.CreateCall(RegisterVar, Args);
627  }
628  break;
629  }
630  case DeviceVarFlags::Surface:
631  Builder.CreateCall(
632  RegisterSurf,
633  {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
634  VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
635  llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
636  break;
637  case DeviceVarFlags::Texture:
638  Builder.CreateCall(
639  RegisterTex,
640  {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
641  VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
642  llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()),
643  llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
644  break;
645  }
646  }
647 
648  Builder.CreateRetVoid();
649  return RegisterKernelsFunc;
650 }
651 
652 /// Creates a global constructor function for the module:
653 ///
654 /// For CUDA:
655 /// \code
656 /// void __cuda_module_ctor(void*) {
657 /// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
658 /// __cuda_register_globals(Handle);
659 /// }
660 /// \endcode
661 ///
662 /// For HIP:
663 /// \code
664 /// void __hip_module_ctor(void*) {
665 /// if (__hip_gpubin_handle == 0) {
666 /// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob);
667 /// __hip_register_globals(__hip_gpubin_handle);
668 /// }
669 /// }
670 /// \endcode
671 llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
672  bool IsHIP = CGM.getLangOpts().HIP;
673  bool IsCUDA = CGM.getLangOpts().CUDA;
674  // No need to generate ctors/dtors if there is no GPU binary.
675  StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
676  if (CudaGpuBinaryFileName.empty() && !IsHIP)
677  return nullptr;
678  if ((IsHIP || (IsCUDA && !RelocatableDeviceCode)) && EmittedKernels.empty() &&
679  DeviceVars.empty())
680  return nullptr;
681 
682  // void __{cuda|hip}_register_globals(void* handle);
683  llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
684  // We always need a function to pass in as callback. Create a dummy
685  // implementation if we don't need to register anything.
686  if (RelocatableDeviceCode && !RegisterGlobalsFunc)
687  RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
688 
689  // void ** __{cuda|hip}RegisterFatBinary(void *);
690  llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction(
691  llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
692  addUnderscoredPrefixToName("RegisterFatBinary"));
693  // struct { int magic, int version, void * gpu_binary, void * dont_care };
694  llvm::StructType *FatbinWrapperTy =
695  llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
696 
697  // Register GPU binary with the CUDA runtime, store returned handle in a
698  // global variable and save a reference in GpuBinaryHandle to be cleaned up
699  // in destructor on exit. Then associate all known kernels with the GPU binary
700  // handle so CUDA runtime can figure out what to call on the GPU side.
701  std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr;
702  if (!CudaGpuBinaryFileName.empty()) {
703  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
704  llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
705  if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
706  CGM.getDiags().Report(diag::err_cannot_open_file)
707  << CudaGpuBinaryFileName << EC.message();
708  return nullptr;
709  }
710  CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
711  }
712 
713  llvm::Function *ModuleCtorFunc = llvm::Function::Create(
714  llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
716  addUnderscoredPrefixToName("_module_ctor"), &TheModule);
717  llvm::BasicBlock *CtorEntryBB =
718  llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
719  CGBuilderTy CtorBuilder(CGM, Context);
720 
721  CtorBuilder.SetInsertPoint(CtorEntryBB);
722 
723  const char *FatbinConstantName;
724  const char *FatbinSectionName;
725  const char *ModuleIDSectionName;
726  StringRef ModuleIDPrefix;
727  llvm::Constant *FatBinStr;
728  unsigned FatMagic;
729  if (IsHIP) {
730  FatbinConstantName = ".hip_fatbin";
731  FatbinSectionName = ".hipFatBinSegment";
732 
733  ModuleIDSectionName = "__hip_module_id";
734  ModuleIDPrefix = "__hip_";
735 
736  if (CudaGpuBinary) {
737  // If fatbin is available from early finalization, create a string
738  // literal containing the fat binary loaded from the given file.
739  const unsigned HIPCodeObjectAlign = 4096;
740  FatBinStr =
741  makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
742  FatbinConstantName, HIPCodeObjectAlign);
743  } else {
744  // If fatbin is not available, create an external symbol
745  // __hip_fatbin in section .hip_fatbin. The external symbol is supposed
746  // to contain the fat binary but will be populated somewhere else,
747  // e.g. by lld through link script.
748  FatBinStr = new llvm::GlobalVariable(
749  CGM.getModule(), CGM.Int8Ty,
750  /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
751  "__hip_fatbin", nullptr,
752  llvm::GlobalVariable::NotThreadLocal);
753  cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
754  }
755 
756  FatMagic = HIPFatMagic;
757  } else {
758  if (RelocatableDeviceCode)
759  FatbinConstantName = CGM.getTriple().isMacOSX()
760  ? "__NV_CUDA,__nv_relfatbin"
761  : "__nv_relfatbin";
762  else
763  FatbinConstantName =
764  CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
765  // NVIDIA's cuobjdump looks for fatbins in this section.
766  FatbinSectionName =
767  CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
768 
769  ModuleIDSectionName = CGM.getTriple().isMacOSX()
770  ? "__NV_CUDA,__nv_module_id"
771  : "__nv_module_id";
772  ModuleIDPrefix = "__nv_";
773 
774  // For CUDA, create a string literal containing the fat binary loaded from
775  // the given file.
776  FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
777  FatbinConstantName, 8);
778  FatMagic = CudaFatMagic;
779  }
780 
781  // Create initialized wrapper structure that points to the loaded GPU binary
782  ConstantInitBuilder Builder(CGM);
783  auto Values = Builder.beginStruct(FatbinWrapperTy);
784  // Fatbin wrapper magic.
785  Values.addInt(IntTy, FatMagic);
786  // Fatbin version.
787  Values.addInt(IntTy, 1);
788  // Data.
789  Values.add(FatBinStr);
790  // Unused in fatbin v1.
791  Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
792  llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
793  addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
794  /*constant*/ true);
795  FatbinWrapper->setSection(FatbinSectionName);
796 
797  // There is only one HIP fat binary per linked module, however there are
798  // multiple constructor functions. Make sure the fat binary is registered
799  // only once. The constructor functions are executed by the dynamic loader
800  // before the program gains control. The dynamic loader cannot execute the
801  // constructor functions concurrently since doing that would not guarantee
802  // thread safety of the loaded program. Therefore we can assume sequential
803  // execution of constructor functions here.
804  if (IsHIP) {
805  auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage :
806  llvm::GlobalValue::LinkOnceAnyLinkage;
807  llvm::BasicBlock *IfBlock =
808  llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
809  llvm::BasicBlock *ExitBlock =
810  llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc);
811  // The name, size, and initialization pattern of this variable is part
812  // of HIP ABI.
813  GpuBinaryHandle = new llvm::GlobalVariable(
814  TheModule, VoidPtrPtrTy, /*isConstant=*/false,
815  Linkage,
816  /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
817  "__hip_gpubin_handle");
818  GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
819  // Prevent the weak symbol in different shared libraries being merged.
821  GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
822  Address GpuBinaryAddr(
823  GpuBinaryHandle,
824  CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
825  {
826  auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
827  llvm::Constant *Zero =
828  llvm::Constant::getNullValue(HandleValue->getType());
829  llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
830  CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
831  }
832  {
833  CtorBuilder.SetInsertPoint(IfBlock);
834  // GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper);
835  llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
836  RegisterFatbinFunc,
837  CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
838  CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
839  CtorBuilder.CreateBr(ExitBlock);
840  }
841  {
842  CtorBuilder.SetInsertPoint(ExitBlock);
843  // Call __hip_register_globals(GpuBinaryHandle);
844  if (RegisterGlobalsFunc) {
845  auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
846  CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
847  }
848  }
849  } else if (!RelocatableDeviceCode) {
850  // Register binary with CUDA runtime. This is substantially different in
851  // default mode vs. separate compilation!
852  // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
853  llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
854  RegisterFatbinFunc,
855  CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
856  GpuBinaryHandle = new llvm::GlobalVariable(
857  TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
858  llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
859  GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
860  CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
861  CGM.getPointerAlign());
862 
863  // Call __cuda_register_globals(GpuBinaryHandle);
864  if (RegisterGlobalsFunc)
865  CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
866 
867  // Call __cudaRegisterFatBinaryEnd(Handle) if this CUDA version needs it.
869  CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
870  // void __cudaRegisterFatBinaryEnd(void **);
871  llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction(
872  llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
873  "__cudaRegisterFatBinaryEnd");
874  CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
875  }
876  } else {
877  // Generate a unique module ID.
878  SmallString<64> ModuleID;
879  llvm::raw_svector_ostream OS(ModuleID);
880  OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
881  llvm::Constant *ModuleIDConstant = makeConstantString(
882  std::string(ModuleID.str()), "", ModuleIDSectionName, 32);
883 
884  // Create an alias for the FatbinWrapper that nvcc will look for.
886  Twine("__fatbinwrap") + ModuleID, FatbinWrapper);
887 
888  // void __cudaRegisterLinkedBinary%ModuleID%(void (*)(void *), void *,
889  // void *, void (*)(void **))
890  SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary");
891  RegisterLinkedBinaryName += ModuleID;
892  llvm::FunctionCallee RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction(
893  getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
894 
895  assert(RegisterGlobalsFunc && "Expecting at least dummy function!");
896  llvm::Value *Args[] = {RegisterGlobalsFunc,
897  CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy),
898  ModuleIDConstant,
899  makeDummyFunction(getCallbackFnTy())};
900  CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
901  }
902 
903  // Create destructor and register it with atexit() the way NVCC does it. Doing
904  // it during regular destructor phase worked in CUDA before 9.2 but results in
905  // double-free in 9.2.
906  if (llvm::Function *CleanupFn = makeModuleDtorFunction()) {
907  // extern "C" int atexit(void (*f)(void));
908  llvm::FunctionType *AtExitTy =
909  llvm::FunctionType::get(IntTy, CleanupFn->getType(), false);
910  llvm::FunctionCallee AtExitFunc =
911  CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(),
912  /*Local=*/true);
913  CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
914  }
915 
916  CtorBuilder.CreateRetVoid();
917  return ModuleCtorFunc;
918 }
919 
920 /// Creates a global destructor function that unregisters the GPU code blob
921 /// registered by constructor.
922 ///
923 /// For CUDA:
924 /// \code
925 /// void __cuda_module_dtor(void*) {
926 /// __cudaUnregisterFatBinary(Handle);
927 /// }
928 /// \endcode
929 ///
930 /// For HIP:
931 /// \code
932 /// void __hip_module_dtor(void*) {
933 /// if (__hip_gpubin_handle) {
934 /// __hipUnregisterFatBinary(__hip_gpubin_handle);
935 /// __hip_gpubin_handle = 0;
936 /// }
937 /// }
938 /// \endcode
939 llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
940  // No need for destructor if we don't have a handle to unregister.
941  if (!GpuBinaryHandle)
942  return nullptr;
943 
944  // void __cudaUnregisterFatBinary(void ** handle);
945  llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
946  llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
947  addUnderscoredPrefixToName("UnregisterFatBinary"));
948 
949  llvm::Function *ModuleDtorFunc = llvm::Function::Create(
950  llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
952  addUnderscoredPrefixToName("_module_dtor"), &TheModule);
953 
954  llvm::BasicBlock *DtorEntryBB =
955  llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
956  CGBuilderTy DtorBuilder(CGM, Context);
957  DtorBuilder.SetInsertPoint(DtorEntryBB);
958 
959  Address GpuBinaryAddr(GpuBinaryHandle, CharUnits::fromQuantity(
960  GpuBinaryHandle->getAlignment()));
961  auto HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
962  // There is only one HIP fat binary per linked module, however there are
963  // multiple destructor functions. Make sure the fat binary is unregistered
964  // only once.
965  if (CGM.getLangOpts().HIP) {
966  llvm::BasicBlock *IfBlock =
967  llvm::BasicBlock::Create(Context, "if", ModuleDtorFunc);
968  llvm::BasicBlock *ExitBlock =
969  llvm::BasicBlock::Create(Context, "exit", ModuleDtorFunc);
970  llvm::Constant *Zero = llvm::Constant::getNullValue(HandleValue->getType());
971  llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue, Zero);
972  DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock);
973 
974  DtorBuilder.SetInsertPoint(IfBlock);
975  DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
976  DtorBuilder.CreateStore(Zero, GpuBinaryAddr);
977  DtorBuilder.CreateBr(ExitBlock);
978 
979  DtorBuilder.SetInsertPoint(ExitBlock);
980  } else {
981  DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
982  }
983  DtorBuilder.CreateRetVoid();
984  return ModuleDtorFunc;
985 }
986 
988  return new CGNVCUDARuntime(CGM);
989 }
990 
991 void CGNVCUDARuntime::internalizeDeviceSideVar(
992  const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) {
993  // For -fno-gpu-rdc, host-side shadows of external declarations of device-side
994  // global variables become internal definitions. These have to be internal in
995  // order to prevent name conflicts with global host variables with the same
996  // name in a different TUs.
997  //
998  // For -fgpu-rdc, the shadow variables should not be internalized because
999  // they may be accessed by different TU.
1000  if (CGM.getLangOpts().GPURelocatableDeviceCode)
1001  return;
1002 
1003  // __shared__ variables are odd. Shadows do get created, but
1004  // they are not registered with the CUDA runtime, so they
1005  // can't really be used to access their device-side
1006  // counterparts. It's not clear yet whether it's nvcc's bug or
1007  // a feature, but we've got to do the same for compatibility.
1008  if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
1009  D->hasAttr<CUDASharedAttr>() ||
1013  }
1014 }
1015 
1016 void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
1017  llvm::GlobalVariable &GV) {
1018  if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
1019  // Shadow variables and their properties must be registered with CUDA
1020  // runtime. Skip Extern global variables, which will be registered in
1021  // the TU where they are defined.
1022  //
1023  // Don't register a C++17 inline variable. The local symbol can be
1024  // discarded and referencing a discarded local symbol from outside the
1025  // comdat (__cuda_register_globals) is disallowed by the ELF spec.
1026  //
1027  // HIP managed variables need to be always recorded in device and host
1028  // compilations for transformation.
1029  //
1030  // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are
1031  // added to llvm.compiler-used, therefore they are safe to be registered.
1032  if ((!D->hasExternalStorage() && !D->isInline()) ||
1033  CGM.getContext().CUDADeviceVarODRUsedByHost.contains(D) ||
1034  D->hasAttr<HIPManagedAttr>()) {
1035  registerDeviceVar(D, GV, !D->hasDefinition(),
1036  D->hasAttr<CUDAConstantAttr>());
1037  }
1038  } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
1040  // Builtin surfaces and textures and their template arguments are
1041  // also registered with CUDA runtime.
1042  const auto *TD = cast<ClassTemplateSpecializationDecl>(
1043  D->getType()->castAs<RecordType>()->getDecl());
1044  const TemplateArgumentList &Args = TD->getTemplateArgs();
1045  if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
1046  assert(Args.size() == 2 &&
1047  "Unexpected number of template arguments of CUDA device "
1048  "builtin surface type.");
1049  auto SurfType = Args[1].getAsIntegral();
1050  if (!D->hasExternalStorage())
1051  registerDeviceSurf(D, GV, !D->hasDefinition(), SurfType.getSExtValue());
1052  } else {
1053  assert(Args.size() == 3 &&
1054  "Unexpected number of template arguments of CUDA device "
1055  "builtin texture type.");
1056  auto TexType = Args[1].getAsIntegral();
1057  auto Normalized = Args[2].getAsIntegral();
1058  if (!D->hasExternalStorage())
1059  registerDeviceTex(D, GV, !D->hasDefinition(), TexType.getSExtValue(),
1060  Normalized.getZExtValue());
1061  }
1062  }
1063 }
1064 
1065 // Transform managed variables to pointers to managed variables in device code.
1066 // Each use of the original managed variable is replaced by a load from the
1067 // transformed managed variable. The transformed managed variable contains
1068 // the address of managed memory which will be allocated by the runtime.
1069 void CGNVCUDARuntime::transformManagedVars() {
1070  for (auto &&Info : DeviceVars) {
1071  llvm::GlobalVariable *Var = Info.Var;
1072  if (Info.Flags.getKind() == DeviceVarFlags::Variable &&
1073  Info.Flags.isManaged()) {
1074  auto ManagedVar = new llvm::GlobalVariable(
1075  CGM.getModule(), Var->getType(),
1076  /*isConstant=*/false, Var->getLinkage(),
1077  /*Init=*/Var->isDeclaration()
1078  ? nullptr
1079  : llvm::ConstantPointerNull::get(Var->getType()),
1080  /*Name=*/"", /*InsertBefore=*/nullptr,
1081  llvm::GlobalVariable::NotThreadLocal,
1082  CGM.getContext().getTargetAddressSpace(LangAS::cuda_device));
1083  ManagedVar->setDSOLocal(Var->isDSOLocal());
1084  ManagedVar->setVisibility(Var->getVisibility());
1085  ManagedVar->setExternallyInitialized(true);
1086  replaceManagedVar(Var, ManagedVar);
1087  ManagedVar->takeName(Var);
1088  Var->setName(Twine(ManagedVar->getName()) + ".managed");
1089  // Keep managed variables even if they are not used in device code since
1090  // they need to be allocated by the runtime.
1091  if (!Var->isDeclaration()) {
1092  assert(!ManagedVar->isDeclaration());
1093  CGM.addCompilerUsedGlobal(Var);
1094  CGM.addCompilerUsedGlobal(ManagedVar);
1095  }
1096  }
1097  }
1098 }
1099 
1100 // Returns module constructor to be added.
1101 llvm::Function *CGNVCUDARuntime::finalizeModule() {
1102  if (CGM.getLangOpts().CUDAIsDevice) {
1103  transformManagedVars();
1104 
1105  // Mark ODR-used device variables as compiler used to prevent it from being
1106  // eliminated by optimization. This is necessary for device variables
1107  // ODR-used by host functions. Sema correctly marks them as ODR-used no
1108  // matter whether they are ODR-used by device or host functions.
1109  //
1110  // We do not need to do this if the variable has used attribute since it
1111  // has already been added.
1112  //
1113  // Static device variables have been externalized at this point, therefore
1114  // variables with LLVM private or internal linkage need not be added.
1115  for (auto &&Info : DeviceVars) {
1116  auto Kind = Info.Flags.getKind();
1117  if (!Info.Var->isDeclaration() &&
1118  !llvm::GlobalValue::isLocalLinkage(Info.Var->getLinkage()) &&
1119  (Kind == DeviceVarFlags::Variable ||
1120  Kind == DeviceVarFlags::Surface ||
1121  Kind == DeviceVarFlags::Texture) &&
1122  Info.D->isUsed() && !Info.D->hasAttr<UsedAttr>()) {
1123  CGM.addCompilerUsedGlobal(Info.Var);
1124  }
1125  }
1126  return nullptr;
1127  }
1128  return makeModuleCtorFunction();
1129 }
1130 
1131 llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
1132  GlobalDecl GD) {
1133  auto Loc = KernelHandles.find(F);
1134  if (Loc != KernelHandles.end())
1135  return Loc->second;
1136 
1137  if (!CGM.getLangOpts().HIP) {
1138  KernelHandles[F] = F;
1139  KernelStubs[F] = F;
1140  return F;
1141  }
1142 
1143  auto *Var = new llvm::GlobalVariable(
1144  TheModule, F->getType(), /*isConstant=*/true, F->getLinkage(),
1145  /*Initializer=*/nullptr,
1146  CGM.getMangledName(
1147  GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel)));
1148  Var->setAlignment(CGM.getPointerAlign().getAsAlign());
1149  Var->setDSOLocal(F->isDSOLocal());
1150  Var->setVisibility(F->getVisibility());
1151  KernelHandles[F] = Var;
1152  KernelStubs[Var] = F;
1153  return Var;
1154 }
clang::InternalLinkage
@ InternalLinkage
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
Definition: Linkage.h:31
clang::CodeGen::CodeGenTypeCache::SizeTy
llvm::IntegerType * SizeTy
Definition: CodeGenTypeCache.h:50
clang::HiddenVisibility
@ HiddenVisibility
Objects with "hidden" visibility are not seen by the dynamic linker.
Definition: Visibility.h:36
clang::CodeGen::CGCUDARuntime
Definition: CGCUDARuntime.h:41
clang::CharUnits::getAsAlign
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:183
clang::CodeGen::CodeGenTypeCache::IntTy
llvm::IntegerType * IntTy
int
Definition: CodeGenTypeCache.h:42
clang::CodeGen::CodeGenModule::CreateRuntimeFunction
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
Definition: CodeGenModule.cpp:3790
clang::CodeGen::ConstantInitBuilder
The standard implementation of ConstantInitBuilder used in Clang.
Definition: ConstantInitBuilder.h:535
CodeGenFunction.h
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
clang::DeclContext
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1347
clang::CodeGen::CodeGenFunction::EmitCall
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, bool IsMustTail, SourceLocation Loc)
EmitCall - Generate a call of the given function, expecting the given result type,...
Definition: CGCall.cpp:4616
clang::Decl::hasAttr
bool hasAttr() const
Definition: DeclBase.h:547
clang::CodeGen::CodeGenTypeCache::getSizeAlign
CharUnits getSizeAlign() const
Definition: CodeGenTypeCache.h:111
clang::ASTContext::shouldExternalizeStaticVar
bool shouldExternalizeStaticVar(const Decl *D) const
Whether a C++ static variable should be externalized.
Definition: ASTContext.cpp:11797
llvm::SmallVector
Definition: LLVM.h:38
clang::IdentifierTable::get
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Definition: IdentifierTable.h:592
InitDeviceMC
static std::unique_ptr< MangleContext > InitDeviceMC(CodeGenModule &CGM)
Definition: CGCUDANV.cpp:194
clang::TargetCXXABI::isMicrosoft
bool isMicrosoft() const
Is this ABI an MSVC-compatible ABI?
Definition: TargetCXXABI.h:138
clang::CodeGen::CGCXXABI::getMangleContext
MangleContext & getMangleContext()
Gets the mangle context.
Definition: CGCXXABI.h:97
clang::NamedDecl
This represents a decl that may have a name.
Definition: Decl.h:249
clang::ASTContext::CUDADeviceVarODRUsedByHost
llvm::DenseSet< const VarDecl * > CUDADeviceVarODRUsedByHost
Keep track of CUDA/HIP device-side variables ODR-used by host code.
Definition: ASTContext.h:1140
clang::GlobalDecl::getWithKernelReferenceKind
GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind)
Definition: GlobalDecl.h:189
clang::CodeGen::CodeGenTypeCache::getPointerAlign
CharUnits getPointerAlign() const
Definition: CodeGenTypeCache.h:117
clang::QualType
A (possibly-)qualified type.
Definition: Type.h:673
clang::FunctionDecl::getParamDecl
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2506
clang::CodeGen::CodeGenModule::getContext
ASTContext & getContext() const
Definition: CodeGenModule.h:702
clang::QualType::getCanonicalType
QualType getCanonicalType() const
Definition: Type.h:6464
clang::TargetInfo::getCXXABI
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
Definition: TargetInfo.h:1203
clang::TargetInfo::getSDKVersion
const llvm::VersionTuple & getSDKVersion() const
Definition: TargetInfo.h:1562
clang::ParmVarDecl
Represents a parameter to a function.
Definition: Decl.h:1665
clang::TargetCXXABI::isItaniumFamily
bool isItaniumFamily() const
Does this ABI generally fall into the Itanium family of ABIs?
Definition: TargetCXXABI.h:124
clang::ASTContext::getAuxTargetInfo
const TargetInfo * getAuxTargetInfo() const
Definition: ASTContext.h:748
clang::CodeGen::CodeGenFunction::createBasicBlock
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
Definition: CodeGenFunction.h:2394
clang::ASTContext::createMangleContext
MangleContext * createMangleContext(const TargetInfo *T=nullptr)
If T is null pointer, assume the target in ASTContext.
Definition: ASTContext.cpp:11192
clang::CodeGen::CodeGenModule::getLangOpts
const LangOptions & getLangOpts() const
Definition: CodeGenModule.h:703
replaceManagedVar
static void replaceManagedVar(llvm::GlobalVariable *Var, llvm::GlobalVariable *ManagedVar)
Definition: CGCUDANV.cpp:449
clang::CodeGen::CodeGenFunction::CreateTempAlloca
llvm::AllocaInst * CreateTempAlloca(llvm::Type *Ty, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)
CreateTempAlloca - This creates an alloca and inserts it into the entry block if ArraySize is nullptr...
Definition: CGExpr.cpp:110
clang::CodeGen::CGBuilderTy
Definition: CGBuilder.h:43
clang::CodeGen::CodeGenModule::printPostfixForExternalizedStaticVar
void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const
Print the postfix for externalized static variable for single source offloading languages CUDA and HI...
Definition: CodeGenModule.cpp:6475
clang::ASTContext::getTranslationUnitDecl
TranslationUnitDecl * getTranslationUnitDecl() const
Definition: ASTContext.h:1057
clang::CodeGen::CallArgList::add
void add(RValue rvalue, QualType type)
Definition: CGCall.h:288
clang::CodeGen::CodeGenFunction::Builder
CGBuilderTy Builder
Definition: CodeGenFunction.h:274
CodeGenABITypes.h
clang::CodeGen::CodeGenTypes::ConvertType
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
Definition: CodeGenTypes.cpp:395
clang::CodeGen::CodeGenModule::getMangledName
StringRef getMangledName(GlobalDecl GD)
Definition: CodeGenModule.cpp:1346
clang::Type
The base class of the type hierarchy.
Definition: Type.h:1490
Decl.h
clang::CodeGen::CodeGenFunction::GetAddrOfLocalVar
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
Definition: CodeGenFunction.h:2679
Offset
unsigned Offset
Definition: Format.cpp:2335
clang::GlobalDecl
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:55
U
clang::Type::isCUDADeviceBuiltinTextureType
bool isCUDADeviceBuiltinTextureType() const
Check if the type is the CUDA device builtin texture type.
Definition: Type.cpp:4329
clang::CodeGen::ReturnValueSlot
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:362
clang::VarDecl::isInline
bool isInline() const
Whether this variable is (C++1z) inline.
Definition: Decl.h:1461
clang::TranslationUnitDecl
The top declaration context.
Definition: Decl.h:82
clang::RecordType
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of structs/unions/cl...
Definition: Type.h:4613
clang::MangleContext
MangleContext - Context for tracking state which persists across multiple calls to the C++ name mangl...
Definition: Mangle.h:44
clang::Linkage
Linkage
Describes the different kinds of linkage (C++ [basic.link], C99 6.2.2) that an entity may have.
Definition: Linkage.h:23
clang::XRayInstrKind::None
constexpr XRayInstrMask None
Definition: XRayInstr.h:38
clang::CodeGen::CodeGenModule::getCodeGenOpts
const CodeGenOptions & getCodeGenOpts() const
Definition: CodeGenModule.h:708
clang::CodeGen::CreateNVCUDARuntime
CGCUDARuntime * CreateNVCUDARuntime(CodeGenModule &CGM)
Creates an instance of a CUDA runtime class.
Definition: CGCUDANV.cpp:987
clang::CodeGen::Address::getType
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:43
clang::ASTContext
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:212
clang::MangleContext::mangleName
void mangleName(GlobalDecl GD, raw_ostream &)
Definition: Mangle.cpp:141
clang::CodeGen::CodeGenTypeCache::VoidTy
llvm::Type * VoidTy
void
Definition: CodeGenTypeCache.h:34
clang::CudaFeatureEnabled
bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature)
Definition: Cuda.cpp:249
clang::CodeGen::CodeGenModule::getModule
llvm::Module & getModule() const
Definition: CodeGenModule.h:709
clang::VarDecl::hasExternalStorage
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition: Decl.h:1134
llvm::SmallString
Definition: LLVM.h:37
clang::interp::Zero
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:814
clang::VarDecl
Represents a variable declaration or definition.
Definition: Decl.h:876
clang::CodeGen::CodeGenModule::getTypes
CodeGenTypes & getTypes()
Definition: CodeGenModule.h:726
clang::KernelReferenceKind::Kernel
@ Kernel
clang::CodeGen::Address
An aligned address.
Definition: Address.h:24
CodeGenModule.h
clang::CodeGen::CodeGenModule::getTarget
const TargetInfo & getTarget() const
Definition: CodeGenModule.h:714
clang::ASTContext::getTargetAddressSpace
unsigned getTargetAddressSpace(QualType T) const
Definition: ASTContext.h:2720
clang::CodeGen::CodeGenModule::getDataLayout
const llvm::DataLayout & getDataLayout() const
Definition: CodeGenModule.h:711
clang::Type::castAs
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:7227
clang::serialized_diags::create
std::unique_ptr< DiagnosticConsumer > create(StringRef OutputFile, DiagnosticOptions *Diags, bool MergeChildRecords=false)
Returns a DiagnosticConsumer that serializes diagnostics to a bitcode file.
Definition: SerializedDiagnosticPrinter.cpp:302
clang::ASTContext::VoidPtrTy
CanQualType VoidPtrTy
Definition: ASTContext.h:1102
clang::CodeGen::Address::getPointer
llvm::Value * getPointer() const
Definition: Address.h:37
clang::CodeGen::CodeGenFunction
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
Definition: CodeGenFunction.h:235
clang::NamedDecl::getIdentifier
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Definition: Decl.h:270
clang::CodeGen::CodeGenModule::getCXXABI
CGCXXABI & getCXXABI() const
Definition: CodeGenModule.h:719
clang::ASTContext::Idents
IdentifierTable & Idents
Definition: ASTContext.h:648
clang::MangleContext::shouldMangleDeclName
bool shouldMangleDeclName(const NamedDecl *D)
Definition: Mangle.cpp:107
clang::CodeGen::CodeGenModule
This class organizes the cross-function state that is used while generating LLVM code.
Definition: CodeGenModule.h:284
Value
Value
Definition: UninitializedValues.cpp:102
clang::Decl
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:89
clang::CodeGen::CodeGenModule::getTriple
const llvm::Triple & getTriple() const
Definition: CodeGenModule.h:715
clang::ASTContext::createDeviceMangleContext
MangleContext * createDeviceMangleContext(const TargetInfo &T)
Creates a device mangle context to correctly mangle lambdas in a mixed architecture compile by settin...
Definition: ASTContext.cpp:11213
clang::CodeGen::FunctionArgList
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:358
clang::ToCudaVersion
CudaVersion ToCudaVersion(llvm::VersionTuple)
Definition: Cuda.cpp:212
clang::ASTContext::CharTy
CanQualType CharTy
Definition: ASTContext.h:1077
clang::CodeGen::CodeGenTypes
This class organizes the cross-module state that is used while lowering AST types to LLVM types.
Definition: CodeGenTypes.h:59
clang::TemplateArgumentList
A template argument list.
Definition: DeclTemplate.h:236
clang::LangOptions::CUID
std::string CUID
The user provided compilation unit ID, if non-empty.
Definition: LangOptions.h:372
clang::IdentifierInfo
One of these records is kept for each identifier that is lexed.
Definition: IdentifierTable.h:84
clang::ASTContext::getTargetInfo
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:747
clang::CodeGen::CodeGenFunction::EmitRuntimeCallOrInvoke
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
Emits a call or invoke instruction to the given runtime function.
Definition: CGCall.cpp:4473
clang::ObjCPropertyAttribute::Kind
Kind
Definition: DeclObjCCommon.h:22
clang::CodeGen::CGFunctionInfo
CGFunctionInfo - Class to encapsulate the information about a function definition.
Definition: CGFunctionInfo.h:546
CGCXXABI.h
Cuda.h
clang::IdentifierInfo::getName
StringRef getName() const
Return the actual identifier string.
Definition: IdentifierTable.h:195
clang::CodeGen::CodeGenFunction::CurFn
llvm::Function * CurFn
Definition: CodeGenFunction.h:330
clang
Definition: CalledOnceCheck.h:17
clang::DeclContext::lookup
lookup_result lookup(DeclarationName Name) const
lookup - Find the declarations (if any) with the given Name in this context.
Definition: DeclBase.cpp:1645
clang::CodeGen::CodeGenModule::addCompilerUsedGlobal
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
Definition: CodeGenModule.cpp:2209
clang::ExternalLinkage
@ ExternalLinkage
External linkage, which indicates that the entity can be referred to from other translation units.
Definition: Linkage.h:59
CGCUDARuntime.h
clang::CodeGen::CodeGenTypes::arrangeFunctionDeclaration
const CGFunctionInfo & arrangeFunctionDeclaration(const FunctionDecl *FD)
Free functions are functions that are compatible with an ordinary C function pointer type.
Definition: CGCall.cpp:438
clang::CodeGenOptions::CudaGpuBinaryFileName
std::string CudaGpuBinaryFileName
Name of file passed with -fcuda-include-gpubinary option to forward to CUDA runtime back-end for inco...
Definition: CodeGenOptions.h:269
clang::CodeGen::CodeGenModule::getDiags
DiagnosticsEngine & getDiags() const
Definition: CodeGenModule.h:710
clang::RecordType::getDecl
RecordDecl * getDecl() const
Definition: Type.h:4623
clang::CodeGen::CGBuilderTy::CreateDefaultAlignedStore
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:109
clang::CharUnits
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
clang::ASTContext::getTypeInfoInChars
TypeInfoChars getTypeInfoInChars(const Type *T) const
Definition: ASTContext.cpp:1874
clang::ValueDecl::getType
QualType getType() const
Definition: Decl.h:687
clang::CodeGen::CodeGenFunction::CreateMemTemp
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
Definition: CGExpr.cpp:135
clang::CodeGen::CodeGenTypeCache::Int8Ty
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
Definition: CodeGenTypeCache.h:37
clang::Decl::getLocation
SourceLocation getLocation() const
Definition: DeclBase.h:430
clang::CodeGen::CallArgList
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:264
clang::TemplateArgumentList::size
unsigned size() const
Retrieve the number of template arguments in this template argument list.
Definition: DeclTemplate.h:296
clang::FunctionDecl
Represents a function declaration or definition.
Definition: Decl.h:1856
clang::Type::isCUDADeviceBuiltinSurfaceType
bool isCUDADeviceBuiltinSurfaceType() const
Check if the type is the CUDA device builtin surface type.
Definition: Type.cpp:4322
clang::CodeGen::CodeGenFunction::EmitBlock
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition: CGStmt.cpp:529
clang::CodeGen::CodeGenModule::Error
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
Definition: CodeGenModule.cpp:952
clang::CodeGen::CodeGenFunction::getLangOpts
const LangOptions & getLangOpts() const
Definition: CodeGenFunction.h:1960
ConstantInitBuilder.h
clang::VarDecl::hasDefinition
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2259
clang::ento::ObjKind::OS
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
clang::CodeGen::CodeGenFunction::CurFuncDecl
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
Definition: CodeGenFunction.h:325
clang::DiagnosticsEngine::Report
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1523
clang::CodeGen::CGBuilderTy::CreateLoad
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:68
clang::CodeGen::CodeGenFunction::EmitBranch
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition: CGStmt.cpp:549