23#include "llvm/ADT/StringRef.h"
24#include "llvm/Frontend/Offloading/Utility.h"
25#include "llvm/IR/BasicBlock.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/ReplaceConstant.h"
29#include "llvm/Support/Format.h"
30#include "llvm/Support/VirtualFileSystem.h"
31#include "llvm/Transforms/Utils/ModuleUtils.h"
// Magic numbers identifying the fatbin wrapper flavor. makeModuleCtorFunction
// copies one of them into its FatMagic local and stores it as the first
// integer field of the wrapper struct.
// The bytes of HIPFatMagic read "HIPF" in ASCII (0x48 0x49 0x50 0x46).
37constexpr unsigned CudaFatMagic = 0x466243b1;
38constexpr unsigned HIPFatMagic = 0x48495046;
// Data members of the CUDA/HIP runtime code generator. The enclosing class
// header and the nested struct headers are not part of this listing.
// LLVM types used below when building runtime function signatures.
46 llvm::IntegerType *IntTy, *SizeTy;
48 llvm::PointerType *PtrTy;
// Initialized in the constructor from CGM.getLLVMContext().
51 llvm::LLVMContext &Context;
// Initialized in the constructor from CGM.getModule().
53 llvm::Module &TheModule;
// NOTE(review): accessed below as I.Kernel on EmittedKernels elements, so this
// is presumably a field of KernelInfo -- confirm against the full file.
56 llvm::Function *Kernel;
// Kernels iterated by makeRegisterGlobalsFn and createOffloadingEntries.
59 llvm::SmallVector<KernelInfo, 16> EmittedKernels;
// Kernel name -> handle; indexed with F->getName() / CurFn->getName().
63 llvm::DenseMap<StringRef, llvm::GlobalValue *> KernelHandles;
// Handle -> stub function; queried by getKernelStub.
65 llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
// NOTE(review): accessed below as Info.Var on DeviceVars elements, so this is
// presumably a field of VarInfo -- confirm against the full file.
67 llvm::GlobalVariable *Var;
// Entries appended by registerDeviceVar/registerDeviceSurf/registerDeviceTex.
71 llvm::SmallVector<VarInfo, 16> DeviceVars;
// Assigned in makeModuleCtorFunction; tested for null in
// makeModuleDtorFunction.
75 llvm::GlobalVariable *GpuBinaryHandle =
nullptr;
// Assigned in emitOffloadProfilingSections; when non-null it is registered by
// makeRegisterGlobalsFn and createOffloadingEntries.
80 llvm::GlobalVariable *OffloadProfShadow =
nullptr;
// Initialized from CGM.getLangOpts().GPURelocatableDeviceCode.
82 bool RelocatableDeviceCode;
// Owned mangle context. NOTE(review): presumably used for device-side name
// mangling (see getDeviceSideName) -- its uses are not visible in this listing.
84 std::unique_ptr<MangleContext> DeviceMC;
// Declarations of private helpers; their definitions appear further down.
// Runtime entry points used by emitDeviceStubBodyLegacy.
86 llvm::FunctionCallee getSetupArgumentFn()
const;
87 llvm::FunctionCallee getLaunchFn()
const;
// Function types shared by the registration code.
89 llvm::FunctionType *getRegisterGlobalsFnTy()
const;
90 llvm::FunctionType *getCallbackFnTy()
const;
91 llvm::FunctionType *getRegisterLinkedBinaryFnTy()
const;
// Build runtime symbol names: Prefix + FuncName and "__" + Prefix + FuncName.
92 std::string addPrefixToName(StringRef FuncName)
const;
93 std::string addUnderscoredPrefixToName(StringRef FuncName)
const;
// Builds the function that registers emitted kernels and device variables.
96 llvm::Function *makeRegisterGlobalsFn();
// Returns the pointer of the constant C string that
// CGM.GetAddrOfConstantCString produces for Str. Name is forwarded to that
// call and defaults to the empty string.
101 llvm::Constant *makeConstantString(
const std::string &Str,
102 const std::string &Name =
"") {
103 return CGM.GetAddrOfConstantCString(Str, Name).getPointer();
// Creates a constant, private-linkage global in TheModule whose initializer is
// the byte array for Str (optionally null-terminated when AddNull is set).
// SectionName, when non-empty, becomes the global's section; Alignment is
// applied through llvm::Align.
// NOTE(review): this listing omits several original lines (the Name parameter
// declaration, the lines between embedded numbers 120/123/126, and the return),
// so any guard around the calls at the end cannot be confirmed from here.
109 llvm::Constant *makeConstantArray(StringRef Str,
111 StringRef SectionName =
"",
112 unsigned Alignment = 0,
113 bool AddNull =
false) {
114 llvm::Constant *
Value =
115 llvm::ConstantDataArray::getString(Context, Str, AddNull);
116 auto *GV =
new llvm::GlobalVariable(
117 TheModule,
Value->getType(),
true,
118 llvm::GlobalValue::PrivateLinkage,
Value, Name);
119 if (!SectionName.empty()) {
120 GV->setSection(SectionName);
// Explicitly clears unnamed_addr on the new global.
123 GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
126 GV->setAlignment(llvm::Align(Alignment));
// Creates an internal-linkage function named "dummy" of type FnTy in
// TheModule, whose single unnamed basic block contains only `ret void`.
// FnTy must return void (asserted).
// NOTE(review): the return statement is not part of this listing.
131 llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
132 assert(FnTy->getReturnType()->isVoidTy() &&
133 "Can only generate dummy functions returning void!");
134 llvm::Function *DummyFunc = llvm::Function::Create(
135 FnTy, llvm::GlobalValue::InternalLinkage,
"dummy", &TheModule);
137 llvm::BasicBlock *DummyBlock =
138 llvm::BasicBlock::Create(Context,
"", DummyFunc);
139 CGBuilderTy FuncBuilder(CGM, Context);
140 FuncBuilder.SetInsertPoint(DummyBlock);
141 FuncBuilder.CreateRetVoid();
// Declarations of the kernel-argument and device-stub helpers defined later.
// emitDeviceStubBodyNew picks one of the two prepareKernelArgs* variants.
146 Address prepareKernelArgs(CodeGenFunction &CGF, FunctionArgList &Args);
147 Address prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
148 FunctionArgList &Args);
// emitDeviceStub dispatches to one of these two stub-body emitters.
149 void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
150 void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
// Returns the device-side name computed for ND.
151 std::string getDeviceSideName(
const NamedDecl *ND)
override;
// Appends an entry for Var to DeviceVars with kind DeviceVarFlags::Variable,
// the Extern and Constant flags, and a managed flag taken from whether VD
// carries HIPManagedAttr.
// NOTE(review): the remaining parameters (Extern, Constant) and the tail of the
// initializer are on lines missing from this listing.
153 void registerDeviceVar(
const VarDecl *VD, llvm::GlobalVariable &Var,
155 DeviceVars.push_back({&Var,
157 {DeviceVarFlags::Variable, Extern,
Constant,
158 VD->hasAttr<HIPManagedAttr>(),
// Appends an entry for Var to DeviceVars with kind DeviceVarFlags::Surface and
// the given Extern flag; the constant flag is hard-wired to false.
// NOTE(review): the tail of the initializer (including where Type is stored)
// is not part of this listing.
161 void registerDeviceSurf(
const VarDecl *VD, llvm::GlobalVariable &Var,
162 bool Extern,
int Type) {
163 DeviceVars.push_back({&Var,
165 {DeviceVarFlags::Surface, Extern,
false,
// Appends an entry for Var to DeviceVars with kind DeviceVarFlags::Texture.
// The flag initializer passes Extern, two literal false values, Normalized and
// Type, in that order.
169 void registerDeviceTex(
const VarDecl *VD, llvm::GlobalVariable &Var,
170 bool Extern,
int Type,
bool Normalized) {
171 DeviceVars.push_back({&Var,
173 {DeviceVarFlags::Texture, Extern,
false,
174 false, Normalized,
Type}});
// Declarations of the module-level helpers defined later.
// finalizeModule returns the result of makeModuleCtorFunction.
178 llvm::Function *makeModuleCtorFunction();
// Called from makeModuleCtorFunction; a non-null result is passed to AtExitFunc.
180 llvm::Function *makeModuleDtorFunction();
// Both are invoked at the start of finalizeModule.
182 void transformManagedVars();
184 void createOffloadingEntries();
191 void emitOffloadProfilingSections();
// Constructor taking the owning CodeGenModule.
194 CGNVCUDARuntime(CodeGenModule &CGM);
// Returns the handle associated with kernel stub F.
196 llvm::GlobalValue *getKernelHandle(llvm::Function *F, GlobalDecl GD)
override;
// Looks Handle up in KernelStubs and asserts that an entry exists.
// NOTE(review): the return statement is not part of this listing; presumably
// it returns the mapped stub function -- confirm against the full file.
197 llvm::Function *getKernelStub(llvm::GlobalValue *Handle)
override {
198 auto Loc = KernelStubs.find(Handle);
199 assert(Loc != KernelStubs.end());
// Overrides declared here and defined later.
// Emits the host-side stub body for the current function in CGF.
202 void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args)
override;
// Records Var for later registration, depending on the attributes of VD.
203 void handleVarRegistration(
const VarDecl *VD,
204 llvm::GlobalVariable &Var)
override;
// May rewrite Linkage for a device-side variable; the return type is on a line
// missing from this listing.
206 internalizeDeviceSideVar(
const VarDecl *D,
207 llvm::GlobalValue::LinkageTypes &
Linkage)
override;
// Runs the end-of-module steps and returns the module constructor function.
209 llvm::Function *finalizeModule()
override;
// Returns Prefix immediately followed by FuncName, e.g. the caller below passes
// "SetupArgument" to form the runtime's argument-setup symbol name.
214std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName)
const {
215 return (Prefix + FuncName).str();
// Returns "__" followed by Prefix and FuncName; used for runtime symbols such
// as the RegisterFunction / RegisterVar / RegisterFatBinary entry points.
// The return type is on a line missing from this listing.
218CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName)
const {
219 return (
"__" + Prefix + FuncName).str();
229 return std::unique_ptr<MangleContext>(
// Constructor: forwards CGM to the CGCUDARuntime base and binds Context,
// TheModule and RelocatableDeviceCode from CGM.
// NOTE(review): the rest of the initializer list and the body are not part of
// this listing.
238CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
239 : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
240 TheModule(CGM.getModule()),
241 RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
// Describes the runtime's argument-setup function: it takes
// (PtrTy, SizeTy, SizeTy), returns IntTy, is not variadic, and is named
// Prefix + "SetupArgument".
// NOTE(review): the line that turns this type/name pair into the returned
// FunctionCallee is missing from this listing.
256llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn()
const {
258 llvm::Type *Params[] = {PtrTy, SizeTy, SizeTy};
260 llvm::FunctionType::get(IntTy, Params,
false),
261 addPrefixToName(
"SetupArgument"));
// Describes a launch function of type IntTy(PtrTy), non-variadic, named
// "hipLaunchByPtr".
// NOTE(review): lines between embedded numbers 264 and 268 and after 268 are
// missing, so other branches (e.g. a non-HIP name) cannot be seen here.
264llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn()
const {
268 llvm::FunctionType::get(IntTy, PtrTy,
false),
"hipLaunchByPtr");
// Type of the globals-registration function: void(PtrTy), non-variadic.
// makeRegisterGlobalsFn creates its function with this type.
275llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy()
const {
276 return llvm::FunctionType::get(VoidTy, PtrTy,
false);
// Type of the callback passed to the linked-binary registration call:
// void(PtrTy), non-variadic.
279llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy()
const {
280 return llvm::FunctionType::get(VoidTy, PtrTy,
false);
// Type of the linked-binary registration function: void taking four pointer
// parameters, non-variadic. makeModuleCtorFunction calls it with
// (RegisterGlobalsFunc, FatbinWrapper, ModuleIDConstant, dummy callback).
283llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy()
const {
284 llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), PtrTy, PtrTy,
285 llvm::PointerType::getUnqual(Context)};
286 return llvm::FunctionType::get(VoidTy, Params,
false);
// Computes and returns the device-side name string for ND.
// NOTE(review): most of the body (the mangling calls and the conditions that
// select each step) is missing from this listing; only the scaffolding below
// is visible.
289std::string CGNVCUDARuntime::getDeviceSideName(
const NamedDecl *ND) {
// Function declarations are wrapped as kernel-kind GlobalDecls.
292 if (
auto *FD = dyn_cast<FunctionDecl>(ND))
293 GD = GlobalDecl(FD, KernelReferenceKind::Kernel);
296 std::string DeviceSideName;
// First pass: the name is produced by streaming into a local buffer.
303 SmallString<256> Buffer;
304 llvm::raw_svector_ostream
Out(Buffer);
306 DeviceSideName = std::string(
Out.str());
// Second pass: re-emits the current name into a fresh buffer; whatever is
// appended after it is on a line missing from this listing.
313 SmallString<256> Buffer;
314 llvm::raw_svector_ostream
Out(Buffer);
315 Out << DeviceSideName;
317 DeviceSideName = std::string(
Out.str());
319 return DeviceSideName;
// Emits the host-side stub for the function currently being generated in CGF.
// If the handle recorded for CGF.CurFn is a GlobalVariable, it takes the stub's
// linkage and the stub itself as initializer. The body is then emitted by
// either emitDeviceStubBodyNew or emitDeviceStubBodyLegacy.
// NOTE(review): the full condition choosing between the two emitters is only
// partially visible (it involves CudaFeature::CUDA_USES_NEW_LAUNCH).
322void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
323 FunctionArgList &Args) {
326 dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.
CurFn->getName()])) {
327 GV->setLinkage(CGF.
CurFn->getLinkage());
328 GV->setInitializer(CGF.
CurFn);
331 CudaFeature::CUDA_USES_NEW_LAUNCH) ||
334 emitDeviceStubBodyNew(CGF, Args);
336 emitDeviceStubBodyLegacy(CGF, Args);
// Builds the "kernel_launch_params" object for the LLVM offload launch path
// and returns its address. Two struct types are created: one from the kernel
// argument types and one with the layout {i64, ptr, ptr}.
// NOTE(review): the loop bodies, the allocas and the stores that fill these
// structs are on lines missing from this listing.
346Address CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
347 FunctionArgList &Args) {
348 SmallVector<llvm::Type *> ArgTypes, KernelLaunchParamsTypes;
349 for (
auto &Arg : Args)
351 llvm::StructType *KernelArgsTy = llvm::StructType::create(ArgTypes);
353 auto *Int64Ty = CGF.
Builder.getInt64Ty();
354 KernelLaunchParamsTypes.push_back(Int64Ty);
355 KernelLaunchParamsTypes.push_back(PtrTy);
356 KernelLaunchParamsTypes.push_back(PtrTy);
358 llvm::StructType *KernelLaunchParamsTy =
359 llvm::StructType::create(KernelLaunchParamsTypes);
364 "kernel_launch_params");
// Allocation size of the argument struct according to the module data layout.
366 auto KernelArgsSize = CGM.
getDataLayout().getTypeAllocSize(KernelArgsTy);
374 for (
unsigned i = 0; i < Args.size(); ++i) {
379 return KernelLaunchParams;
// Prepares the kernel argument pointer array for the new-style launch call.
// The array length is max(1, Args.size()), so it is never zero-sized; each
// argument's address is cast to PtrTy before being stored into its slot.
// NOTE(review): the alloca, the store call and the return are on lines missing
// from this listing.
382Address CGNVCUDARuntime::prepareKernelArgs(CodeGenFunction &CGF,
383 FunctionArgList &Args) {
389 llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
391 for (
unsigned i = 0; i < Args.size(); ++i) {
393 llvm::Value *VoidVarPtr = CGF.
Builder.CreatePointerCast(VarPtr, PtrTy);
395 VoidVarPtr, CGF.
Builder.CreateConstGEP1_32(
// Emits the stub body for the new-style launch API. Visible steps:
//  * prepare the kernel arguments with one of the two prepareKernelArgs*
//    helpers;
//  * form the launch API name from "LaunchKernel", optionally suffixed with
//    "_spt" or "_ptsz", and prefix it via addPrefixToName;
//  * look up that function's declaration in the translation unit and take the
//    dim3 type from its parameter at index 1;
//  * reference the "__" + Prefix + "PopCallConfiguration" runtime function;
//  * cast the kernel handle to PtrTy and build the launch call arguments.
// The tail creates (or reuses) a "<kernel name>.id" global mirroring the
// kernel function's linkage, DSO-locality, visibility and comdat.
// NOTE(review): many lines are missing from this listing, including the
// conditions guarding each step -- confirm details against the full file.
403void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
404 FunctionArgList &Args) {
407 ? prepareKernelArgsLLVMOffload(CGF, Args)
408 : prepareKernelArgs(CGF, Args);
422 TranslationUnitDecl *TUDecl = CGM.
getContext().getTranslationUnitDecl();
424 std::string KernelLaunchAPI =
"LaunchKernel";
// Per-thread default stream selects a suffixed variant of the launch API.
426 LangOptions::GPUDefaultStreamKind::PerThread) {
428 KernelLaunchAPI = KernelLaunchAPI +
"_spt";
430 KernelLaunchAPI = KernelLaunchAPI +
"_ptsz";
432 auto LaunchKernelName = addPrefixToName(KernelLaunchAPI);
433 const IdentifierInfo &cudaLaunchKernelII =
435 FunctionDecl *cudaLaunchKernelFD =
nullptr;
437 if (FunctionDecl *FD = dyn_cast<FunctionDecl>(
Result))
438 cudaLaunchKernelFD = FD;
// A missing declaration is reported with the message fragment below.
441 if (cudaLaunchKernelFD ==
nullptr) {
443 "Can't find declaration for " + LaunchKernelName);
447 ParmVarDecl *GridDimParam = cudaLaunchKernelFD->
getParamDecl(1);
448 QualType Dim3Ty = GridDimParam->
getType();
458 llvm::FunctionType::get(IntTy,
464 addUnderscoredPrefixToName(
"PopCallConfiguration"));
473 CGF.
Builder.CreatePointerCast(KernelHandles[CGF.
CurFn->getName()], PtrTy);
474 CallArgList LaunchKernelArgs;
486 QualType QT = cudaLaunchKernelFD->
getType();
491 const CGFunctionInfo &FI =
493 llvm::FunctionCallee cudaLaunchKernelFn =
503 llvm::Function *KernelFunction = llvm::cast<llvm::Function>(
Kernel);
504 std::string GlobalVarName = (KernelFunction->getName() +
".id").str();
506 llvm::GlobalVariable *HandleVar =
507 CGM.
getModule().getNamedGlobal(GlobalVarName);
// The ".id" global is an i8 initialized to 0 with the kernel's linkage.
509 HandleVar =
new llvm::GlobalVariable(
511 false, KernelFunction->getLinkage(),
512 llvm::ConstantInt::get(CGM.
Int8Ty, 0), GlobalVarName);
513 HandleVar->setDSOLocal(KernelFunction->isDSOLocal());
514 HandleVar->setVisibility(KernelFunction->getVisibility());
515 if (KernelFunction->hasComdat())
516 HandleVar->setComdat(CGM.
getModule().getOrInsertComdat(GlobalVarName));
// Emits the stub body for the legacy launch API: for each argument, a call to
// the argument-setup function is issued with the argument's size and its
// aligned running offset; the result is compared with zero to branch to the
// next block or to the end block. Finally the launch function is referenced
// with the kernel handle cast to PtrTy.
// NOTE(review): block creation, the setup call itself and the launch call are
// on lines missing from this listing.
529void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
530 FunctionArgList &Args) {
532 llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn();
535 for (
const VarDecl *A : Args) {
// Round the running offset up to this argument's alignment.
537 Offset = Offset.
alignTo(TInfo.Align);
538 llvm::Value *Args[] = {
541 llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
542 llvm::ConstantInt::get(SizeTy, Offset.
getQuantity()),
545 llvm::Constant *
Zero = llvm::ConstantInt::get(IntTy, 0);
546 llvm::Value *CBZero = CGF.
Builder.CreateICmpEQ(CB,
Zero);
// Zero result continues with NextBlock; anything else jumps to EndBlock.
548 CGF.
Builder.CreateCondBr(CBZero, NextBlock, EndBlock);
550 Offset += TInfo.Width;
554 llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
556 CGF.
Builder.CreatePointerCast(KernelHandles[CGF.
CurFn->getName()], PtrTy);
// Rewrites uses of Var so that they go through a load from ManagedVar.
// A worklist of user chains is seeded from Var's direct users and extended
// through each user's own users. When a chain ends at an instruction, a
// "ld.managed" load of ManagedVar is inserted before it; constant expressions
// along the chain are turned into instructions inserted before that
// instruction, and the old value is replaced by the new one at each step.
// A chain that does not end in an instruction hits llvm_unreachable.
// NOTE(review): the first line of the signature and several body lines
// (WorkList declaration, the checks selecting each branch, the updates of
// OldV/NewV) are missing from this listing.
566 llvm::GlobalVariable *ManagedVar) {
568 for (
auto &&VarUse : Var->uses()) {
569 WorkList.push_back({VarUse.getUser()});
571 while (!WorkList.empty()) {
572 auto &&WorkItem = WorkList.pop_back_val();
573 auto *
U = WorkItem.back();
// Extend the current chain by each user of U and queue the longer chain.
575 for (
auto &&UU :
U->uses()) {
576 WorkItem.push_back(UU.getUser());
577 WorkList.push_back(WorkItem);
582 if (
auto *I = dyn_cast<llvm::Instruction>(
U)) {
583 llvm::Value *OldV = Var;
584 llvm::Instruction *NewV =
new llvm::LoadInst(
585 Var->getType(), ManagedVar,
"ld.managed",
false,
586 llvm::Align(Var->getAlignment()), I->getIterator());
590 for (
auto &&Op : WorkItem) {
592 auto *NewInst = CE->getAsInstruction();
593 NewInst->insertBefore(*I->getParent(), I->getIterator());
594 NewInst->replaceUsesOfWith(OldV, NewV);
598 I->replaceUsesOfWith(OldV, NewV);
600 llvm_unreachable(
"Invalid use of managed variable");
// Builds and returns the internal function named
// "__" + Prefix + "_register_globals". Its single argument is the GPU binary
// handle. The body contains:
//  * one RegisterFunction call per entry of EmittedKernels;
//  * for each entry of DeviceVars, a RegisterVar, RegisterManagedVar,
//    RegisterSurface or RegisterTexture call depending on its kind/flags;
//  * when OffloadProfShadow is set, a RegisterVar call for it followed by a
//    call to __llvm_profile_offload_register_shadow_variable.
// NOTE(review): a number of lines are missing from this listing (notably the
// statement executed when there is nothing to register, and the leading
// elements of several argument arrays).
619llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
// Nothing to register; the statement taken in this case is not visible here.
621 if (EmittedKernels.empty() && DeviceVars.empty())
624 llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
625 getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
626 addUnderscoredPrefixToName(
"_register_globals"), &TheModule);
627 llvm::BasicBlock *EntryBB =
628 llvm::BasicBlock::Create(Context,
"entry", RegisterKernelsFunc);
629 CGBuilderTy Builder(CGM, Context);
630 Builder.SetInsertPoint(EntryBB);
// Signature of the kernel registration entry point: ten parameters, returns
// IntTy.
634 llvm::Type *RegisterFuncParams[] = {
635 PtrTy, PtrTy, PtrTy, PtrTy, IntTy,
636 PtrTy, PtrTy, PtrTy, PtrTy, llvm::PointerType::getUnqual(Context)};
638 llvm::FunctionType::get(IntTy, RegisterFuncParams,
false),
639 addUnderscoredPrefixToName(
"RegisterFunction"));
644 llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
645 for (
auto &&I : EmittedKernels) {
646 llvm::Constant *KernelName =
648 llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(PtrTy);
649 llvm::Value *Args[] = {
651 KernelHandles[I.Kernel->getName()],
// The integer argument is passed as all-ones.
654 llvm::ConstantInt::getAllOnesValue(IntTy),
659 llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Context))};
660 Builder.CreateCall(RegisterFunc, Args);
// Size type for variable registration; starts as IntTy (any later adjustment
// is on lines missing from this listing).
663 llvm::Type *VarSizeTy = IntTy;
671 llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
672 IntTy, VarSizeTy, IntTy, IntTy};
674 llvm::FunctionType::get(VoidTy, RegisterVarParams,
false),
675 addUnderscoredPrefixToName(
"RegisterVar"));
678 llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy,
679 PtrTy, VarSizeTy, IntTy};
681 llvm::FunctionType::get(VoidTy, RegisterManagedVarParams,
false),
682 addUnderscoredPrefixToName(
"RegisterManagedVar"));
686 llvm::FunctionType::get(
687 VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy},
false),
688 addUnderscoredPrefixToName(
"RegisterSurface"));
692 llvm::FunctionType::get(
693 VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy},
false),
694 addUnderscoredPrefixToName(
"RegisterTexture"));
695 for (
auto &&Info : DeviceVars) {
696 llvm::GlobalVariable *Var = Info.Var;
697 assert((!Var->isDeclaration() || Info.Flags.isManaged()) &&
698 "External variables should not show up here, except HIP managed "
700 llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
701 switch (Info.Flags.getKind()) {
702 case DeviceVarFlags::Variable: {
705 if (Info.Flags.isManaged()) {
// Managed variables must already carry the ".managed" suffix; the global
// with the suffix stripped is looked up by name.
706 assert(Var->getName().ends_with(
".managed") &&
707 "HIP managed variables not transformed");
708 auto *ManagedVar = CGM.
getModule().getNamedGlobal(
709 Var->getName().drop_back(StringRef(
".managed").size()));
710 llvm::Value *Args[] = {
715 llvm::ConstantInt::get(VarSizeTy, VarSize),
716 llvm::ConstantInt::get(IntTy, Var->getAlignment())};
// Only definitions are registered as managed variables.
717 if (!Var->isDeclaration())
718 Builder.CreateCall(RegisterManagedVar, Args);
720 llvm::Value *Args[] = {
725 llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
726 llvm::ConstantInt::get(VarSizeTy, VarSize),
727 llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
728 llvm::ConstantInt::get(IntTy, 0)};
729 Builder.CreateCall(RegisterVar, Args);
733 case DeviceVarFlags::Surface:
736 {&GpuBinaryHandlePtr, Var, VarName, VarName,
737 llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
738 llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
740 case DeviceVarFlags::Texture:
743 {&GpuBinaryHandlePtr, Var, VarName, VarName,
744 llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
745 llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()),
746 llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
// Register the profiling shadow variable under its own name, sized as one
// pointer, then hand it to the profile runtime.
757 if (OffloadProfShadow) {
758 llvm::Constant *Name =
759 makeConstantString(std::string(OffloadProfShadow->getName()));
760 llvm::Value *RegisterVarArgs[] = {
765 llvm::ConstantInt::get(IntTy, 0),
766 llvm::ConstantInt::get(VarSizeTy, CGM.
getDataLayout().getPointerSize()),
767 llvm::ConstantInt::get(IntTy, 0),
768 llvm::ConstantInt::get(IntTy, 0)};
769 Builder.CreateCall(RegisterVar, RegisterVarArgs);
772 llvm::FunctionType::get(VoidTy, {PtrTy},
false),
773 "__llvm_profile_offload_register_shadow_variable");
774 Builder.CreateCall(RegisterShadow, {OffloadProfShadow});
777 Builder.CreateRetVoid();
778 return RegisterKernelsFunc;
// Builds and returns the internal, void, zero-argument function named
// "__" + Prefix + "_module_ctor". Visible steps:
//  * obtain the globals-registration function (a dummy one is substituted when
//    relocatable device code is on and none was produced);
//  * read the GPU binary file, if a name was given, through the VFS;
//  * pick section names, module-id prefix and magic number for HIP or CUDA and
//    wrap the binary in a {IntTy, IntTy, PtrTy, PtrTy} fatbin wrapper global;
//  * emit one of three registration sequences (see comments below);
//  * if makeModuleDtorFunction yields a function, pass it to AtExitFunc.
// NOTE(review): many lines are missing from this listing, including the
// conditions that select the HIP/CUDA branches and the early-exit statements.
800llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
805 if (CudaGpuBinaryFileName.empty() && !IsHIP)
807 if ((IsHIP || (IsCUDA && !RelocatableDeviceCode)) && EmittedKernels.empty() &&
812 llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
// With relocatable device code a registration function is always needed, so a
// dummy is created when there is none.
815 if (RelocatableDeviceCode && !RegisterGlobalsFunc)
816 RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
820 llvm::FunctionType::get(PtrTy, PtrTy,
false),
821 addUnderscoredPrefixToName(
"RegisterFatBinary"));
823 llvm::StructType *FatbinWrapperTy =
824 llvm::StructType::get(IntTy, IntTy, PtrTy, PtrTy);
830 std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary =
nullptr;
831 if (!CudaGpuBinaryFileName.empty()) {
833 auto CudaGpuBinaryOrErr =
834 VFS->getBufferForFile(CudaGpuBinaryFileName, -1,
false);
// A read failure is reported with the file name and the error message.
835 if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
837 << CudaGpuBinaryFileName << EC.message();
840 CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
843 llvm::Function *ModuleCtorFunc = llvm::Function::Create(
844 llvm::FunctionType::get(VoidTy,
false),
845 llvm::GlobalValue::InternalLinkage,
846 addUnderscoredPrefixToName(
"_module_ctor"), &TheModule);
847 llvm::BasicBlock *CtorEntryBB =
848 llvm::BasicBlock::Create(Context,
"entry", ModuleCtorFunc);
849 CGBuilderTy CtorBuilder(CGM, Context);
851 CtorBuilder.SetInsertPoint(CtorEntryBB);
853 const char *FatbinConstantName;
854 const char *FatbinSectionName;
855 const char *ModuleIDSectionName;
856 StringRef ModuleIDPrefix;
857 llvm::Constant *FatBinStr;
// HIP naming: section names differ between macOS and other targets.
862 CGM.
getTriple().isMacOSX() ?
"__HIP,__hip_fatbin" :
".hip_fatbin";
864 CGM.
getTriple().isMacOSX() ?
"__HIP,__fatbin" :
".hipFatBinSegment";
866 ModuleIDSectionName =
867 CGM.
getTriple().isMacOSX() ?
"__HIP,__module_id" :
"__hip_module_id";
868 ModuleIDPrefix =
"__hip_";
// The embedded HIP code object is emitted with 4096-byte alignment.
873 const unsigned HIPCodeObjectAlign = 4096;
874 FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()),
"",
875 FatbinConstantName, HIPCodeObjectAlign);
// Alternative: FatBinStr is an external global without an initializer.
881 FatBinStr =
new llvm::GlobalVariable(
883 true, llvm::GlobalValue::ExternalLinkage,
nullptr,
887 nullptr, llvm::GlobalVariable::NotThreadLocal);
891 FatMagic = HIPFatMagic;
// CUDA naming: again macOS versus other targets.
893 if (RelocatableDeviceCode)
894 FatbinConstantName = CGM.
getTriple().isMacOSX()
895 ?
"__NV_CUDA,__nv_relfatbin"
899 CGM.
getTriple().isMacOSX() ?
"__NV_CUDA,__nv_fatbin" :
".nv_fatbin";
902 CGM.
getTriple().isMacOSX() ?
"__NV_CUDA,__fatbin" :
".nvFatBinSegment";
904 ModuleIDSectionName = CGM.
getTriple().isMacOSX()
905 ?
"__NV_CUDA,__nv_module_id"
907 ModuleIDPrefix =
"__nv_";
// The embedded CUDA fatbin is emitted with 8-byte alignment.
911 FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()),
"",
912 FatbinConstantName, 8);
913 FatMagic = CudaFatMagic;
// Wrapper fields in order: magic, the constant 1, the binary data pointer and
// a null pointer.
917 ConstantInitBuilder Builder(CGM);
918 auto Values = Builder.beginStruct(FatbinWrapperTy);
920 Values.addInt(IntTy, FatMagic);
922 Values.addInt(IntTy, 1);
924 Values.add(FatBinStr);
926 Values.add(llvm::ConstantPointerNull::get(PtrTy));
927 llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
930 FatbinWrapper->setSection(FatbinSectionName);
// Sequence 1: the handle global is external (hidden) with relocatable device
// code and internal otherwise; the fatbin is registered only while the loaded
// handle is still null, and the result is stored back into the handle.
941 auto Linkage = RelocatableDeviceCode ? llvm::GlobalValue::ExternalLinkage
942 : llvm::GlobalValue::InternalLinkage;
943 llvm::BasicBlock *IfBlock =
944 llvm::BasicBlock::Create(Context,
"if", ModuleCtorFunc);
945 llvm::BasicBlock *ExitBlock =
946 llvm::BasicBlock::Create(Context,
"exit", ModuleCtorFunc);
949 GpuBinaryHandle =
new llvm::GlobalVariable(
950 TheModule, PtrTy,
false,
Linkage,
952 !RelocatableDeviceCode ? llvm::ConstantPointerNull::get(PtrTy)
959 if (
Linkage != llvm::GlobalValue::InternalLinkage)
960 GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
962 GpuBinaryHandle, PtrTy,
965 auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
966 llvm::Constant *
Zero =
967 llvm::Constant::getNullValue(HandleValue->getType());
968 llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue,
Zero);
969 CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
972 CtorBuilder.SetInsertPoint(IfBlock);
974 llvm::CallInst *RegisterFatbinCall =
975 CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
976 CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
977 CtorBuilder.CreateBr(ExitBlock);
980 CtorBuilder.SetInsertPoint(ExitBlock);
// Globals are registered with the handle re-loaded after the branch.
982 if (RegisterGlobalsFunc) {
983 auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
984 CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
987 }
// Sequence 2 (no relocatable device code): register unconditionally, keep the
// handle in the internal "__cuda_gpubin_handle" global, register globals, and
// call __cudaRegisterFatBinaryEnd under the visible feature check.
else if (!RelocatableDeviceCode) {
991 llvm::CallInst *RegisterFatbinCall =
992 CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
993 GpuBinaryHandle =
new llvm::GlobalVariable(
994 TheModule, PtrTy,
false, llvm::GlobalValue::InternalLinkage,
995 llvm::ConstantPointerNull::get(PtrTy),
"__cuda_gpubin_handle");
997 CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
1001 if (RegisterGlobalsFunc)
1002 CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
1006 CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
1009 llvm::FunctionType::get(VoidTy, PtrTy,
false),
1010 "__cudaRegisterFatBinaryEnd");
1011 CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
// Sequence 3: derive a module id from the prefix and the wrapper's GUID in
// hex, publish the wrapper under an external "__fatbinwrap<id>" alias, and
// call "__cudaRegisterLinkedBinary<id>" with the registration function, the
// wrapper, the id constant and a dummy callback.
1015 SmallString<64> ModuleID;
1016 llvm::raw_svector_ostream
OS(ModuleID);
1017 OS << ModuleIDPrefix << llvm::format(
"%" PRIx64, FatbinWrapper->getGUID());
1018 llvm::Constant *ModuleIDConstant = makeConstantArray(
1019 std::string(ModuleID),
"", ModuleIDSectionName, 32,
true);
1022 llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
1023 Twine(
"__fatbinwrap") + ModuleID, FatbinWrapper);
1027 SmallString<128> RegisterLinkedBinaryName(
"__cudaRegisterLinkedBinary");
1028 RegisterLinkedBinaryName += ModuleID;
1030 getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
1032 assert(RegisterGlobalsFunc &&
"Expecting at least dummy function!");
1033 llvm::Value *Args[] = {RegisterGlobalsFunc, FatbinWrapper, ModuleIDConstant,
1034 makeDummyFunction(getCallbackFnTy())};
1035 CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
// Hand the cleanup function, when one was generated, to AtExitFunc.
1041 if (llvm::Function *CleanupFn = makeModuleDtorFunction()) {
1043 llvm::FunctionType *AtExitTy =
1044 llvm::FunctionType::get(IntTy, CleanupFn->getType(),
false);
1045 llvm::FunctionCallee AtExitFunc =
1048 CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
1051 CtorBuilder.CreateRetVoid();
1052 return ModuleCtorFunc;
// Builds and returns the internal, void, zero-argument function named
// "__" + Prefix + "_module_dtor", which unregisters the fat binary through
// "__" + Prefix + "UnregisterFatBinary" using the value loaded from
// GpuBinaryHandle.
// Two call shapes are visible: a guarded one (unregister only if the handle is
// non-null, then store null back) and an unguarded direct call.
// NOTE(review): the statement taken when GpuBinaryHandle is null and the
// condition selecting between the two shapes are missing from this listing.
1074llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
1076 if (!GpuBinaryHandle)
1081 llvm::FunctionType::get(VoidTy, PtrTy,
false),
1082 addUnderscoredPrefixToName(
"UnregisterFatBinary"));
1084 llvm::Function *ModuleDtorFunc = llvm::Function::Create(
1085 llvm::FunctionType::get(VoidTy,
false),
1086 llvm::GlobalValue::InternalLinkage,
1087 addUnderscoredPrefixToName(
"_module_dtor"), &TheModule);
1089 llvm::BasicBlock *DtorEntryBB =
1090 llvm::BasicBlock::Create(Context,
"entry", ModuleDtorFunc);
1091 CGBuilderTy DtorBuilder(CGM, Context);
1092 DtorBuilder.SetInsertPoint(DtorEntryBB);
1095 GpuBinaryHandle, GpuBinaryHandle->getValueType(),
1097 auto *HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
// Guarded shape: branch on handle != null.
1102 llvm::BasicBlock *IfBlock =
1103 llvm::BasicBlock::Create(Context,
"if", ModuleDtorFunc);
1104 llvm::BasicBlock *ExitBlock =
1105 llvm::BasicBlock::Create(Context,
"exit", ModuleDtorFunc);
1106 llvm::Constant *
Zero = llvm::Constant::getNullValue(HandleValue->getType());
1107 llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue,
Zero);
1108 DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock);
1110 DtorBuilder.SetInsertPoint(IfBlock);
1111 DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
// Reset the stored handle to null after unregistering.
1112 DtorBuilder.CreateStore(
Zero, GpuBinaryAddr);
1113 DtorBuilder.CreateBr(ExitBlock);
1115 DtorBuilder.SetInsertPoint(ExitBlock);
// Unguarded shape: unregister directly.
1117 DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
1119 DtorBuilder.CreateRetVoid();
1120 return ModuleDtorFunc;
1124 return new CGNVCUDARuntime(CGM);
// Sets Linkage to InternalLinkage for declarations matching the attribute test
// below (CUDA device, constant or shared attributes are among the visible
// alternatives).
// NOTE(review): the parameter list and the rest of the condition are on lines
// missing from this listing.
1127void CGNVCUDARuntime::internalizeDeviceSideVar(
1144 if (D->
hasAttr<CUDADeviceAttr>() || D->
hasAttr<CUDAConstantAttr>() ||
1145 D->
hasAttr<CUDASharedAttr>() ||
1148 Linkage = llvm::GlobalValue::InternalLinkage;
// Records GV for registration according to D's attributes and type.
// Variables with CUDA device/constant attributes take the first branch; the
// later branch handles builtin surface and texture types, reading the type
// (and, for textures, the normalized flag) from the template arguments and
// marking the entry extern when D has no definition.
// NOTE(review): the conditions between the branches and the registerDeviceVar
// call itself are on lines missing from this listing.
1152void CGNVCUDARuntime::handleVarRegistration(
const VarDecl *D,
1153 llvm::GlobalVariable &GV) {
1154 if (D->
hasAttr<CUDADeviceAttr>() || D->
hasAttr<CUDAConstantAttr>()) {
1170 D->
hasAttr<HIPManagedAttr>()) {
1172 D->
hasAttr<CUDAConstantAttr>());
1180 const TemplateArgumentList &Args = TD->getTemplateArgs();
1181 if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
// Surface types carry exactly two template arguments; index 1 is the type.
1182 assert(Args.
size() == 2 &&
1183 "Unexpected number of template arguments of CUDA device "
1184 "builtin surface type.");
1185 auto SurfType = Args[1].getAsIntegral();
1187 registerDeviceSurf(D, GV, !D->
hasDefinition(), SurfType.getSExtValue());
// Texture types carry exactly three template arguments; index 1 is the type
// and index 2 the normalized flag.
1189 assert(Args.
size() == 3 &&
1190 "Unexpected number of template arguments of CUDA device "
1191 "builtin texture type.");
1192 auto TexType = Args[1].getAsIntegral();
1193 auto Normalized = Args[2].getAsIntegral();
1195 registerDeviceTex(D, GV, !D->
hasDefinition(), TexType.getSExtValue(),
1196 Normalized.getZExtValue());
// For every managed entry of kind Variable in DeviceVars, creates a new
// non-constant global (ManagedVar) that copies the original's linkage,
// DSO-locality and visibility and is marked externally initialized. The new
// global takes over the original name, and the original is renamed to that
// name plus ".managed".
// NOTE(review): the type/name arguments of the new global, the initializer for
// declarations, and the body of the final device-side branch are on lines
// missing from this listing.
1205void CGNVCUDARuntime::transformManagedVars() {
1206 for (
auto &&Info : DeviceVars) {
1207 llvm::GlobalVariable *Var = Info.Var;
1208 if (Info.Flags.getKind() == DeviceVarFlags::Variable &&
1209 Info.Flags.isManaged()) {
1210 auto *ManagedVar =
new llvm::GlobalVariable(
1212 false, Var->getLinkage(),
1213 Var->isDeclaration()
1215 : llvm::ConstantPointerNull::get(Var->getType()),
1217 llvm::GlobalVariable::NotThreadLocal,
1219 ? LangAS::cuda_device
1220 : LangAS::Default));
1221 ManagedVar->setDSOLocal(Var->isDSOLocal());
1222 ManagedVar->setVisibility(Var->getVisibility());
1223 ManagedVar->setExternallyInitialized(
true);
// Swap names: ManagedVar gets the original name, Var gets "<name>.managed".
1225 ManagedVar->takeName(Var);
1226 Var->setName(Twine(ManagedVar->getName()) +
".managed");
1229 if (CGM.
getLangOpts().CUDAIsDevice && !Var->isDeclaration()) {
1230 assert(!ManagedVar->isDeclaration());
// Emits offloading entries through llvm::offloading::emitOffloadingEntry:
//  * one per entry of EmittedKernels, keyed by its kernel handle;
//  * one per entry of DeviceVars, as a managed, plain, surface or texture
//    entry, with extern/constant/normalized bits folded into Flags;
//  * one for OffloadProfShadow when set, plus an internal constructor that
//    calls __llvm_profile_offload_register_shadow_variable and is appended to
//    the global constructors with priority 65535.
// The entry Kind is OFK_HIP or OFK_Cuda, and may be replaced by OFK_OpenMP.
// NOTE(review): the conditions selecting Kind and parts of the Flags
// expression are on lines missing from this listing.
1241void CGNVCUDARuntime::createOffloadingEntries() {
1243 ? llvm::object::OffloadKind::OFK_HIP
1244 : llvm::object::OffloadKind::OFK_Cuda;
1247 Kind = llvm::object::OffloadKind::OFK_OpenMP;
1250 for (KernelInfo &I : EmittedKernels)
1251 llvm::offloading::emitOffloadingEntry(
1252 M, Kind, KernelHandles[I.Kernel->getName()],
1254 llvm::offloading::OffloadGlobalEntry);
1256 for (VarInfo &I : DeviceVars) {
1258 CGM.
getDataLayout().getTypeAllocSize(I.Var->getValueType());
1261 ?
static_cast<int32_t>(llvm::offloading::OffloadGlobalExtern)
1263 (I.Flags.isConstant()
1264 ?
static_cast<int32_t>(llvm::offloading::OffloadGlobalConstant)
1266 (I.Flags.isNormalized()
1267 ?
static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized)
1269 if (I.Flags.getKind() == DeviceVarFlags::Variable) {
1270 if (I.Flags.isManaged()) {
// Managed variables must already carry the ".managed" suffix; the global
// with the suffix stripped is passed along as the last argument.
1271 assert(I.Var->getName().ends_with(
".managed") &&
1272 "HIP managed variables not transformed");
1274 auto *ManagedVar = M.getNamedGlobal(
1275 I.Var->getName().drop_back(StringRef(
".managed").size()));
1276 llvm::offloading::emitOffloadingEntry(
1277 M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1278 llvm::offloading::OffloadGlobalManagedEntry | Flags,
1279 I.Var->getAlignment(), ManagedVar);
1281 llvm::offloading::emitOffloadingEntry(
1282 M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1283 llvm::offloading::OffloadGlobalEntry | Flags,
1286 }
else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
1287 llvm::offloading::emitOffloadingEntry(
1288 M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1289 llvm::offloading::OffloadGlobalSurfaceEntry | Flags,
1290 I.Flags.getSurfTexType());
1291 }
else if (I.Flags.getKind() == DeviceVarFlags::Texture) {
1292 llvm::offloading::emitOffloadingEntry(
1293 M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1294 llvm::offloading::OffloadGlobalTextureEntry | Flags,
1295 I.Flags.getSurfTexType());
1303 if (OffloadProfShadow) {
1304 llvm::offloading::emitOffloadingEntry(
1305 M, Kind, OffloadProfShadow, OffloadProfShadow->getName(),
1307 llvm::offloading::OffloadGlobalEntry, 0);
// Local PtrTy here is the unqualified pointer type of the module's context.
1309 llvm::LLVMContext &Ctx = M.getContext();
1310 auto *PtrTy = llvm::PointerType::getUnqual(Ctx);
1312 llvm::FunctionType::get(VoidTy, {PtrTy},
false),
1313 "__llvm_profile_offload_register_shadow_variable");
1314 auto *CtorFn = llvm::Function::Create(
1315 llvm::FunctionType::get(VoidTy,
false),
1316 llvm::GlobalValue::InternalLinkage,
1318 auto *Entry = llvm::BasicBlock::Create(Ctx,
"entry", CtorFn);
1319 llvm::IRBuilder<> B(Entry);
1320 B.CreateCall(RegisterShadow, {OffloadProfShadow});
1322 llvm::appendToGlobalCtors(M, CtorFn, 65535);
// Emits the per-compilation-unit profiling symbol named
// "__llvm_profile_sections_" + CUIDHash. Visible behavior:
//  * nothing is built when CUIDHash is empty or a value with that name already
//    exists (the statements taken are not visible here);
//  * one path builds an external, constant, protected-visibility global whose
//    struct of seven pointers references the __start_/__stop_ symbols of the
//    names, counters and data profile sections (declared on demand as hidden
//    external i8 globals);
//  * another path creates OffloadProfShadow as an external, non-constant,
//    null-initialized pointer global with the same name.
// NOTE(review): the conditions separating these paths and the seventh struct
// field are on lines missing from this listing.
1333void CGNVCUDARuntime::emitOffloadProfilingSections() {
1340 if (CUIDHash.empty())
1344 llvm::LLVMContext &Ctx = M.getContext();
1345 std::string Name = (
"__llvm_profile_sections_" + CUIDHash).str();
1349 if (M.getNamedValue(Name))
// Pointers here live in the data layout's default globals address space.
1356 unsigned GlobalAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
1357 auto *PtrTy = llvm::PointerType::get(Ctx, GlobalAS);
// Returns the existing global named SymName, or declares a hidden external one.
1358 auto getOrDeclare = [&](StringRef SymName) {
1359 if (
auto *GV = M.getNamedGlobal(SymName))
1361 auto *GV =
new llvm::GlobalVariable(
1362 M, llvm::Type::getInt8Ty(Ctx),
false,
1363 llvm::GlobalValue::ExternalLinkage,
nullptr, SymName,
1364 nullptr, llvm::GlobalValue::NotThreadLocal,
1366 GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
1369 auto *VersionGV = M.getNamedGlobal(
"__llvm_profile_raw_version");
1371 VersionGV =
new llvm::GlobalVariable(
1372 M, llvm::Type::getInt64Ty(Ctx),
true,
1373 llvm::GlobalValue::ExternalLinkage,
nullptr,
1374 "__llvm_profile_raw_version",
1375 nullptr, llvm::GlobalValue::NotThreadLocal,
1379 auto *StructTy = llvm::StructType::get(
1380 Ctx, {PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy});
1381 llvm::Constant *Fields[] = {
1382 getOrDeclare(
"__start___llvm_prf_names"),
1383 getOrDeclare(
"__stop___llvm_prf_names"),
1384 getOrDeclare(
"__start___llvm_prf_cnts"),
1385 getOrDeclare(
"__stop___llvm_prf_cnts"),
1386 getOrDeclare(
"__start___llvm_prf_data"),
1387 getOrDeclare(
"__stop___llvm_prf_data"),
1390 auto *
Init = llvm::ConstantStruct::get(StructTy, Fields);
1391 auto *GV =
new llvm::GlobalVariable(
1392 M, StructTy,
true, llvm::GlobalValue::ExternalLinkage,
1393 Init, Name,
nullptr, llvm::GlobalValue::NotThreadLocal,
1395 GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
// Shadow variable path: an unqualified-pointer global initialized to null.
1405 auto *PtrTy = llvm::PointerType::getUnqual(Ctx);
1406 OffloadProfShadow =
new llvm::GlobalVariable(
1407 M, PtrTy,
false, llvm::GlobalValue::ExternalLinkage,
1408 llvm::ConstantPointerNull::get(PtrTy), Name);
// End-of-module hook. It first runs transformManagedVars and
// emitOffloadProfilingSections, then visits DeviceVars looking for defined,
// non-local-linkage variables/surfaces/textures whose declaration is used but
// lacks UsedAttr. createOffloadingEntries and makeModuleCtorFunction are the
// two visible outcomes.
// NOTE(review): the action taken for matching variables and the full condition
// choosing between the two outcomes are on lines missing from this listing.
1413llvm::Function *CGNVCUDARuntime::finalizeModule() {
1414 transformManagedVars();
1415 emitOffloadProfilingSections();
1427 for (
auto &&Info : DeviceVars) {
1428 auto Kind = Info.Flags.getKind();
1429 if (!Info.Var->isDeclaration() &&
1430 !llvm::GlobalValue::isLocalLinkage(Info.Var->getLinkage()) &&
1431 (Kind == DeviceVarFlags::Variable ||
1432 Kind == DeviceVarFlags::Surface ||
1433 Kind == DeviceVarFlags::Texture) &&
1434 Info.D->isUsed() && !Info.D->hasAttr<UsedAttr>()) {
// New offloading driver together with relocatable device code is one of the
// conditions leading to createOffloadingEntries.
1441 (CGM.
getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
1442 createOffloadingEntries();
1444 return makeModuleCtorFunction();
// Returns the handle for kernel stub F, keyed by F's name in KernelHandles.
// When an entry already exists, KernelStubs is consulted and updated for the
// old handle. Otherwise either F itself is recorded as the handle, or a new
// constant global of F's type is created, copying F's linkage, DSO-locality
// and visibility, and recorded in both maps.
// NOTE(review): the second parameter line, the return statements and the
// conditions separating these paths are missing from this listing.
1449llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
1451 auto Loc = KernelHandles.find(F->getName());
1452 if (Loc != KernelHandles.end()) {
1453 auto OldHandle = Loc->second;
1454 if (KernelStubs[OldHandle] == F)
1462 KernelStubs[OldHandle] = F;
1467 KernelStubs.erase(OldHandle);
// Path where the function serves as its own handle.
1471 KernelHandles[F->getName()] = F;
// Path where a separate global variable serves as the handle.
1476 auto *Var =
new llvm::GlobalVariable(
1477 TheModule, F->getType(),
true, F->getLinkage(),
1482 Var->setDSOLocal(F->isDSOLocal());
1483 Var->setVisibility(F->getVisibility());
1485 auto *FT = FD->getPrimaryTemplate();
1486 if (!FT || FT->isThisDeclarationADefinition())
1488 KernelHandles[F->getName()] = Var;
1489 KernelStubs[Var] = F;
static std::unique_ptr< MangleContext > InitDeviceMC(CodeGenModule &CGM)
static void replaceManagedVar(llvm::GlobalVariable *Var, llvm::GlobalVariable *ManagedVar)
Result
Implement __builtin_bit_cast and related operations.
MangleContext * createMangleContext(const TargetInfo *T=nullptr)
If T is null pointer, assume the target in ASTContext.
bool shouldExternalize(const Decl *D) const
Whether a C++ static variable or CUDA/HIP kernel should be externalized.
StringRef getCUIDHash() const
llvm::SetVector< const VarDecl * > CUDADeviceVarODRUsedByHost
Keep track of CUDA/HIP device-side variables ODR-used by host code.
const TargetInfo * getAuxTargetInfo() const
MangleContext * createDeviceMangleContext(const TargetInfo &T)
Creates a device mangle context to correctly mangle lambdas in a mixed architecture compile by settin...
TypeInfoChars getTypeInfoInChars(const Type *T) const
const TargetInfo & getTargetInfo() const
unsigned getTargetAddressSpace(LangAS AS) const
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align. Beware: llvm::Align assumes power of two 8-bit b...
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
static CharUnits One()
One - Construct a CharUnits quantity of one.
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
static CharUnits Zero()
Zero - Construct a CharUnits quantity of zero.
bool hasProfileInstr() const
Check if any form of instrumentation is on.
std::string CudaGpuBinaryFileName
Name of file passed with -fcuda-include-gpubinary option to forward to CUDA runtime back-end for inco...
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
llvm::PointerType * getType() const
Return the type of the pointer value.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name="")
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
MangleContext & getMangleContext()
Gets the mangle context.
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
void add(RValue rvalue, QualType type)
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
Emits a call or invoke instruction to the given runtime function.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
llvm::AllocaInst * CreateTempAlloca(llvm::Type *Ty, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)
CreateTempAlloca - This creates an alloca and inserts it into the entry block if ArraySize is nullptr...
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **CallOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
RawAddress CreateMemTempWithoutCast(QualType T, const Twine &Name="tmp")
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment without...
RawAddress CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)
CreateTempAlloca - This creates an alloca and inserts it into the entry block.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Type * ConvertTypeForMem(QualType T)
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
This class organizes the cross-function state that is used while generating LLVM code.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CodeGenTypes & getTypes()
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
SanitizerMetadata * getSanitizerMetadata()
const llvm::Triple & getTriple() const
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
void maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO)
void printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const
Print the postfix for externalized static variable or kernels for single source offloading languages ...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeFunctionDeclaration(const GlobalDecl GD)
Free functions are functions that are compatible with an ordinary C function pointer type.
static RValue get(llvm::Value *V)
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
lookup_result lookup(DeclarationName Name) const
lookup - Find the declarations (if any) with the given Name in this context.
SourceLocation getLocation() const
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
const ParmVarDecl * getParamDecl(unsigned i) const
GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind)
const Decl * getDecl() const
StringRef getName() const
Return the actual identifier string.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
std::string CUID
The user provided compilation unit ID, if non-empty.
GPUDefaultStreamKind GPUDefaultStream
The default stream kind used for HIP kernel launching.
bool shouldMangleDeclName(const NamedDecl *D)
void mangleName(GlobalDecl GD, raw_ostream &)
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
QualType getCanonicalType() const
bool isMicrosoft() const
Is this ABI an MSVC-compatible ABI?
bool isItaniumFamily() const
Does this ABI generally fall into the Itanium family of ABIs?
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
const llvm::VersionTuple & getSDKVersion() const
unsigned size() const
Retrieve the number of template arguments in this template argument list.
static DeclContext * castToDeclContext(const TranslationUnitDecl *D)
CXXRecordDecl * castAsCXXRecordDecl() const
bool isCUDADeviceBuiltinSurfaceType() const
Check if the type is the CUDA device builtin surface type.
bool isCUDADeviceBuiltinTextureType() const
Check if the type is the CUDA device builtin texture type.
Represents a variable declaration or definition.
bool isInline() const
Whether this variable is (C++1z) inline.
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
CGCUDARuntime * CreateNVCUDARuntime(CodeGenModule &CGM)
Creates an instance of a CUDA runtime class.
@ VFS
Remove unused -ivfsoverlay arguments.
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
@ Address
A pointer to a ValueDecl.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
CudaVersion ToCudaVersion(llvm::VersionTuple)
bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature)
Linkage
Describes the different kinds of linkage (C++ [basic.link], C99 6.2.2) that an entity may have.
@ Type
The name was classified as a type.
U cast(CodeGen::Address addr)
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::IntegerType * SizeTy
llvm::IntegerType * IntTy
int
CharUnits getSizeAlign() const
CharUnits getPointerAlign() const
llvm::PointerType * DefaultPtrTy