23#include "llvm/ADT/StringRef.h"
24#include "llvm/Frontend/Offloading/Utility.h"
25#include "llvm/IR/BasicBlock.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/ReplaceConstant.h"
29#include "llvm/ProfileData/InstrProf.h"
30#include "llvm/Support/Format.h"
31#include "llvm/Support/VirtualFileSystem.h"
32#include "llvm/Transforms/Utils/ModuleUtils.h"
// Magic numbers written as the first integer field of the fatbin wrapper
// struct built in makeModuleCtorFunction(): HIPFatMagic is selected next to
// the HIP section names and CudaFatMagic next to the NVIDIA ones.
// 0x48495046 is the ASCII byte sequence "HIPF".
38constexpr unsigned CudaFatMagic = 0x466243b1;
39constexpr unsigned HIPFatMagic = 0x48495046;
// Cached LLVM types used when building runtime function signatures.
47 llvm::IntegerType *IntTy, *SizeTy;
49 llvm::PointerType *PtrTy;
// Bound in the constructor from CGM.getLLVMContext() and CGM.getModule().
52 llvm::LLVMContext &Context;
54 llvm::Module &TheModule;
// Accessed as `I.Kernel` on EmittedKernels entries elsewhere in this file.
57 llvm::Function *Kernel;
60 llvm::SmallVector<KernelInfo, 16> EmittedKernels;
// Kernel handle lookup, keyed by the stub function's name (see
// getKernelHandle()).
64 llvm::DenseMap<StringRef, llvm::GlobalValue *> KernelHandles;
// Reverse mapping from a kernel handle to its stub function.
66 llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
// Accessed as `Info.Var` on DeviceVars entries elsewhere in this file.
68 llvm::GlobalVariable *Var;
// Filled by registerDeviceVar / registerDeviceSurf / registerDeviceTex.
72 llvm::SmallVector<VarInfo, 16> DeviceVars;
// Assigned in makeModuleCtorFunction() and checked for null at the top of
// makeModuleDtorFunction().
76 llvm::GlobalVariable *GpuBinaryHandle =
nullptr;
// Created in emitOffloadProfilingSections(); stays null otherwise.
81 llvm::GlobalVariable *OffloadProfShadow =
nullptr;
// Pairs a host-side shadow global with the device-side symbol name it is
// registered under.
82 struct OffloadProfSectionShadowInfo {
83 llvm::GlobalVariable *Shadow;
84 std::string DeviceName;
86 llvm::SmallVector<OffloadProfSectionShadowInfo, 16> OffloadProfSectionShadows;
// Initialized from LangOpts.GPURelocatableDeviceCode in the constructor.
88 bool RelocatableDeviceCode;
// NOTE(review): presumably the mangler used for device-side names - confirm
// against getDeviceSideName().
90 std::unique_ptr<MangleContext> DeviceMC;
// Accessors for runtime entry points and the function types used to call them.
92 llvm::FunctionCallee getSetupArgumentFn()
const;
93 llvm::FunctionCallee getLaunchFn()
const;
95 llvm::FunctionType *getRegisterGlobalsFnTy()
const;
96 llvm::FunctionType *getCallbackFnTy()
const;
97 llvm::FunctionType *getRegisterLinkedBinaryFnTy()
const;
// Name helpers: Prefix + FuncName, and "__" + Prefix + FuncName.
98 std::string addPrefixToName(StringRef FuncName)
const;
99 std::string addUnderscoredPrefixToName(StringRef FuncName)
const;
// Builds the function that registers kernels and device variables.
102 llvm::Function *makeRegisterGlobalsFn();
// Returns the pointer of the constant C string that
// CGM.GetAddrOfConstantCString(Str, Name) yields. Name defaults to "".
107 llvm::Constant *makeConstantString(
const std::string &Str,
108 const std::string &Name =
"") {
109 return CGM.GetAddrOfConstantCString(Str, Name).getPointer();
// Emits Str as a constant, private-linkage global array in TheModule.
// AddNull is forwarded to ConstantDataArray::getString; a non-empty
// SectionName places the global in that section; Alignment is applied via
// setAlignment.
115 llvm::Constant *makeConstantArray(StringRef Str,
117 StringRef SectionName =
"",
118 unsigned Alignment = 0,
119 bool AddNull =
false) {
120 llvm::Constant *
Value =
121 llvm::ConstantDataArray::getString(Context, Str, AddNull);
122 auto *GV =
new llvm::GlobalVariable(
123 TheModule,
Value->getType(),
true,
124 llvm::GlobalValue::PrivateLinkage,
Value, Name);
125 if (!SectionName.empty()) {
126 GV->setSection(SectionName);
// Clear any unnamed_addr marking on the global.
129 GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
132 GV->setAlignment(llvm::Align(Alignment));
// Creates an internal-linkage function named "dummy" of type FnTy whose only
// block contains a `ret void`. FnTy must return void (asserted).
137 llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
138 assert(FnTy->getReturnType()->isVoidTy() &&
139 "Can only generate dummy functions returning void!");
140 llvm::Function *DummyFunc = llvm::Function::Create(
141 FnTy, llvm::GlobalValue::InternalLinkage,
"dummy", &TheModule);
143 llvm::BasicBlock *DummyBlock =
144 llvm::BasicBlock::Create(Context,
"", DummyFunc);
145 CGBuilderTy FuncBuilder(CGM, Context);
146 FuncBuilder.SetInsertPoint(DummyBlock);
147 FuncBuilder.CreateRetVoid();
// Kernel-argument packing helpers; emitDeviceStubBodyNew() picks one of them.
152 Address prepareKernelArgs(CodeGenFunction &CGF, FunctionArgList &Args);
153 Address prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
154 FunctionArgList &Args);
// The two stub-body emitters that emitDeviceStub() dispatches to.
155 void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
156 void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
// Returns the device-side symbol name for ND.
157 std::string getDeviceSideName(
const NamedDecl *ND)
override;
// Appends a DeviceVars entry of kind DeviceVarFlags::Variable for Var,
// recording the Extern and Constant flags and whether VD carries
// HIPManagedAttr.
159 void registerDeviceVar(
const VarDecl *VD, llvm::GlobalVariable &Var,
161 DeviceVars.push_back({&Var,
163 {DeviceVarFlags::Variable, Extern,
Constant,
164 VD->hasAttr<HIPManagedAttr>(),
// Appends a DeviceVars entry of kind DeviceVarFlags::Surface for Var. The
// flag following Extern is hard-coded to false for surfaces.
167 void registerDeviceSurf(
const VarDecl *VD, llvm::GlobalVariable &Var,
168 bool Extern,
int Type) {
169 DeviceVars.push_back({&Var,
171 {DeviceVarFlags::Surface, Extern,
false,
// Appends a DeviceVars entry of kind DeviceVarFlags::Texture for Var. The two
// flags following Extern are hard-coded to false; Normalized and Type are
// stored as given.
175 void registerDeviceTex(
const VarDecl *VD, llvm::GlobalVariable &Var,
176 bool Extern,
int Type,
bool Normalized) {
177 DeviceVars.push_back({&Var,
179 {DeviceVarFlags::Texture, Extern,
false,
180 false, Normalized,
Type}});
// Builds the module constructor that registers the GPU binary.
184 llvm::Function *makeModuleCtorFunction();
// Builds the matching destructor; the constructor registers it via an
// at-exit call when it is non-null.
186 llvm::Function *makeModuleDtorFunction();
// Rewrites managed variables; the first step of finalizeModule().
188 void transformManagedVars();
// Emits offloading entries for kernels and device variables.
190 void createOffloadingEntries();
// Creates the host-side shadow globals used for offload profiling.
197 void emitOffloadProfilingSections();
200 CGNVCUDARuntime(CodeGenModule &CGM);
// Returns the handle associated with stub function F.
202 llvm::GlobalValue *getKernelHandle(llvm::Function *F, GlobalDecl GD)
override;
// Looks up the stub recorded for Handle in KernelStubs; the handle must
// already be present (asserted).
203 llvm::Function *getKernelStub(llvm::GlobalValue *Handle)
override {
204 auto Loc = KernelStubs.find(Handle);
205 assert(Loc != KernelStubs.end());
// Emits the host-side stub body for the kernel currently being generated.
208 void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args)
override;
// Records device variables, surfaces and textures for later registration.
209 void handleVarRegistration(
const VarDecl *VD,
210 llvm::GlobalVariable &Var)
override;
// May change Linkage to InternalLinkage for device-side variables.
212 internalizeDeviceSideVar(
const VarDecl *D,
213 llvm::GlobalValue::LinkageTypes &
Linkage)
override;
// Runs the end-of-module steps (see the definition for the order).
215 llvm::Function *finalizeModule()
override;
// Returns Prefix immediately followed by FuncName.
220std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName)
const {
221 return (Prefix + FuncName).str();
// Returns "__" followed by Prefix and then FuncName.
224CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName)
const {
225 return (
"__" + Prefix + FuncName).str();
235 return std::unique_ptr<MangleContext>(
// Constructor: forwards CGM to the CGCUDARuntime base, binds Context and
// TheModule to CGM's LLVM context and module, and caches the
// GPURelocatableDeviceCode language option.
244CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
245 : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
246 TheModule(CGM.getModule()),
247 RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
// Describes the runtime's <Prefix>SetupArgument entry point: it takes
// (PtrTy, SizeTy, SizeTy) and returns IntTy, non-variadic.
262llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn()
const {
264 llvm::Type *Params[] = {PtrTy, SizeTy, SizeTy};
266 llvm::FunctionType::get(IntTy, Params,
false),
267 addPrefixToName(
"SetupArgument"));
// Describes the launch entry point used by the legacy stub body. The callee
// visible here is "hipLaunchByPtr" with type IntTy(PtrTy), non-variadic.
270llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn()
const {
274 llvm::FunctionType::get(IntTy, PtrTy,
false),
"hipLaunchByPtr");
// Type of the register-globals function: void(PtrTy), non-variadic.
281llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy()
const {
282 return llvm::FunctionType::get(VoidTy, PtrTy,
false);
// Type of the callback passed as the last argument of the linked-binary
// registration call: void(PtrTy), non-variadic.
285llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy()
const {
286 return llvm::FunctionType::get(VoidTy, PtrTy,
false);
// Type of the linked-binary registration function: void taking four pointer
// arguments (unqualified ptr, PtrTy, PtrTy, unqualified ptr), non-variadic.
289llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy()
const {
290 llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), PtrTy, PtrTy,
291 llvm::PointerType::getUnqual(Context)};
292 return llvm::FunctionType::get(VoidTy, Params,
false);
// Computes the device-side symbol name for ND and returns it as std::string.
295std::string CGNVCUDARuntime::getDeviceSideName(
const NamedDecl *ND) {
// Function declarations are wrapped as kernel-reference GlobalDecls.
298 if (
auto *FD = dyn_cast<FunctionDecl>(ND))
299 GD = GlobalDecl(FD, KernelReferenceKind::Kernel);
302 std::string DeviceSideName;
// First pass: the name is written into a stack buffer and copied out.
309 SmallString<256> Buffer;
310 llvm::raw_svector_ostream
Out(Buffer);
312 DeviceSideName = std::string(
Out.str());
// Second pass: the existing name is re-emitted into a fresh buffer and the
// result replaces DeviceSideName.
319 SmallString<256> Buffer;
320 llvm::raw_svector_ostream
Out(Buffer);
321 Out << DeviceSideName;
323 DeviceSideName = std::string(
Out.str());
325 return DeviceSideName;
// Emits the host-side stub for the kernel in CGF.CurFn, then delegates the
// body to either the new-launch or the legacy emitter.
328void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
329 FunctionArgList &Args) {
// When the recorded handle is a GlobalVariable, give it the stub's linkage
// and make the stub its initializer.
332 dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.
CurFn->getName()])) {
333 GV->setLinkage(CGF.
CurFn->getLinkage());
334 GV->setInitializer(CGF.
CurFn);
// CUDA_USES_NEW_LAUNCH is one of the conditions selecting the new body.
337 CudaFeature::CUDA_USES_NEW_LAUNCH) ||
340 emitDeviceStubBodyNew(CGF, Args);
342 emitDeviceStubBodyLegacy(CGF, Args);
// Packs the kernel arguments for the LLVM offload launch path and returns the
// address of the "kernel_launch_params" object.
352Address CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
353 FunctionArgList &Args) {
354 SmallVector<llvm::Type *> ArgTypes, KernelLaunchParamsTypes;
355 for (
auto &Arg : Args)
// Struct type built from the collected argument types.
357 llvm::StructType *KernelArgsTy = llvm::StructType::create(ArgTypes);
// The launch-params struct has three fields: an i64 followed by two pointers.
359 auto *Int64Ty = CGF.
Builder.getInt64Ty();
360 KernelLaunchParamsTypes.push_back(Int64Ty);
361 KernelLaunchParamsTypes.push_back(PtrTy);
362 KernelLaunchParamsTypes.push_back(PtrTy);
364 llvm::StructType *KernelLaunchParamsTy =
365 llvm::StructType::create(KernelLaunchParamsTypes);
370 "kernel_launch_params");
// Allocation size of the argument struct under the module's data layout.
372 auto KernelArgsSize = CGM.
getDataLayout().getTypeAllocSize(KernelArgsTy);
380 for (
unsigned i = 0; i < Args.size(); ++i) {
385 return KernelLaunchParams;
// Packs the kernel arguments as an array of pointers, one slot per argument.
388Address CGNVCUDARuntime::prepareKernelArgs(CodeGenFunction &CGF,
389 FunctionArgList &Args) {
// The element count is clamped to at least 1, so a kernel without
// arguments still yields a non-zero count.
395 llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
397 for (
unsigned i = 0; i < Args.size(); ++i) {
// Each argument's address is cast to PtrTy before being used with the
// GEP result below.
399 llvm::Value *VoidVarPtr = CGF.
Builder.CreatePointerCast(VarPtr, PtrTy);
401 VoidVarPtr, CGF.
Builder.CreateConstGEP1_32(
// Emits the stub body for the newer launch API: packs the kernel arguments,
// locates the <Prefix>LaunchKernel declaration in the translation unit, and
// prepares the launch call around the kernel handle.
409void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
410 FunctionArgList &Args) {
// Choose between the two argument-packing helpers.
413 ? prepareKernelArgsLLVMOffload(CGF, Args)
414 : prepareKernelArgs(CGF, Args);
428 TranslationUnitDecl *TUDecl = CGM.
getContext().getTranslationUnitDecl();
// Base API name; with a per-thread default stream one of the suffixes
// "_spt" / "_ptsz" is appended.
430 std::string KernelLaunchAPI =
"LaunchKernel";
432 LangOptions::GPUDefaultStreamKind::PerThread) {
434 KernelLaunchAPI = KernelLaunchAPI +
"_spt";
436 KernelLaunchAPI = KernelLaunchAPI +
"_ptsz";
438 auto LaunchKernelName = addPrefixToName(KernelLaunchAPI);
439 const IdentifierInfo &cudaLaunchKernelII =
// Keep a lookup result only if it is a FunctionDecl.
441 FunctionDecl *cudaLaunchKernelFD =
nullptr;
443 if (FunctionDecl *FD = dyn_cast<FunctionDecl>(
Result))
444 cudaLaunchKernelFD = FD;
// A missing declaration is reported with the message below.
447 if (cudaLaunchKernelFD ==
nullptr) {
449 "Can't find declaration for " + LaunchKernelName);
// The dim3 type is taken from the launch function's parameter at index 1.
453 ParmVarDecl *GridDimParam = cudaLaunchKernelFD->
getParamDecl(1);
454 QualType Dim3Ty = GridDimParam->
getType();
// Runtime helper named "__" + Prefix + "PopCallConfiguration", returning
// IntTy.
464 llvm::FunctionType::get(IntTy,
470 addUnderscoredPrefixToName(
"PopCallConfiguration"));
// The kernel handle recorded for this stub, cast to PtrTy.
479 CGF.
Builder.CreatePointerCast(KernelHandles[CGF.
CurFn->getName()], PtrTy);
480 CallArgList LaunchKernelArgs;
492 QualType QT = cudaLaunchKernelFD->
getType();
497 const CGFunctionInfo &FI =
499 llvm::FunctionCallee cudaLaunchKernelFn =
// Global named "<kernel>.id": looked up in the module first; the new
// global below has an i8 zero initializer and copies the kernel's linkage,
// DSO-locality and visibility.
509 llvm::Function *KernelFunction = llvm::cast<llvm::Function>(
Kernel);
510 std::string GlobalVarName = (KernelFunction->getName() +
".id").str();
512 llvm::GlobalVariable *HandleVar =
513 CGM.
getModule().getNamedGlobal(GlobalVarName);
515 HandleVar =
new llvm::GlobalVariable(
517 false, KernelFunction->getLinkage(),
518 llvm::ConstantInt::get(CGM.
Int8Ty, 0), GlobalVarName);
519 HandleVar->setDSOLocal(KernelFunction->isDSOLocal());
520 HandleVar->setVisibility(KernelFunction->getVisibility());
// If the kernel is in a comdat, the handle gets a comdat named after the
// handle variable itself.
521 if (KernelFunction->hasComdat())
522 HandleVar->setComdat(CGM.
getModule().getOrInsertComdat(GlobalVarName));
// Emits the legacy stub body: a SetupArgument call per kernel argument at a
// running, alignment-adjusted offset, followed by the launch call.
535void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
536 FunctionArgList &Args) {
538 llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn();
541 for (
const VarDecl *A : Args) {
// Round the running offset up to this argument's alignment.
543 Offset = Offset.
alignTo(TInfo.Align);
// NOTE: this local array shadows the function parameter `Args`.
544 llvm::Value *Args[] = {
547 llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
548 llvm::ConstantInt::get(SizeTy, Offset.
getQuantity()),
// Branch to NextBlock when CB equals zero, otherwise to EndBlock.
551 llvm::Constant *
Zero = llvm::ConstantInt::get(IntTy, 0);
552 llvm::Value *CBZero = CGF.
Builder.CreateICmpEQ(CB,
Zero);
554 CGF.
Builder.CreateCondBr(CBZero, NextBlock, EndBlock);
// Advance past this argument.
556 Offset += TInfo.Width;
560 llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
// The kernel handle recorded for this stub, cast to PtrTy.
562 CGF.
Builder.CreatePointerCast(KernelHandles[CGF.
CurFn->getName()], PtrTy);
// Rewrites uses of Var so that each instruction user works on a value loaded
// from ManagedVar instead. Use chains are walked with an explicit worklist of
// user paths.
572 llvm::GlobalVariable *ManagedVar) {
// Seed the worklist with one single-element path per direct user of Var.
574 for (
auto &&VarUse : Var->uses()) {
575 WorkList.push_back({VarUse.getUser()});
577 while (!WorkList.empty()) {
578 auto &&WorkItem = WorkList.pop_back_val();
579 auto *
U = WorkItem.back();
// Extend the path with each user of U and queue the extended path.
581 for (
auto &&UU :
U->uses()) {
582 WorkItem.push_back(UU.getUser());
583 WorkList.push_back(WorkItem);
// At an instruction: insert a "ld.managed" load from ManagedVar before
// it, using Var's type and alignment.
588 if (
auto *I = dyn_cast<llvm::Instruction>(
U)) {
589 llvm::Value *OldV = Var;
590 llvm::Instruction *NewV =
new llvm::LoadInst(
591 Var->getType(), ManagedVar,
"ld.managed",
false,
592 llvm::Align(Var->getAlignment()), I->getIterator());
// Constant expressions on the path are materialized as instructions
// before I, with the old value replaced by the new one.
596 for (
auto &&Op : WorkItem) {
598 auto *NewInst = CE->getAsInstruction();
599 NewInst->insertBefore(*I->getParent(), I->getIterator());
600 NewInst->replaceUsesOfWith(OldV, NewV);
604 I->replaceUsesOfWith(OldV, NewV);
606 llvm_unreachable(
"Invalid use of managed variable");
// Builds the internal "__<Prefix>_register_globals" function. Its single
// argument is the GPU binary handle; the body issues one registration call per
// emitted kernel and per recorded device variable/surface/texture, plus the
// offload-profiling shadow registrations, and ends with `ret void`.
625llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
// Checks whether there is anything to register at all.
627 if (EmittedKernels.empty() && DeviceVars.empty())
630 llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
631 getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
632 addUnderscoredPrefixToName(
"_register_globals"), &TheModule);
633 llvm::BasicBlock *EntryBB =
634 llvm::BasicBlock::Create(Context,
"entry", RegisterKernelsFunc);
635 CGBuilderTy Builder(CGM, Context);
636 Builder.SetInsertPoint(EntryBB);
// __<Prefix>RegisterFunction takes ten parameters and returns IntTy.
640 llvm::Type *RegisterFuncParams[] = {
641 PtrTy, PtrTy, PtrTy, PtrTy, IntTy,
642 PtrTy, PtrTy, PtrTy, PtrTy, llvm::PointerType::getUnqual(Context)};
644 llvm::FunctionType::get(IntTy, RegisterFuncParams,
false),
645 addUnderscoredPrefixToName(
"RegisterFunction"));
// The first (only) argument of the function being built.
650 llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
651 for (
auto &&I : EmittedKernels) {
652 llvm::Constant *KernelName =
654 llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(PtrTy);
655 llvm::Value *Args[] = {
657 KernelHandles[I.Kernel->getName()],
// The IntTy argument is passed as all-ones.
660 llvm::ConstantInt::getAllOnesValue(IntTy),
665 llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Context))};
666 Builder.CreateCall(RegisterFunc, Args);
// Type used for variable sizes; starts out as IntTy.
669 llvm::Type *VarSizeTy = IntTy;
// __<Prefix>RegisterVar: void, eight parameters.
677 llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
678 IntTy, VarSizeTy, IntTy, IntTy};
680 llvm::FunctionType::get(VoidTy, RegisterVarParams,
false),
681 addUnderscoredPrefixToName(
"RegisterVar"));
// __<Prefix>RegisterManagedVar: void, six parameters.
684 llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy,
685 PtrTy, VarSizeTy, IntTy};
687 llvm::FunctionType::get(VoidTy, RegisterManagedVarParams,
false),
688 addUnderscoredPrefixToName(
"RegisterManagedVar"));
// __<Prefix>RegisterSurface: void, six parameters.
692 llvm::FunctionType::get(
693 VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy},
false),
694 addUnderscoredPrefixToName(
"RegisterSurface"));
// __<Prefix>RegisterTexture: void, seven parameters.
698 llvm::FunctionType::get(
699 VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy},
false),
700 addUnderscoredPrefixToName(
"RegisterTexture"));
701 for (
auto &&Info : DeviceVars) {
702 llvm::GlobalVariable *Var = Info.Var;
// Only managed variables may reach this point as mere declarations.
703 assert((!Var->isDeclaration() || Info.Flags.isManaged()) &&
704 "External variables should not show up here, except HIP managed "
706 llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
707 switch (Info.Flags.getKind()) {
708 case DeviceVarFlags::Variable: {
711 if (Info.Flags.isManaged()) {
// Managed variables must already carry the ".managed" suffix; the
// companion global is found by stripping that suffix from the name.
712 assert(Var->getName().ends_with(
".managed") &&
713 "HIP managed variables not transformed");
714 auto *ManagedVar = CGM.
getModule().getNamedGlobal(
715 Var->getName().drop_back(StringRef(
".managed").size()));
716 llvm::Value *Args[] = {
721 llvm::ConstantInt::get(VarSizeTy, VarSize),
722 llvm::ConstantInt::get(IntTy, Var->getAlignment())};
// Declarations are not registered as managed variables.
723 if (!Var->isDeclaration())
724 Builder.CreateCall(RegisterManagedVar, Args);
726 llvm::Value *Args[] = {
731 llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
732 llvm::ConstantInt::get(VarSizeTy, VarSize),
733 llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
734 llvm::ConstantInt::get(IntTy, 0)};
735 Builder.CreateCall(RegisterVar, Args);
// Surfaces pass VarName twice, then the surface type and the extern flag.
739 case DeviceVarFlags::Surface:
742 {&GpuBinaryHandlePtr, Var, VarName, VarName,
743 llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
744 llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
// Textures additionally pass the normalized flag before the extern flag.
746 case DeviceVarFlags::Texture:
749 {&GpuBinaryHandlePtr, Var, VarName, VarName,
750 llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
751 llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()),
752 llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
// Offload profiling: register the shadow global as a variable under its own
// name, then hand it to the profile runtime.
763 if (OffloadProfShadow) {
764 llvm::Constant *Name =
765 makeConstantString(std::string(OffloadProfShadow->getName()));
766 llvm::Constant *IntZero = llvm::ConstantInt::get(IntTy, 0);
767 llvm::Value *RegisterVarArgs[] = {
773 llvm::ConstantInt::get(VarSizeTy,
777 Builder.CreateCall(RegisterVar, RegisterVarArgs);
780 llvm::FunctionType::get(VoidTy, {PtrTy},
false),
781 "__llvm_profile_offload_register_shadow_variable");
782 Builder.CreateCall(RegisterShadow, {OffloadProfShadow});
// Same for each per-section shadow, registered under its device-side name.
785 if (!OffloadProfSectionShadows.empty()) {
787 llvm::FunctionType::get(VoidTy, {PtrTy},
false),
788 "__llvm_profile_offload_register_section_shadow_variable");
789 llvm::Constant *IntZero = llvm::ConstantInt::get(IntTy, 0);
790 for (
const auto &Info : OffloadProfSectionShadows) {
791 llvm::Constant *Name = makeConstantString(Info.DeviceName);
792 llvm::Value *RegisterVarArgs[] = {
798 llvm::ConstantInt::get(VarSizeTy,
802 Builder.CreateCall(RegisterVar, RegisterVarArgs);
803 Builder.CreateCall(RegisterSectionShadow, {Info.Shadow});
807 Builder.CreateRetVoid();
808 return RegisterKernelsFunc;
// Builds the internal "__<Prefix>_module_ctor" function. It wraps the GPU
// binary in a fatbin wrapper global, registers it with the runtime, invokes
// the register-globals function, and registers the module destructor via an
// at-exit call when one is produced. Returns the constructor function.
830llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// Checks for the no-binary, non-HIP case.
835 if (CudaGpuBinaryFileName.empty() && !IsHIP)
// Checks for HIP or non-RDC CUDA with no kernels (condition continues on a
// line not included here).
837 if ((IsHIP || (IsCUDA && !RelocatableDeviceCode)) && EmittedKernels.empty() &&
842 llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
// With relocatable device code a register-globals function is always
// needed, so substitute a dummy when none was generated.
845 if (RelocatableDeviceCode && !RegisterGlobalsFunc)
846 RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
// __<Prefix>RegisterFatBinary: PtrTy(PtrTy).
850 llvm::FunctionType::get(PtrTy, PtrTy,
false),
851 addUnderscoredPrefixToName(
"RegisterFatBinary"));
// Wrapper layout: two IntTy fields followed by two pointers.
853 llvm::StructType *FatbinWrapperTy =
854 llvm::StructType::get(IntTy, IntTy, PtrTy, PtrTy);
// Load the GPU binary through the virtual file system when a file name was
// given; a read error is reported with the file name and error message.
860 std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary =
nullptr;
861 if (!CudaGpuBinaryFileName.empty()) {
863 auto CudaGpuBinaryOrErr =
864 VFS->getBufferForFile(CudaGpuBinaryFileName, -1,
false);
865 if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
867 << CudaGpuBinaryFileName << EC.message();
870 CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
873 llvm::Function *ModuleCtorFunc = llvm::Function::Create(
874 llvm::FunctionType::get(VoidTy,
false),
875 llvm::GlobalValue::InternalLinkage,
876 addUnderscoredPrefixToName(
"_module_ctor"), &TheModule);
877 llvm::BasicBlock *CtorEntryBB =
878 llvm::BasicBlock::Create(Context,
"entry", ModuleCtorFunc);
879 CGBuilderTy CtorBuilder(CGM, Context);
881 CtorBuilder.SetInsertPoint(CtorEntryBB);
// Section names, module-id prefix and fatbin constant; the values differ
// between HIP and CUDA and between Mach-O (isMacOSX) and other targets.
883 const char *FatbinConstantName;
884 const char *FatbinSectionName;
885 const char *ModuleIDSectionName;
886 StringRef ModuleIDPrefix;
887 llvm::Constant *FatBinStr;
// HIP names.
892 CGM.
getTriple().isMacOSX() ?
"__HIP,__hip_fatbin" :
".hip_fatbin";
894 CGM.
getTriple().isMacOSX() ?
"__HIP,__fatbin" :
".hipFatBinSegment";
896 ModuleIDSectionName =
897 CGM.
getTriple().isMacOSX() ?
"__HIP,__module_id" :
"__hip_module_id";
898 ModuleIDPrefix =
"__hip_";
// The embedded HIP binary is emitted with 4096-byte alignment.
903 const unsigned HIPCodeObjectAlign = 4096;
904 FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()),
"",
905 FatbinConstantName, HIPCodeObjectAlign);
// Alternative: an external-linkage constant global with no initializer.
911 FatBinStr =
new llvm::GlobalVariable(
913 true, llvm::GlobalValue::ExternalLinkage,
nullptr,
917 nullptr, llvm::GlobalVariable::NotThreadLocal);
921 FatMagic = HIPFatMagic;
// CUDA names; relocatable device code uses a different fatbin constant
// section.
923 if (RelocatableDeviceCode)
924 FatbinConstantName = CGM.
getTriple().isMacOSX()
925 ?
"__NV_CUDA,__nv_relfatbin"
929 CGM.
getTriple().isMacOSX() ?
"__NV_CUDA,__nv_fatbin" :
".nv_fatbin";
932 CGM.
getTriple().isMacOSX() ?
"__NV_CUDA,__fatbin" :
".nvFatBinSegment";
934 ModuleIDSectionName = CGM.
getTriple().isMacOSX()
935 ?
"__NV_CUDA,__nv_module_id"
937 ModuleIDPrefix =
"__nv_";
// The embedded CUDA binary is emitted with 8-byte alignment.
941 FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()),
"",
942 FatbinConstantName, 8);
943 FatMagic = CudaFatMagic;
// Fill the wrapper: magic, the constant 1, the binary pointer, and a null
// pointer; then place the global in the fatbin section.
947 ConstantInitBuilder Builder(CGM);
948 auto Values = Builder.beginStruct(FatbinWrapperTy);
950 Values.addInt(IntTy, FatMagic);
952 Values.addInt(IntTy, 1);
954 Values.add(FatBinStr);
956 Values.add(llvm::ConstantPointerNull::get(PtrTy));
957 llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
960 FatbinWrapper->setSection(FatbinSectionName);
// Handle global: external linkage with relocatable device code, internal
// otherwise; non-internal handles are given hidden visibility.
971 auto Linkage = RelocatableDeviceCode ? llvm::GlobalValue::ExternalLinkage
972 : llvm::GlobalValue::InternalLinkage;
973 llvm::BasicBlock *IfBlock =
974 llvm::BasicBlock::Create(Context,
"if", ModuleCtorFunc);
975 llvm::BasicBlock *ExitBlock =
976 llvm::BasicBlock::Create(Context,
"exit", ModuleCtorFunc);
979 GpuBinaryHandle =
new llvm::GlobalVariable(
980 TheModule, PtrTy,
false,
Linkage,
982 !RelocatableDeviceCode ? llvm::ConstantPointerNull::get(PtrTy)
989 if (
Linkage != llvm::GlobalValue::InternalLinkage)
990 GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
992 GpuBinaryHandle, PtrTy,
// Register the fat binary only when the stored handle is still null.
995 auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
996 llvm::Constant *
Zero =
997 llvm::Constant::getNullValue(HandleValue->getType());
998 llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue,
Zero);
999 CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
// "if" block: register, store the returned handle, fall through to "exit".
1002 CtorBuilder.SetInsertPoint(IfBlock);
1004 llvm::CallInst *RegisterFatbinCall =
1005 CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
1006 CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
1007 CtorBuilder.CreateBr(ExitBlock);
// "exit" block: reload the handle and pass it to the register-globals
// function.
1010 CtorBuilder.SetInsertPoint(ExitBlock);
1012 if (RegisterGlobalsFunc) {
1013 auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
1014 CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
1017 }
else if (!RelocatableDeviceCode) {
// Non-RDC path: register unconditionally and store the handle in an
// internal "__cuda_gpubin_handle" global.
1021 llvm::CallInst *RegisterFatbinCall =
1022 CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
1023 GpuBinaryHandle =
new llvm::GlobalVariable(
1024 TheModule, PtrTy,
false, llvm::GlobalValue::InternalLinkage,
1025 llvm::ConstantPointerNull::get(PtrTy),
"__cuda_gpubin_handle");
1027 CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
1031 if (RegisterGlobalsFunc)
1032 CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
// When CUDA_USES_FATBIN_REGISTER_END applies, also call
// __cudaRegisterFatBinaryEnd with the handle.
1036 CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
1039 llvm::FunctionType::get(VoidTy, PtrTy,
false),
1040 "__cudaRegisterFatBinaryEnd");
1041 CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
// Module ID: the prefix followed by the wrapper's GUID in hex, emitted as a
// NUL-terminated array with alignment 32 in the module-id section.
1045 SmallString<64> ModuleID;
1046 llvm::raw_svector_ostream
OS(ModuleID);
1047 OS << ModuleIDPrefix << llvm::format(
"%" PRIx64, FatbinWrapper->getGUID());
1048 llvm::Constant *ModuleIDConstant = makeConstantArray(
1049 std::string(ModuleID),
"", ModuleIDSectionName, 32,
true);
// External alias "__fatbinwrap<ModuleID>" pointing at the wrapper.
1052 llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
1053 Twine(
"__fatbinwrap") + ModuleID, FatbinWrapper);
// Call "__cudaRegisterLinkedBinary<ModuleID>" with the register-globals
// function, the wrapper, the module ID and a dummy callback.
1057 SmallString<128> RegisterLinkedBinaryName(
"__cudaRegisterLinkedBinary");
1058 RegisterLinkedBinaryName += ModuleID;
1060 getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
1062 assert(RegisterGlobalsFunc &&
"Expecting at least dummy function!");
1063 llvm::Value *Args[] = {RegisterGlobalsFunc, FatbinWrapper, ModuleIDConstant,
1064 makeDummyFunction(getCallbackFnTy())};
1065 CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
// If a destructor was generated, pass it to an at-exit function whose type
// is IntTy(pointer-to-cleanup-function).
1071 if (llvm::Function *CleanupFn = makeModuleDtorFunction()) {
1073 llvm::FunctionType *AtExitTy =
1074 llvm::FunctionType::get(IntTy, CleanupFn->getType(),
false);
1075 llvm::FunctionCallee AtExitFunc =
1078 CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
1081 CtorBuilder.CreateRetVoid();
1082 return ModuleCtorFunc;
// Builds the internal "__<Prefix>_module_dtor" function, which passes the
// stored GPU binary handle to __<Prefix>UnregisterFatBinary. Returns the
// destructor function.
1104llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
// Checks whether a handle global exists at all.
1106 if (!GpuBinaryHandle)
// __<Prefix>UnregisterFatBinary: void(PtrTy).
1111 llvm::FunctionType::get(VoidTy, PtrTy,
false),
1112 addUnderscoredPrefixToName(
"UnregisterFatBinary"));
1114 llvm::Function *ModuleDtorFunc = llvm::Function::Create(
1115 llvm::FunctionType::get(VoidTy,
false),
1116 llvm::GlobalValue::InternalLinkage,
1117 addUnderscoredPrefixToName(
"_module_dtor"), &TheModule);
1119 llvm::BasicBlock *DtorEntryBB =
1120 llvm::BasicBlock::Create(Context,
"entry", ModuleDtorFunc);
1121 CGBuilderTy DtorBuilder(CGM, Context);
1122 DtorBuilder.SetInsertPoint(DtorEntryBB);
1125 GpuBinaryHandle, GpuBinaryHandle->getValueType(),
1127 auto *HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
// Guarded variant: unregister only when the handle is non-null, then reset
// the stored handle to null.
1132 llvm::BasicBlock *IfBlock =
1133 llvm::BasicBlock::Create(Context,
"if", ModuleDtorFunc);
1134 llvm::BasicBlock *ExitBlock =
1135 llvm::BasicBlock::Create(Context,
"exit", ModuleDtorFunc);
1136 llvm::Constant *
Zero = llvm::Constant::getNullValue(HandleValue->getType());
1137 llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue,
Zero);
1138 DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock);
1140 DtorBuilder.SetInsertPoint(IfBlock);
1141 DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
1142 DtorBuilder.CreateStore(
Zero, GpuBinaryAddr);
1143 DtorBuilder.CreateBr(ExitBlock);
1145 DtorBuilder.SetInsertPoint(ExitBlock);
// Unguarded variant: unregister directly.
1147 DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
1149 DtorBuilder.CreateRetVoid();
1150 return ModuleDtorFunc;
1154 return new CGNVCUDARuntime(CGM);
// Sets Linkage to InternalLinkage for variables matched by the attribute
// check below (device, constant or shared attributes are among the tested
// ones).
1157void CGNVCUDARuntime::internalizeDeviceSideVar(
1174 if (D->
hasAttr<CUDADeviceAttr>() || D->
hasAttr<CUDAConstantAttr>() ||
1175 D->
hasAttr<CUDASharedAttr>() ||
1178 Linkage = llvm::GlobalValue::InternalLinkage;
// Records GV for later registration: plain device/constant variables are
// handled by attribute; surface and texture builtin types are handled via
// their template arguments.
1182void CGNVCUDARuntime::handleVarRegistration(
const VarDecl *D,
1183 llvm::GlobalVariable &GV) {
1184 if (D->
hasAttr<CUDADeviceAttr>() || D->
hasAttr<CUDAConstantAttr>()) {
1200 D->
hasAttr<HIPManagedAttr>()) {
1202 D->
hasAttr<CUDAConstantAttr>());
1210 const TemplateArgumentList &Args = TD->getTemplateArgs();
// Surface type: exactly two template arguments; the second is the type.
1211 if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
1212 assert(Args.
size() == 2 &&
1213 "Unexpected number of template arguments of CUDA device "
1214 "builtin surface type.");
1215 auto SurfType = Args[1].getAsIntegral();
// A variable without a definition is registered as extern.
1217 registerDeviceSurf(D, GV, !D->
hasDefinition(), SurfType.getSExtValue());
// Texture type: exactly three template arguments; the second is the type
// and the third the normalized flag.
1219 assert(Args.
size() == 3 &&
1220 "Unexpected number of template arguments of CUDA device "
1221 "builtin texture type.");
1222 auto TexType = Args[1].getAsIntegral();
1223 auto Normalized = Args[2].getAsIntegral();
1225 registerDeviceTex(D, GV, !D->
hasDefinition(), TexType.getSExtValue(),
1226 Normalized.getZExtValue());
// For every managed variable in DeviceVars, creates a companion global that
// takes over the variable's name; the original is renamed to
// "<name>.managed".
1235void CGNVCUDARuntime::transformManagedVars() {
1236 for (
auto &&Info : DeviceVars) {
1237 llvm::GlobalVariable *Var = Info.Var;
1238 if (Info.Flags.getKind() == DeviceVarFlags::Variable &&
1239 Info.Flags.isManaged()) {
// The companion copies Var's linkage and gets a null-pointer
// initializer when Var is a definition.
1240 auto *ManagedVar =
new llvm::GlobalVariable(
1242 false, Var->getLinkage(),
1243 Var->isDeclaration()
1245 : llvm::ConstantPointerNull::get(Var->getType()),
1247 llvm::GlobalVariable::NotThreadLocal,
1249 ? LangAS::cuda_device
1250 : LangAS::Default));
// Mirror DSO-locality and visibility; mark as externally initialized.
1251 ManagedVar->setDSOLocal(Var->isDSOLocal());
1252 ManagedVar->setVisibility(Var->getVisibility());
1253 ManagedVar->setExternallyInitialized(
true);
// Swap names: the companion takes Var's name, Var gets the suffix.
1255 ManagedVar->takeName(Var);
1256 Var->setName(Twine(ManagedVar->getName()) +
".managed");
// Extra handling for device compilation of defined variables.
1259 if (CGM.
getLangOpts().CUDAIsDevice && !Var->isDeclaration()) {
1260 assert(!ManagedVar->isDeclaration());
// Emits offloading entries (llvm::offloading::emitOffloadingEntry) for every
// emitted kernel and recorded device variable, plus entries and a registering
// constructor for the offload-profiling shadow globals.
1271void CGNVCUDARuntime::createOffloadingEntries() {
// Offload kind: HIP or CUDA, with OpenMP as a possible override.
1273 ? llvm::object::OffloadKind::OFK_HIP
1274 : llvm::object::OffloadKind::OFK_Cuda;
1277 Kind = llvm::object::OffloadKind::OFK_OpenMP;
// One entry per kernel, addressed by its handle.
1280 for (KernelInfo &I : EmittedKernels)
1281 llvm::offloading::emitOffloadingEntry(
1282 M, Kind, KernelHandles[I.Kernel->getName()],
1284 llvm::offloading::OffloadGlobalEntry);
1286 for (VarInfo &I : DeviceVars) {
1288 CGM.
getDataLayout().getTypeAllocSize(I.Var->getValueType());
// Flag bits are assembled from the extern/constant/normalized properties.
1291 ?
static_cast<int32_t>(llvm::offloading::OffloadGlobalExtern)
1293 (I.Flags.isConstant()
1294 ?
static_cast<int32_t>(llvm::offloading::OffloadGlobalConstant)
1296 (I.Flags.isNormalized()
1297 ?
static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized)
1299 if (I.Flags.getKind() == DeviceVarFlags::Variable) {
1300 if (I.Flags.isManaged()) {
// Managed variables carry the ".managed" suffix; the companion global
// is found by stripping it and is passed along with the alignment.
1301 assert(I.Var->getName().ends_with(
".managed") &&
1302 "HIP managed variables not transformed");
1304 auto *ManagedVar = M.getNamedGlobal(
1305 I.Var->getName().drop_back(StringRef(
".managed").size()));
1306 llvm::offloading::emitOffloadingEntry(
1307 M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1308 llvm::offloading::OffloadGlobalManagedEntry | Flags,
1309 I.Var->getAlignment(), ManagedVar);
1311 llvm::offloading::emitOffloadingEntry(
1312 M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1313 llvm::offloading::OffloadGlobalEntry | Flags,
1316 }
else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
// Surfaces and textures pass their surface/texture type as the last
// argument.
1317 llvm::offloading::emitOffloadingEntry(
1318 M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1319 llvm::offloading::OffloadGlobalSurfaceEntry | Flags,
1320 I.Flags.getSurfTexType());
1321 }
else if (I.Flags.getKind() == DeviceVarFlags::Texture) {
1322 llvm::offloading::emitOffloadingEntry(
1323 M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1324 llvm::offloading::OffloadGlobalTextureEntry | Flags,
1325 I.Flags.getSurfTexType());
// Offload profiling: emit an entry for the shadow global and build an
// internal constructor that registers it and each section shadow with the
// profile runtime.
1333 if (OffloadProfShadow) {
1334 llvm::offloading::emitOffloadingEntry(
1335 M, Kind, OffloadProfShadow, OffloadProfShadow->getName(),
1337 llvm::offloading::OffloadGlobalEntry, 0);
1339 llvm::LLVMContext &Ctx = M.getContext();
1340 auto *PtrTy = llvm::PointerType::getUnqual(Ctx);
1342 llvm::FunctionType::get(VoidTy, {PtrTy},
false),
1343 "__llvm_profile_offload_register_shadow_variable");
1345 llvm::FunctionType::get(VoidTy, {PtrTy},
false),
1346 "__llvm_profile_offload_register_section_shadow_variable");
1347 auto *CtorFn = llvm::Function::Create(
1348 llvm::FunctionType::get(VoidTy,
false),
1349 llvm::GlobalValue::InternalLinkage,
1351 auto *Entry = llvm::BasicBlock::Create(Ctx,
"entry", CtorFn);
1352 llvm::IRBuilder<> B(Entry);
1353 B.CreateCall(RegisterShadow, {OffloadProfShadow});
1354 for (
const auto &Info : OffloadProfSectionShadows) {
1355 llvm::offloading::emitOffloadingEntry(
1356 M, Kind, Info.Shadow, Info.DeviceName,
1358 llvm::offloading::OffloadGlobalEntry, 0);
1359 B.CreateCall(RegisterSectionShadow, {Info.Shadow});
// The constructor is appended to the global ctors with priority 65535.
1362 llvm::appendToGlobalCtors(M, CtorFn, 65535);
// Creates the host-side globals used for offload profiling: the names-postfix
// string, the main shadow pointer "__llvm_profile_sections_<CUIDHash>", and
// one shadow pointer per profiling section of each emitted kernel.
1373void CGNVCUDARuntime::emitOffloadProfilingSections() {
// Checks for a missing CUID hash.
1380 if (CUIDHash.empty())
1384 llvm::LLVMContext &Ctx = M.getContext();
1385 std::string Name = (
"__llvm_profile_sections_" + CUIDHash).str();
// Checks whether a value with that name already exists in the module.
1389 if (M.getNamedValue(Name))
1396 unsigned GlobalAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
// Emit the names-var postfix ("_" + CUIDHash, NUL-terminated) as a private
// constant global, unless a value with that name is already present.
1397 std::string NamesVarPostfixVarName =
1398 std::string(llvm::getInstrProfNamesVarPostfixVarName());
1399 if (!M.getNamedValue(NamesVarPostfixVarName)) {
1400 auto *NamesVarPostfix = llvm::ConstantDataArray::getString(
1401 Ctx, (llvm::Twine(
"_") + CUIDHash).str(),
true);
1402 auto *NamesGV =
new llvm::GlobalVariable(
1403 M, NamesVarPostfix->getType(),
true,
1404 llvm::GlobalValue::PrivateLinkage, NamesVarPostfix,
1405 NamesVarPostfixVarName,
1406 nullptr, llvm::GlobalValue::NotThreadLocal,
// Main shadow: a mutable, external-linkage pointer global initialized to
// null.
1418 auto *PtrTy = llvm::PointerType::getUnqual(Ctx);
1419 OffloadProfShadow =
new llvm::GlobalVariable(
1420 M, PtrTy,
false, llvm::GlobalValue::ExternalLinkage,
1421 llvm::ConstantPointerNull::get(PtrTy), Name);
// Helper: creates "__llvm_profile_shadow_<Kind>_<CUIDHash>_<index>" (index =
// current number of recorded shadows) and records it with DeviceName.
1424 auto AddSectionShadow = [&](StringRef
Kind,
const Twine &DeviceName) {
1425 std::string ShadowName =
1426 (Twine(
"__llvm_profile_shadow_") +
Kind +
"_" + CUIDHash +
"_" +
1427 Twine(OffloadProfSectionShadows.size()))
1429 auto *Shadow =
new llvm::GlobalVariable(
1430 M, PtrTy,
false, llvm::GlobalValue::ExternalLinkage,
1431 llvm::ConstantPointerNull::get(PtrTy), ShadowName);
1433 OffloadProfSectionShadows.push_back({Shadow, DeviceName.str()});
// Shadows for the "__profd_<kernel>" and "__profc_<kernel>" device symbols
// and for the names variable suffixed with "_<CUIDHash>".
1437 for (
auto &&I : EmittedKernels) {
1439 AddSectionShadow(
"data", Twine(
"__profd_") + KernelName);
1440 AddSectionShadow(
"cnts", Twine(
"__profc_") + KernelName);
1441 AddSectionShadow(
"names",
1442 Twine(llvm::getInstrProfNamesVarName()) +
"_" + CUIDHash);
// End-of-module processing. Managed variables are transformed first, then the
// offload-profiling globals are created; after that the device variables are
// scanned and either offloading entries or the module constructor is
// produced.
1447llvm::Function *CGNVCUDARuntime::finalizeModule() {
1448 transformManagedVars();
1449 emitOffloadProfilingSections();
// Select defined, non-local device variables/surfaces/textures that are
// used but do not already carry UsedAttr.
1461 for (
auto &&Info : DeviceVars) {
1462 auto Kind = Info.Flags.getKind();
1463 if (!Info.Var->isDeclaration() &&
1464 !llvm::GlobalValue::isLocalLinkage(Info.Var->getLinkage()) &&
1465 (Kind == DeviceVarFlags::Variable ||
1466 Kind == DeviceVarFlags::Surface ||
1467 Kind == DeviceVarFlags::Texture) &&
1468 Info.D->isUsed() && !Info.D->hasAttr<UsedAttr>()) {
// The new offloading driver combined with relocatable device code is one of
// the conditions that selects offloading entries.
1475 (CGM.
getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
1476 createOffloadingEntries();
1478 return makeModuleCtorFunction();
// Returns the handle for stub function F, creating and recording one on first
// use. Handles are keyed by F's name in KernelHandles, and KernelStubs maps
// each handle back to its stub.
1483llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
1485 auto Loc = KernelHandles.find(F->getName());
// A handle already exists for this name: compare its recorded stub with F,
// and update or drop the stub mapping.
1486 if (Loc != KernelHandles.end()) {
1487 auto OldHandle = Loc->second;
1488 if (KernelStubs[OldHandle] == F)
1496 KernelStubs[OldHandle] = F;
1501 KernelStubs.erase(OldHandle);
// Case where the function itself serves as its handle.
1505 KernelHandles[F->getName()] = F;
// Otherwise create a constant global of F's type that mirrors F's linkage,
// DSO-locality and visibility.
1510 auto *Var =
new llvm::GlobalVariable(
1511 TheModule, F->getType(),
true, F->getLinkage(),
1516 Var->setDSOLocal(F->isDSOLocal());
1517 Var->setVisibility(F->getVisibility());
1519 auto *FT = FD->getPrimaryTemplate();
1520 if (!FT || FT->isThisDeclarationADefinition())
// Record the new handle in both maps.
1522 KernelHandles[F->getName()] = Var;
1523 KernelStubs[Var] = F;
static std::unique_ptr< MangleContext > InitDeviceMC(CodeGenModule &CGM)
static void replaceManagedVar(llvm::GlobalVariable *Var, llvm::GlobalVariable *ManagedVar)
Result
Implement __builtin_bit_cast and related operations.
MangleContext * createMangleContext(const TargetInfo *T=nullptr)
If T is null pointer, assume the target in ASTContext.
bool shouldExternalize(const Decl *D) const
Whether a C++ static variable or CUDA/HIP kernel should be externalized.
StringRef getCUIDHash() const
llvm::SetVector< const VarDecl * > CUDADeviceVarODRUsedByHost
Keep track of CUDA/HIP device-side variables ODR-used by host code.
const TargetInfo * getAuxTargetInfo() const
MangleContext * createDeviceMangleContext(const TargetInfo &T)
Creates a device mangle context to correctly mangle lambdas in a mixed architecture compile by settin...
TypeInfoChars getTypeInfoInChars(const Type *T) const
const TargetInfo & getTargetInfo() const
unsigned getTargetAddressSpace(LangAS AS) const
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align. Beware: llvm::Align assumes power of two 8-bit b...
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
static CharUnits One()
One - Construct a CharUnits quantity of one.
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
static CharUnits Zero()
Zero - Construct a CharUnits quantity of zero.
bool hasProfileInstr() const
Check if any form of instrumentation is on.
std::string CudaGpuBinaryFileName
Name of file passed with -fcuda-include-gpubinary option to forward to CUDA runtime back-end for inco...
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
llvm::PointerType * getType() const
Return the type of the pointer value.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name="")
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
MangleContext & getMangleContext()
Gets the mangle context.
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
void add(RValue rvalue, QualType type)
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
Emits a call or invoke instruction to the given runtime function.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
llvm::AllocaInst * CreateTempAlloca(llvm::Type *Ty, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)
CreateTempAlloca - This creates an alloca and inserts it into the entry block if ArraySize is nullptr...
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **CallOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
RawAddress CreateMemTempWithoutCast(QualType T, const Twine &Name="tmp")
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment without...
RawAddress CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)
CreateTempAlloca - This creates an alloca and inserts it into the entry block.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Type * ConvertTypeForMem(QualType T)
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
This class organizes the cross-function state that is used while generating LLVM code.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CodeGenTypes & getTypes()
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
SanitizerMetadata * getSanitizerMetadata()
const llvm::Triple & getTriple() const
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
void maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO)
void printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const
Print the postfix for externalized static variable or kernels for single source offloading languages ...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeFunctionDeclaration(const GlobalDecl GD)
Free functions are functions that are compatible with an ordinary C function pointer type.
static RValue get(llvm::Value *V)
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
lookup_result lookup(DeclarationName Name) const
lookup - Find the declarations (if any) with the given Name in this context.
SourceLocation getLocation() const
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
const ParmVarDecl * getParamDecl(unsigned i) const
GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind)
const Decl * getDecl() const
StringRef getName() const
Return the actual identifier string.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
std::string CUID
The user provided compilation unit ID, if non-empty.
GPUDefaultStreamKind GPUDefaultStream
The default stream kind used for HIP kernel launching.
bool shouldMangleDeclName(const NamedDecl *D)
void mangleName(GlobalDecl GD, raw_ostream &)
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
QualType getCanonicalType() const
bool isMicrosoft() const
Is this ABI an MSVC-compatible ABI?
bool isItaniumFamily() const
Does this ABI generally fall into the Itanium family of ABIs?
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
const llvm::VersionTuple & getSDKVersion() const
unsigned size() const
Retrieve the number of template arguments in this template argument list.
static DeclContext * castToDeclContext(const TranslationUnitDecl *D)
CXXRecordDecl * castAsCXXRecordDecl() const
bool isCUDADeviceBuiltinSurfaceType() const
Check if the type is the CUDA device builtin surface type.
bool isCUDADeviceBuiltinTextureType() const
Check if the type is the CUDA device builtin texture type.
Represents a variable declaration or definition.
bool isInline() const
Whether this variable is (C++1z) inline.
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
CGCUDARuntime * CreateNVCUDARuntime(CodeGenModule &CGM)
Creates an instance of a CUDA runtime class.
@ VFS
Remove unused -ivfsoverlay arguments.
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
@ Address
A pointer to a ValueDecl.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
CudaVersion ToCudaVersion(llvm::VersionTuple)
bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature)
Linkage
Describes the different kinds of linkage (C++ [basic.link], C99 6.2.2) that an entity may have.
@ Type
The name was classified as a type.
U cast(CodeGen::Address addr)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 int32_t
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::IntegerType * SizeTy
llvm::IntegerType * IntTy
int
CharUnits getSizeAlign() const
CharUnits getPointerAlign() const
llvm::PointerType * DefaultPtrTy