10#include "mlir/IR/Attributes.h"
11#include "mlir/IR/BuiltinAttributeInterfaces.h"
12#include "mlir/IR/IRMapping.h"
13#include "mlir/IR/Location.h"
14#include "mlir/IR/Value.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/ADT/TypeSwitch.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/MemoryBuffer.h"
37#include "llvm/Support/Path.h"
38#include "llvm/Support/VirtualFileSystem.h"
47#define GEN_PASS_DEF_LOWERINGPREPARE
48#include "clang/CIR/Dialect/Passes.h.inc"
52 SmallString<128> fileName;
54 if (mlirModule.getSymName())
55 fileName = llvm::sys::path::filename(mlirModule.getSymName()->str());
60 for (
size_t i = 0; i < fileName.size(); ++i) {
71struct LoweringPreparePass
72 :
public impl::LoweringPrepareBase<LoweringPreparePass> {
73 LoweringPreparePass() =
default;
// Copy constructor. The initializer list forwards `other` to the generated
// LoweringPrepareBase copy constructor only, and the body is empty.
83 LoweringPreparePass(
const LoweringPreparePass &other)
84 : impl::LoweringPrepareBase<LoweringPreparePass>(other) {}
86 void runOnOperation()
override;
88 void runOnOp(mlir::Operation *op);
89 void lowerCastOp(cir::CastOp op);
90 void lowerComplexConjOp(cir::ComplexConjOp op);
91 void lowerComplexDivOp(cir::ComplexDivOp op);
92 void lowerComplexMulOp(cir::ComplexMulOp op);
93 void lowerGetGlobalOp(cir::GetGlobalOp op);
94 void lowerGlobalOp(cir::GlobalOp op);
95 void lowerThreeWayCmpOp(cir::CmpThreeWayOp op);
96 void lowerArrayDtor(cir::ArrayDtor op);
97 void lowerArrayCtor(cir::ArrayCtor op);
98 void lowerTrivialCopyCall(cir::CallOp op);
99 void lowerStoreOfConstAggregate(cir::StoreOp op);
100 void lowerLocalInitOp(cir::LocalInitOp op);
105 cir::FuncOp getCalledFunction(cir::CallOp callOp);
114 cir::GlobalOp getOrCreateConstAggregateGlobal(CIRBaseBuilderTy &builder,
116 llvm::StringRef baseName,
118 mlir::TypedAttr constant);
121 cir::FuncOp buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op);
124 void defineGlobalThreadLocalWrapper(cir::GlobalOp op, cir::FuncOp initAlias,
125 bool isVarDefinition);
127 cir::FuncOp defineGlobalThreadLocalInitAlias(cir::GlobalOp op,
128 cir::FuncOp aliasee);
130 cir::FuncOp getOrCreateThreadLocalWrapper(CIRBaseBuilderTy &builder,
136 cir::IfOp buildGlobalTlsGuardCheck(CIRBaseBuilderTy &builder,
137 mlir::Location loc, cir::GlobalOp guard);
139 cir::FuncOp getOrCreateDtorFunc(CIRBaseBuilderTy &builder, cir::GlobalOp op,
140 mlir::Region &dtorRegion,
141 cir::CallOp &dtorCall);
144 void buildCXXGlobalInitFunc();
147 void buildCXXGlobalTlsFunc();
150 void buildGlobalCtorDtorList();
152 cir::FuncOp buildRuntimeFunction(
153 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
155 cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage);
157 cir::GlobalOp getOrCreateRuntimeVariable(
158 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
160 cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage,
161 cir::VisibilityKind visibility = cir::VisibilityKind::Default);
167 llvm::StringMap<FuncOp> cudaKernelMap;
168 llvm::SmallVector<std::pair<cir::GlobalOp, cir::CUDAVarRegistrationInfoAttr>>
173 void buildCUDAModuleCtor();
174 std::optional<FuncOp> buildCUDAModuleDtor();
175 std::optional<FuncOp> buildHIPModuleDtor();
176 std::optional<FuncOp> buildCUDARegisterGlobals();
177 void buildCUDARegisterVars(cir::CIRBaseBuilderTy &builder,
178 FuncOp regGlobalFunc);
179 void buildCUDARegisterGlobalFunctions(cir::CIRBaseBuilderTy &builder,
180 FuncOp regGlobalFunc);
183 void handleStaticLocal(cir::GlobalOp globalOp, cir::LocalInitOp localInitOp);
192 cir::FuncOp getTlsInitFn();
195 cir::GlobalOp createGlobalThreadLocalGuard(CIRBaseBuilderTy &builder,
199 cir::GlobalOp createGuardGlobalOp(CIRBaseBuilderTy &builder,
200 mlir::Location loc, llvm::StringRef name,
201 cir::IntType guardTy,
202 cir::GlobalLinkageKind linkage);
// Looks up `globalSymName` in staticLocalDeclGuardMap and tests whether an
// entry exists for it.
// NOTE(review): the value returned when there is no entry is presumably a
// null cir::GlobalOp -- confirm.
205 cir::GlobalOp getStaticLocalDeclGuardAddress(llvm::StringRef globalSymName) {
206 auto it = staticLocalDeclGuardMap.find(globalSymName);
207 if (it != staticLocalDeclGuardMap.end())
// Records `guard` in staticLocalDeclGuardMap under the key `globalSymName`.
// The assignment through operator[] replaces any entry already stored for
// that name.
213 void setStaticLocalDeclGuardAddress(llvm::StringRef globalSymName,
214 cir::GlobalOp guard) {
215 staticLocalDeclGuardMap[globalSymName] = guard;
// Obtains the guard variable associated with `globalOp`: picks an integer
// type for the guard, looks up a guard already recorded under the global's
// symbol name, builds one with createGuardGlobalOp, copies several properties
// from `globalOp` onto it, and records it in the guard map.
// NOTE(review): creation is presumably skipped when the lookup succeeds --
// confirm.
//
// builder              - builder passed on to createGuardGlobalOp.
// globalOp             - the global being guarded; source of the symbol name,
//                        location, linkage, DSO-local flag, TLS model and
//                        comdat setting.
// guardName            - symbol name given to a newly created guard.
// isLocalVarDecl       - consulted only by the comdat decision below.
// useInt8GuardVariable - selects the 8-bit guard type.
219 cir::GlobalOp getOrCreateStaticLocalDeclGuardAddress(
220 CIRBaseBuilderTy &builder, cir::GlobalOp globalOp, StringRef guardName,
221 bool isLocalVarDecl,
bool useInt8GuardVariable) {
223 cir::CIRDataLayout dataLayout(mlirModule);
224 cir::IntType guardTy;
225 clang::CharUnits guardAlignment;
// Guard type selection: an 8-bit integer when requested, an integer as wide
// as the AST context's signed size type under the ARM guard ABI, and a
// 64-bit integer otherwise.
228 if (useInt8GuardVariable) {
229 guardTy = cir::IntType::get(&getContext(), 8,
true);
231 }
else if (useARMGuardVarABI()) {
233 const unsigned sizeTypeSize =
234 astCtx->getTypeSize(astCtx->getSignedSizeType());
236 cir::IntType::get(&getContext(), sizeTypeSize,
true);
240 guardTy = cir::IntType::get(&getContext(), 64,
true);
// Both the type and a non-zero alignment must have been chosen by now.
244 assert(guardTy && guardAlignment.
getQuantity() != 0);
// Guards are keyed by the guarded global's symbol name, not by `guardName`.
246 llvm::StringRef globalSymName = globalOp.getSymName();
247 cir::GlobalOp guard = getStaticLocalDeclGuardAddress(globalSymName);
// New guard: same linkage as the guarded global, zero-initialised, and
// carrying the global's DSO-local flag and TLS model plus the alignment
// selected above.
250 guard = createGuardGlobalOp(builder, globalOp->getLoc(), guardName,
251 guardTy, globalOp.getLinkage());
252 guard.setInitialValueAttr(cir::IntAttr::get(guardTy, 0));
253 guard.setDSOLocal(globalOp.getDsoLocal());
254 guard.setAlignment(guardAlignment.
getAsAlign().value());
255 guard.setTlsModel(globalOp.getTlsModel());
// Comdat: set when the guarded global has a comdat and either (a) this is
// not a local variable declaration and the target object format is ELF or
// Wasm, or (b) the guarded global is weak for the linker.
261 bool hasComdat = globalOp.getComdat();
262 const llvm::Triple &triple = astCtx->getTargetInfo().getTriple();
265 if (!isLocalVarDecl && hasComdat &&
266 (triple.isOSBinFormatELF() || triple.isOSBinFormatWasm())) {
268 guard.setComdat(
true);
269 }
else if (hasComdat && globalOp.isWeakForLinker()) {
270 guard.setComdat(
true);
// Remember the guard so later lookups for this global find it.
273 setStaticLocalDeclGuardAddress(globalSymName, guard);
282 clang::ASTContext *astCtx;
285 mlir::ModuleOp mlirModule;
305 mlir::SymbolTableCollection symbolTables;
308 llvm::StringMap<uint32_t> dynamicInitializerNames;
309 llvm::SmallVector<cir::FuncOp> dynamicInitializers;
310 llvm::SmallVector<cir::FuncOp> globalThreadLocalInitializers;
311 llvm::StringMap<cir::FuncOp> threadLocalWrappers;
312 llvm::StringMap<cir::FuncOp> threadLocalInitAliases;
315 llvm::StringMap<cir::GlobalOp> staticLocalDeclGuardMap;
317 llvm::StringMap<llvm::SmallVector<cir::GlobalOp, 1>> constAggregateGlobals;
320 llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalCtorList;
322 llvm::SmallVector<std::pair<std::string, uint32_t>, 4> globalDtorList;
// Const predicate that switches on the C++ ABI kind reported by astCtx. The
// GenericARM, iOS, WatchOS, GenericAArch64 and WebAssembly kinds form a
// single case group.
// NOTE(review): presumably that group yields true and every other kind
// false -- confirm.
326 bool useARMGuardVarABI()
const {
327 switch (astCtx->getCXXABIKind()) {
328 case clang::TargetCXXABI::GenericARM:
329 case clang::TargetCXXABI::iOS:
330 case clang::TargetCXXABI::WatchOS:
331 case clang::TargetCXXABI::GenericAArch64:
332 case clang::TargetCXXABI::WebAssembly:
// Emits the code that registers the destructor of `global` with an
// atexit-style runtime function, then moves operations of the destructor
// region into `entryBB`.
//
// builder    - builder used for all created operations; on exit its
//              insertion point is the end of `entryBB`.
// global     - the global whose destructor is being registered.
// dtorRegion - region holding the destructor code; its first block is used.
// tls        - NOTE(review): presumably selects the thread-local
//              registration function below -- confirm.
// entryBB    - block that receives the spliced operations.
339 void emitGlobalGuardedDtorRegion(CIRBaseBuilderTy &builder,
340 cir::GlobalOp global,
341 mlir::Region &dtorRegion,
bool tls,
342 mlir::Block &entryBB) {
// Obtain `__dso_handle` with the insertion point at the start of the module
// body: an i8 global with external linkage and hidden visibility.
344 builder.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
345 cir::GlobalOp handle = getOrCreateRuntimeVariable(
346 builder,
"__dso_handle", global.getLoc(), builder.getI8Type(),
347 cir::GlobalLinkageKind::ExternalLinkage, cir::VisibilityKind::Hidden);
// getOrCreateDtorFunc yields the function to register and reports, through
// `dtorCall`, the call to it inside the destructor region.
353 cir::CallOp dtorCall;
354 cir::FuncOp dtorFunc =
355 getOrCreateDtorFunc(builder, global, dtorRegion, dtorCall);
// The registration function's type is built from three parameter types: a
// void-function pointer taking a void pointer, a void pointer, and a pointer
// to the handle's type.
360 cir::PointerType voidFnPtrTy = builder.
getVoidFnPtrTy({voidPtrTy});
361 cir::PointerType handlePtrTy = builder.
getPointerTo(handle.getSymType());
363 builder.
getVoidFnTy({voidFnPtrTy, voidPtrTy, handlePtrTy});
// The default registration function is `__cxa_atexit`.
365 llvm::StringLiteral nameAtExit =
"__cxa_atexit";
// Alternative names: `_tlv_atexit` on Darwin targets, `__cxa_thread_atexit`
// elsewhere.
367 nameAtExit = astCtx->getTargetInfo().getTriple().isOSDarwin()
368 ? llvm::StringLiteral(
"_tlv_atexit")
369 : llvm::StringLiteral(
"__cxa_thread_atexit");
371 cir::FuncOp fnAtExit = buildRuntimeFunction(builder, nameAtExit,
372 global.getLoc(), fnAtExitType);
// The registration call is emitted right after the destructor call.
376 builder.setInsertionPointAfter(dtorCall);
// Arguments: [0] address of the destructor function, bitcast to the generic
// void-function pointer type; a bitcast of the destructor call's first
// operand to a void pointer; [2] address of `__dso_handle`.
378 auto dtorPtrTy = cir::PointerType::get(dtorFunc.getFunctionType());
379 args[0] = cir::GetGlobalOp::create(builder, dtorCall.getLoc(), dtorPtrTy,
380 dtorFunc.getSymName());
381 args[0] = cir::CastOp::create(builder, dtorCall.getLoc(), voidFnPtrTy,
382 cir::CastKind::bitcast, args[0]);
384 cir::CastOp::create(builder, dtorCall.getLoc(), voidPtrTy,
385 cir::CastKind::bitcast, dtorCall.getArgOperand(0));
386 args[2] = cir::GetGlobalOp::create(builder, handle.getLoc(), handlePtrTy,
387 handle.getSymName());
388 builder.
createCallOp(dtorCall.getLoc(), fnAtExit, args);
// Move operations from the destructor block to the end of `entryBB`; the
// moved range stops before the block's last operation.
390 mlir::Block &dtorBlock = dtorRegion.front();
391 entryBB.getOperations().splice(entryBB.end(), dtorBlock.getOperations(),
393 std::prev(dtorBlock.end()));
// Leave the builder positioned at the end of `entryBB` for the caller.
396 builder.setInsertionPointToEnd(&entryBB);
// Emits the guarded initialization for `globalOp`: moves the constructor
// region's operations into the current insertion block, registers the
// destructor region if present, and builds a cir.if on the result of a guard
// acquire call. A non-local variable reaching the final branch is reported
// as not yet implemented.
//
// builder    - builder positioned where the initialization code goes.
// globalOp   - the variable being initialized; supplies locations and is the
//              target of emitted diagnostics.
// ctorRegion - single-block region with the initializer code.
// dtorRegion - single-block region with the destructor code, or empty.
// varDecl    - AST interface consulted for isLocalVarDecl().
// guardPtr   - value holding the guard variable's address.
402 void emitCXXGuardedInitIf(CIRBaseBuilderTy &builder, cir::GlobalOp globalOp,
403 mlir::Region &ctorRegion, mlir::Region &dtorRegion,
404 cir::ASTVarDeclInterface varDecl,
405 mlir::Value guardPtr, cir::PointerType guardPtrTy,
407 auto loc = globalOp->getLoc();
// Move every operation of the constructor block except the last one to the
// end of the current insertion block.
427 mlir::Block *insertBlock = builder.getInsertionBlock();
428 if (!ctorRegion.empty()) {
429 assert(ctorRegion.hasOneBlock() &&
"Enforced by MaxSizedRegion<1>");
431 mlir::Block &block = ctorRegion.front();
432 insertBlock->getOperations().splice(
433 insertBlock->end(), block.getOperations(), block.begin(),
434 std::prev(block.end()));
// Register the destructor, if there is one.
// NOTE(review): the fourth parameter of emitGlobalGuardedDtorRegion is
// `bool tls`, and `!threadsafe` is passed for it here -- confirm that this
// is the intended value.
437 if (!dtorRegion.empty()) {
438 assert(dtorRegion.hasOneBlock() &&
"Enforced by MaxSizedRegion<1>");
440 emitGlobalGuardedDtorRegion(builder, globalOp, dtorRegion, !threadsafe,
// Resume emitting at the end of the original block and drop the blocks of
// the constructor region.
443 builder.setInsertionPointToEnd(insertBlock);
444 ctorRegion.getBlocks().clear();
// Initialization proceeds only when the acquire call's result is non-zero.
452 mlir::Value acquireResult = acquireCall.getResult();
455 loc, mlir::cast<cir::IntType>(acquireResult.getType()), 0);
456 auto shouldInit = builder.
createCompare(loc, cir::CmpOpKind::ne,
457 acquireResult, acquireZero);
// The cir.if is created with an empty builder callback; its then-region is
// filled afterwards under an insertion guard that restores the builder's
// position on scope exit.
462 cir::IfOp::create(builder, loc, shouldInit,
false,
463 [](mlir::OpBuilder &, mlir::Location) {});
464 mlir::OpBuilder::InsertionGuard insertGuard(builder);
465 builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
479 mlir::ValueRange{guardPtr});
482 }
else if (!
varDecl.isLocalVarDecl()) {
// Variables that are not local declarations are not supported on this path.
488 globalOp->emitError(
"NYI: non-threadsafe init for non-local variables");
// Stores the given AST context pointer in `astCtx`. The pointer is stored as
// is, without a null check.
503 void setASTContext(clang::ASTContext *c) { astCtx = c; }
// Looks up a global named `name` starting from the module and builds a
// cir.global of the given `type` for it.
// NOTE(review): creation is presumably performed only when the lookup finds
// no cir::GlobalOp -- confirm.
//
// builder    - builder whose current insertion point receives a new global.
// name       - symbol name to look up and to create.
// loc        - location attached to a new global.
// type       - type of a new global.
// linkage    - linkage kind wrapped in a GlobalLinkageKindAttr.
// visibility - value passed to setGlobalVisibility.
508cir::GlobalOp LoweringPreparePass::getOrCreateRuntimeVariable(
509 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
510 mlir::Type type, cir::GlobalLinkageKind linkage,
511 cir::VisibilityKind visibility) {
// A symbol of that name that is not a cir::GlobalOp yields a null `g`.
512 cir::GlobalOp g = dyn_cast_or_null<cir::GlobalOp>(
513 mlir::SymbolTable::lookupNearestSymbolFrom(
514 mlirModule, mlir::StringAttr::get(mlirModule->getContext(), name)));
516 g = cir::GlobalOp::create(builder, loc, name, type);
518 cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage));
// MLIR symbol visibility is set to private; the CIR-level visibility comes
// from the caller.
519 mlir::SymbolTable::setSymbolVisibility(
520 g, mlir::SymbolTable::Visibility::Private);
521 g.setGlobalVisibility(visibility);
// Looks up a function named `name` starting from the module and builds a
// cir.func of the given `type` for it.
// NOTE(review): creation is presumably performed only when the lookup finds
// no FuncOp -- confirm.
//
// builder - builder whose current insertion point receives a new function.
// name    - symbol name to look up and to create.
// loc     - location attached to a new function.
// type    - function type of a new function.
// linkage - linkage kind wrapped in a GlobalLinkageKindAttr.
526cir::FuncOp LoweringPreparePass::buildRuntimeFunction(
527 mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
528 cir::FuncType type, cir::GlobalLinkageKind linkage) {
// A symbol of that name that is not a FuncOp yields a null `f`.
529 cir::FuncOp f = dyn_cast_or_null<FuncOp>(SymbolTable::lookupNearestSymbolFrom(
530 mlirModule, StringAttr::get(mlirModule->getContext(), name)));
532 f = cir::FuncOp::create(builder, loc, name, type);
534 cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage));
// MLIR symbol visibility is set to private.
535 mlir::SymbolTable::setSymbolVisibility(
536 f, mlir::SymbolTable::Visibility::Private);
546 builder.setInsertionPoint(op);
548 mlir::Value src = op.getSrc();
549 mlir::Value imag = builder.
getNullValue(src.getType(), op.getLoc());
555 cir::CastKind elemToBoolKind) {
557 builder.setInsertionPoint(op);
559 mlir::Value src = op.getSrc();
560 if (!mlir::isa<cir::BoolType>(op.getType()))
567 cir::BoolType boolTy = builder.
getBoolTy();
568 mlir::Value srcRealToBool =
569 builder.
createCast(op.getLoc(), elemToBoolKind, srcReal, boolTy);
570 mlir::Value srcImagToBool =
571 builder.
createCast(op.getLoc(), elemToBoolKind, srcImag, boolTy);
572 return builder.
createLogicalOr(op.getLoc(), srcRealToBool, srcImagToBool);
577 cir::CastKind scalarCastKind) {
579 builder.setInsertionPoint(op);
581 mlir::Value src = op.getSrc();
582 auto dstComplexElemTy =
583 mlir::cast<cir::ComplexType>(op.getType()).getElementType();
588 mlir::Value dstReal = builder.
createCast(op.getLoc(), scalarCastKind, srcReal,
590 mlir::Value dstImag = builder.
createCast(op.getLoc(), scalarCastKind, srcImag,
// Lowers a cir.cast by computing a replacement value according to the cast
// kind and redirecting all uses of the cast to it. The kinds listed in the
// switch are the complex-related ones: scalar-to-complex, complex-to-real,
// complex-to-bool and complex-to-complex conversions.
595void LoweringPreparePass::lowerCastOp(cir::CastOp op) {
596 mlir::MLIRContext &ctx = getContext();
// The replacement is produced by a lambda that switches on the cast kind.
597 mlir::Value loweredValue = [&]() -> mlir::Value {
598 switch (op.getKind()) {
599 case cir::CastKind::float_to_complex:
600 case cir::CastKind::int_to_complex:
602 case cir::CastKind::float_complex_to_real:
603 case cir::CastKind::int_complex_to_real:
605 case cir::CastKind::float_complex_to_bool:
607 case cir::CastKind::int_complex_to_bool:
609 case cir::CastKind::float_complex:
611 case cir::CastKind::float_complex_to_int_complex:
613 case cir::CastKind::int_complex:
615 case cir::CastKind::int_complex_to_float_complex:
// Every user of the original cast now consumes the lowered value.
623 op.replaceAllUsesWith(loweredValue);
630 llvm::StringRef (*libFuncNameGetter)(llvm::APFloat::Semantics),
631 mlir::Location loc, cir::ComplexType ty, mlir::Value lhsReal,
632 mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag) {
633 cir::FPTypeInterface elementTy =
634 mlir::cast<cir::FPTypeInterface>(ty.getElementType());
636 llvm::StringRef libFuncName = libFuncNameGetter(
637 llvm::APFloat::SemanticsToEnum(elementTy.getFloatSemantics()));
640 cir::FuncType libFuncTy = cir::FuncType::get(libFuncInputTypes, ty);
646 mlir::OpBuilder::InsertionGuard ipGuard{builder};
647 builder.setInsertionPointToStart(pass.mlirModule.getBody());
648 libFunc = pass.buildRuntimeFunction(builder, libFuncName, loc, libFuncTy);
652 builder.
createCallOp(loc, libFunc, {lhsReal, lhsImag, rhsReal, rhsImag});
653 return call.getResult();
656static llvm::StringRef
659 case llvm::APFloat::S_IEEEhalf:
661 case llvm::APFloat::S_IEEEsingle:
663 case llvm::APFloat::S_IEEEdouble:
665 case llvm::APFloat::S_PPCDoubleDouble:
667 case llvm::APFloat::S_x87DoubleExtended:
669 case llvm::APFloat::S_IEEEquad:
672 llvm_unreachable(
"unsupported floating point type");
678 mlir::Value lhsReal, mlir::Value lhsImag,
679 mlir::Value rhsReal, mlir::Value rhsImag) {
681 mlir::Value &a = lhsReal;
682 mlir::Value &b = lhsImag;
683 mlir::Value &c = rhsReal;
684 mlir::Value &d = rhsImag;
688 bool isFP = cir::isFPOrVectorOfFPType(a.getType());
689 auto mul = [&](mlir::Location l, mlir::Value x, mlir::Value y) {
692 auto add = [&](mlir::Location l, mlir::Value x, mlir::Value y) {
695 auto sub = [&](mlir::Location l, mlir::Value x, mlir::Value y) {
698 auto div = [&](mlir::Location l, mlir::Value x, mlir::Value y) {
702 mlir::Value ac = mul(loc, a, c);
703 mlir::Value bd = mul(loc, b, d);
704 mlir::Value cc = mul(loc, c, c);
705 mlir::Value dd = mul(loc, d, d);
706 mlir::Value acbd = add(loc, ac, bd);
707 mlir::Value ccdd = add(loc, cc, dd);
708 mlir::Value resultReal = div(loc, acbd, ccdd);
710 mlir::Value bc = mul(loc, b, c);
711 mlir::Value ad = mul(loc, a, d);
712 mlir::Value bcad = sub(loc, bc, ad);
713 mlir::Value resultImag = div(loc, bcad, ccdd);
719 mlir::Value lhsReal, mlir::Value lhsImag,
720 mlir::Value rhsReal, mlir::Value rhsImag) {
741 mlir::Value &a = lhsReal;
742 mlir::Value &b = lhsImag;
743 mlir::Value &c = rhsReal;
744 mlir::Value &d = rhsImag;
747 assert(cir::isFPOrVectorOfFPType(a.getType()) &&
748 "range-reduction complex divide expects floating-point operands");
750 auto trueBranchBuilder = [&](mlir::OpBuilder &, mlir::Location) {
751 mlir::Value r = builder.
createFDiv(loc, d, c);
752 mlir::Value rd = builder.
createFMul(loc, r, d);
753 mlir::Value tmp = builder.
createFAdd(loc, c, rd);
755 mlir::Value br = builder.
createFMul(loc, b, r);
756 mlir::Value abr = builder.
createFAdd(loc, a, br);
757 mlir::Value e = builder.
createFDiv(loc, abr, tmp);
759 mlir::Value ar = builder.
createFMul(loc, a, r);
760 mlir::Value bar = builder.
createFSub(loc, b, ar);
761 mlir::Value f = builder.
createFDiv(loc, bar, tmp);
767 auto falseBranchBuilder = [&](mlir::OpBuilder &, mlir::Location) {
768 mlir::Value r = builder.
createFDiv(loc, c, d);
769 mlir::Value rc = builder.
createFMul(loc, r, c);
770 mlir::Value tmp = builder.
createFAdd(loc, d, rc);
772 mlir::Value ar = builder.
createFMul(loc, a, r);
773 mlir::Value arb = builder.
createFAdd(loc, ar, b);
774 mlir::Value e = builder.
createFDiv(loc, arb, tmp);
776 mlir::Value br = builder.
createFMul(loc, b, r);
777 mlir::Value bra = builder.
createFSub(loc, br, a);
778 mlir::Value f = builder.
createFDiv(loc, bra, tmp);
784 auto cFabs = cir::FAbsOp::create(builder, loc, c);
785 auto dFabs = cir::FAbsOp::create(builder, loc, d);
786 cir::CmpOp cmpResult =
787 builder.
createCompare(loc, cir::CmpOpKind::ge, cFabs, dFabs);
788 auto ternary = cir::TernaryOp::create(builder, loc, cmpResult,
789 trueBranchBuilder, falseBranchBuilder);
791 return ternary.getResult();
798 auto getHigherPrecisionFPType = [&context](mlir::Type type) -> mlir::Type {
799 if (mlir::isa<cir::FP16Type>(type))
800 return cir::SingleType::get(&context);
802 if (mlir::isa<cir::SingleType>(type) || mlir::isa<cir::BF16Type>(type))
803 return cir::DoubleType::get(&context);
805 if (mlir::isa<cir::DoubleType>(type))
806 return cir::LongDoubleType::get(&context, type);
811 auto getFloatTypeSemantics =
812 [&cc](mlir::Type type) ->
const llvm::fltSemantics & {
814 if (mlir::isa<cir::FP16Type>(type))
817 if (mlir::isa<cir::BF16Type>(type))
820 if (mlir::isa<cir::SingleType>(type))
823 if (mlir::isa<cir::DoubleType>(type))
826 if (mlir::isa<cir::LongDoubleType>(type)) {
828 llvm_unreachable(
"NYI Float type semantics with OpenMP");
832 if (mlir::isa<cir::FP128Type>(type)) {
834 llvm_unreachable(
"NYI Float type semantics with OpenMP");
838 llvm_unreachable(
"Unsupported float type semantics");
841 const mlir::Type higherElementType = getHigherPrecisionFPType(elementType);
842 const llvm::fltSemantics &elementTypeSemantics =
843 getFloatTypeSemantics(elementType);
844 const llvm::fltSemantics &higherElementTypeSemantics =
845 getFloatTypeSemantics(higherElementType);
854 if (llvm::APFloat::semanticsMaxExponent(elementTypeSemantics) * 2 + 1 <=
855 llvm::APFloat::semanticsMaxExponent(higherElementTypeSemantics)) {
856 return higherElementType;
866 mlir::Location loc, cir::ComplexDivOp op, mlir::Value lhsReal,
867 mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag,
869 cir::ComplexType complexTy = op.getType();
870 if (mlir::isa<cir::FPTypeInterface>(complexTy.getElementType())) {
871 cir::ComplexRangeKind range = op.getRange();
872 if (range == cir::ComplexRangeKind::Improved)
876 if (range == cir::ComplexRangeKind::Full)
878 loc, complexTy, lhsReal, lhsImag, rhsReal,
881 if (range == cir::ComplexRangeKind::Promoted) {
882 mlir::Type originalElementType = complexTy.getElementType();
883 mlir::Type higherPrecisionElementType =
885 originalElementType);
887 if (!higherPrecisionElementType)
891 cir::CastKind floatingCastKind = cir::CastKind::floating;
892 lhsReal = builder.
createCast(floatingCastKind, lhsReal,
893 higherPrecisionElementType);
894 lhsImag = builder.
createCast(floatingCastKind, lhsImag,
895 higherPrecisionElementType);
896 rhsReal = builder.
createCast(floatingCastKind, rhsReal,
897 higherPrecisionElementType);
898 rhsImag = builder.
createCast(floatingCastKind, rhsImag,
899 higherPrecisionElementType);
902 builder, loc, lhsReal, lhsImag, rhsReal, rhsImag);
907 mlir::Value finalReal =
908 builder.
createCast(floatingCastKind, resultReal, originalElementType);
909 mlir::Value finalImag =
910 builder.
createCast(floatingCastKind, resultImag, originalElementType);
// Lowers a cir.complex.div: builds the replacement value right after the
// original operation and redirects all uses of the operation to it.
919void LoweringPreparePass::lowerComplexDivOp(cir::ComplexDivOp op) {
920 cir::CIRBaseBuilderTy builder(getContext());
// New operations are emitted immediately after `op`.
921 builder.setInsertionPointAfter(op);
922 mlir::Location loc = op.getLoc();
923 mlir::TypedValue<cir::ComplexType> lhs = op.getLhs();
924 mlir::TypedValue<cir::ComplexType> rhs = op.getRhs();
// The lowering helper also receives the MLIR context and the AST context.
930 mlir::Value loweredResult =
932 rhsImag, getContext(), *astCtx);
933 op.replaceAllUsesWith(loweredResult);
937static llvm::StringRef
940 case llvm::APFloat::S_IEEEhalf:
942 case llvm::APFloat::S_IEEEsingle:
944 case llvm::APFloat::S_IEEEdouble:
946 case llvm::APFloat::S_PPCDoubleDouble:
948 case llvm::APFloat::S_x87DoubleExtended:
950 case llvm::APFloat::S_IEEEquad:
953 llvm_unreachable(
"unsupported floating point type");
959 mlir::Location loc, cir::ComplexMulOp op,
960 mlir::Value lhsReal, mlir::Value lhsImag,
961 mlir::Value rhsReal, mlir::Value rhsImag) {
963 bool isFP = cir::isFPOrVectorOfFPType(lhsReal.getType());
964 auto mul = [&](mlir::Location l, mlir::Value x, mlir::Value y) {
967 auto add = [&](mlir::Location l, mlir::Value x, mlir::Value y) {
970 auto sub = [&](mlir::Location l, mlir::Value x, mlir::Value y) {
974 mlir::Value resultRealLhs = mul(loc, lhsReal, rhsReal);
975 mlir::Value resultRealRhs = mul(loc, lhsImag, rhsImag);
976 mlir::Value resultImagLhs = mul(loc, lhsReal, rhsImag);
977 mlir::Value resultImagRhs = mul(loc, lhsImag, rhsReal);
978 mlir::Value resultReal = sub(loc, resultRealLhs, resultRealRhs);
979 mlir::Value resultImag = add(loc, resultImagLhs, resultImagRhs);
980 mlir::Value algebraicResult =
983 cir::ComplexType complexTy = op.getType();
984 cir::ComplexRangeKind rangeKind = op.getRange();
985 if (mlir::isa<cir::IntType>(complexTy.getElementType()) ||
986 rangeKind == cir::ComplexRangeKind::Basic ||
987 rangeKind == cir::ComplexRangeKind::Improved ||
988 rangeKind == cir::ComplexRangeKind::Promoted)
989 return algebraicResult;
996 mlir::Value resultRealIsNaN = builder.
createIsNaN(loc, resultReal);
997 mlir::Value resultImagIsNaN = builder.
createIsNaN(loc, resultImag);
998 mlir::Value resultRealAndImagAreNaN =
1001 return cir::TernaryOp::create(
1002 builder, loc, resultRealAndImagAreNaN,
1003 [&](mlir::OpBuilder &, mlir::Location) {
1006 lhsReal, lhsImag, rhsReal, rhsImag);
1009 [&](mlir::OpBuilder &, mlir::Location) {
// Lowers a cir.complex.mul: calls lowerComplexMul with the real and
// imaginary parts of both operands, emitting right after the original
// operation, and redirects all uses of the operation to the result.
1015void LoweringPreparePass::lowerComplexMulOp(cir::ComplexMulOp op) {
1016 cir::CIRBaseBuilderTy builder(getContext());
// New operations are emitted immediately after `op`.
1017 builder.setInsertionPointAfter(op);
1018 mlir::Location loc = op.getLoc();
1019 mlir::TypedValue<cir::ComplexType> lhs = op.getLhs();
1020 mlir::TypedValue<cir::ComplexType> rhs = op.getRhs();
// The helper receives the pass itself, the builder, and the four parts.
1025 mlir::Value loweredResult =
lowerComplexMul(*
this, builder, loc, op, lhsReal,
1026 lhsImag, rhsReal, rhsImag);
1027 op.replaceAllUsesWith(loweredResult);
// Lowers a cir.complex.conj: builds a replacement value right after the
// original operation, with the imaginary part negated, and redirects all
// uses of the operation to it.
1031void LoweringPreparePass::lowerComplexConjOp(cir::ComplexConjOp op) {
1032 mlir::Location loc = op.getLoc();
1033 CIRBaseBuilderTy builder(getContext());
// New operations are emitted immediately after `op`.
1034 builder.setInsertionPointAfter(op);
1036 mlir::Value operand = op.getOperand();
// Floating-point (or vector-of-FP) elements are negated with an FNeg.
1041 const bool isFP = cir::isFPOrVectorOfFPType(operandReal.getType());
1042 mlir::Value resultImag = isFP ? builder.
createFNeg(loc, operandImag)
1045 mlir::Value result =
1047 op->replaceAllUsesWith(mlir::ValueRange{result});
// Produces a function that can be registered as the destructor for a global
// whose destructor code lives in `dtorRegion`.
//
// Fast path: when the destructor block is exactly get_global + call + yield,
// and the call takes the get_global result as its single operand, the
// function called by that call is returned directly.
//
// Otherwise a new internal function named `__cxx_global_array_dtor` (with a
// numeric suffix appended from a per-name counter) is created. The contents
// of the destructor block move into it, and the destructor block is rebuilt
// as get_global + call to the new function + yield.
//
// builder    - builder used for all created operations; its insertion point
//              is restored on return by the insertion guard.
// dtorRegion - region holding the destructor code; its first block is used
//              and rewritten.
// dtorCall   - out-parameter; on the slow path it is set to the call to the
//              new function inserted into the destructor block.
1051cir::FuncOp LoweringPreparePass::getOrCreateDtorFunc(CIRBaseBuilderTy &builder,
1053 mlir::Region &dtorRegion,
1054 cir::CallOp &dtorCall) {
1055 mlir::OpBuilder::InsertionGuard guard(builder);
1058 cir::VoidType voidTy = builder.
getVoidTy();
1059 auto voidPtrTy = cir::PointerType::get(voidTy);
// The first operation of the destructor block must be a get_global; the
// cast asserts this.
1062 mlir::Block &dtorBlock = dtorRegion.front();
1066 auto opIt = dtorBlock.getOperations().begin();
1067 cir::GetGlobalOp ggop = mlir::cast<cir::GetGlobalOp>(*opIt);
// Fast path: a three-operation block of the form get_global, call, yield.
1078 if (dtorBlock.getOperations().size() == 3) {
1079 auto callOp = mlir::dyn_cast<cir::CallOp>(&*(++opIt));
1080 auto yieldOp = mlir::dyn_cast<cir::YieldOp>(&*(++opIt));
1081 if (yieldOp && callOp && callOp.getNumOperands() == 1 &&
1082 callOp.getArgOperand(0) == ggop) {
1084 return getCalledFunction(callOp);
// Slow path: pick a name for the helper, using the shared per-name counter.
// NOTE(review): the suffix is presumably appended only for a non-zero
// counter value -- confirm.
1091 builder.setInsertionPointAfter(op);
1092 SmallString<256> fnName(
"__cxx_global_array_dtor");
1093 uint32_t cnt = dynamicInitializerNames[fnName]++;
1095 fnName +=
"." + std::to_string(cnt);
// The helper has type void(void*) and internal linkage.
1098 auto fnType = cir::FuncType::get({voidPtrTy}, voidTy);
1099 cir::FuncOp dtorFunc =
1100 buildRuntimeFunction(builder, fnName, op.getLoc(), fnType,
1101 cir::GlobalLinkageKind::InternalLinkage);
// Attach an `llvm.noundef` unit attribute to the single parameter.
1103 SmallVector<mlir::NamedAttribute> paramAttrs;
1104 paramAttrs.push_back(
1105 builder.getNamedAttr(
"llvm.noundef", builder.getUnitAttr()));
1106 SmallVector<mlir::Attribute> argAttrDicts;
1107 argAttrDicts.push_back(
1108 mlir::DictionaryAttr::get(builder.getContext(), paramAttrs));
1109 dtorFunc.setArgAttrsAttr(
1110 mlir::ArrayAttr::get(builder.getContext(), argAttrDicts));
// Move the whole destructor block, terminator included, into the helper's
// entry block.
1112 mlir::Block *entryBB = dtorFunc.addEntryBlock();
1115 entryBB->getOperations().splice(entryBB->begin(), dtorBlock.getOperations(),
1116 dtorBlock.begin(), dtorBlock.end());
// Put a clone of the moved get_global back at the start of the now-empty
// destructor block.
1119 cir::GetGlobalOp dtorGGop =
1120 mlir::cast<cir::GetGlobalOp>(entryBB->getOperations().front());
1121 builder.setInsertionPointToStart(&dtorBlock);
1122 builder.clone(*dtorGGop.getOperation());
// Inside the helper, uses of the get_global become uses of the parameter.
1126 mlir::Value dtorArg = entryBB->getArgument(0);
1127 dtorGGop.replaceAllUsesWith(dtorArg);
// Insert a return in front of the yield that terminates the helper's last
// block.
1131 mlir::Block &finalBlock = dtorFunc.getBody().back();
1132 auto yieldOp = cast<cir::YieldOp>(finalBlock.getTerminator());
1133 builder.setInsertionPoint(yieldOp);
1134 cir::ReturnOp::create(builder, yieldOp->getLoc());
// Rebuild the destructor block: after the cloned get_global, call the helper
// with its result and report that call through `dtorCall`.
1139 cir::GetGlobalOp origGGop =
1140 mlir::cast<cir::GetGlobalOp>(dtorBlock.getOperations().front());
1141 builder.setInsertionPointAfter(origGGop);
1142 mlir::Value ggopResult = origGGop.getResult();
1143 dtorCall = builder.
createCallOp(op.getLoc(), dtorFunc, ggopResult);
1146 auto finalYield = cir::YieldOp::create(builder, op.getLoc());
// Erase operations of the destructor block starting after the new yield.
1149 dtorBlock.getOperations().erase(std::next(mlir::Block::iterator(finalYield)),
// Erase every block of the destructor region except the first.
1151 dtorRegion.getBlocks().erase(std::next(dtorRegion.begin()), dtorRegion.end());
// Builds the internal `__cxx_global_var_init` function for global `op` (with
// a numeric suffix appended from a per-name counter). The function body
// receives the operations of the global's constructor region, a destructor
// registration when a destructor region exists, and a final return. When the
// global carries a TLS guard name, this code is placed inside the
// then-region of a guard check.
1157LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op) {
// NOTE(review): the suffix is presumably appended only for a non-zero
// counter value -- confirm.
1160 SmallString<256> fnName(
"__cxx_global_var_init");
1162 uint32_t cnt = dynamicInitializerNames[fnName]++;
1164 fnName +=
"." + std::to_string(cnt);
// The function has type void() and internal linkage, and is created right
// after the global it initializes.
1167 CIRBaseBuilderTy builder(getContext());
1168 builder.setInsertionPointAfter(op);
1169 cir::VoidType voidTy = builder.
getVoidTy();
1170 auto fnType = cir::FuncType::get({}, voidTy);
1171 FuncOp f = buildRuntimeFunction(builder, fnName, op.getLoc(), fnType,
1172 cir::GlobalLinkageKind::InternalLinkage);
1180 mlir::Block *entryBB = f.addEntryBlock();
1181 builder.setInsertionPointToStart(entryBB);
// A TLS guard is needed when the global has dynamic-TLS references that name
// a guard.
1185 bool needsTlsGuard = op.getDynTlsRefs() && op.getDynTlsRefs()->getGuardName();
// With a guard, the remaining code is emitted into the then-region of the
// guard check.
1187 if (needsTlsGuard) {
1188 guardIf = buildGlobalTlsGuardCheck(
1189 builder, op.getLoc(),
1190 getOrCreateStaticLocalDeclGuardAddress(
1191 builder, op, op.getDynTlsRefs()->getGuardName().getValue(),
1193 op.hasInternalLinkage()));
1194 builder.setInsertionPointToEnd(&guardIf.getThenRegion().front());
// Move every operation of the constructor block except the last one to the
// end of the current block.
1197 if (!op.getCtorRegion().empty()) {
1198 mlir::Block &block = op.getCtorRegion().front();
1199 mlir::Block *insertBlock = builder.getBlock();
1200 insertBlock->getOperations().splice(insertBlock->end(),
1201 block.getOperations(), block.begin(),
1202 std::prev(block.end()));
// Register the destructor; the `tls` argument is whether the global has a
// TLS model.
1206 mlir::Region &dtorRegion = op.getDtorRegion();
1207 if (!dtorRegion.empty()) {
1210 emitGlobalGuardedDtorRegion(builder, op, dtorRegion,
1211 op.getTlsModel().has_value(),
1212 *builder.getBlock());
// Terminate the guard's then-region with a yield.
1216 if (needsTlsGuard) {
1217 builder.setInsertionPointToEnd(&guardIf.getThenRegion().back());
1218 cir::YieldOp::create(builder, op.getLoc());
// The return at the end of the entry block takes its location from the yield
// left behind in the constructor region, or in the destructor region when
// there is no constructor region.
1222 builder.setInsertionPointToEnd(entryBB);
1223 mlir::Operation *yieldOp =
nullptr;
1224 if (!op.getCtorRegion().empty()) {
1225 mlir::Block &block = op.getCtorRegion().front();
1226 yieldOp = &block.getOperations().back();
1228 assert(!dtorRegion.empty());
1229 mlir::Block &block = dtorRegion.front();
1230 yieldOp = &block.getOperations().back();
1233 assert(isa<cir::YieldOp>(*yieldOp));
1234 cir::ReturnOp::create(builder, yieldOp->getLoc());
// Returns the runtime function `__cxa_guard_acquire`, obtained through
// buildRuntimeFunction with the type: 32-bit integer (guardPtrTy).
//
// guardPtrTy - pointer type used for the function's single parameter.
1239LoweringPreparePass::getGuardAcquireFn(cir::PointerType guardPtrTy) {
// A fresh builder positioned at the start of the module body; the function
// gets the module's location.
1241 CIRBaseBuilderTy builder(getContext());
1242 mlir::OpBuilder::InsertionGuard ipGuard{builder};
1243 builder.setInsertionPointToStart(mlirModule.getBody());
1244 mlir::Location loc = mlirModule.getLoc();
1245 cir::IntType intTy = cir::IntType::get(&getContext(), 32,
true);
1246 auto fnType = cir::FuncType::get({guardPtrTy}, intTy);
1247 return buildRuntimeFunction(builder,
"__cxa_guard_acquire", loc, fnType);
// Returns the runtime function `__cxa_guard_release`, obtained through
// buildRuntimeFunction with the type: void (guardPtrTy).
//
// guardPtrTy - pointer type used for the function's single parameter.
1251LoweringPreparePass::getGuardReleaseFn(cir::PointerType guardPtrTy) {
// A fresh builder positioned at the start of the module body; the function
// gets the module's location.
1253 CIRBaseBuilderTy builder(getContext());
1254 mlir::OpBuilder::InsertionGuard ipGuard{builder};
1255 builder.setInsertionPointToStart(mlirModule.getBody());
1256 mlir::Location loc = mlirModule.getLoc();
1257 cir::VoidType voidTy = cir::VoidType::get(&getContext());
1258 auto fnType = cir::FuncType::get({guardPtrTy}, voidTy);
1259 return buildRuntimeFunction(builder,
"__cxa_guard_release", loc, fnType);
// Returns the `__tls_init` function, obtained through buildRuntimeFunction
// with internal linkage.
1262cir::FuncOp LoweringPreparePass::getTlsInitFn() {
// A fresh builder positioned at the start of the module body; the function
// gets the module's location.
1264 CIRBaseBuilderTy builder(getContext());
1265 mlir::OpBuilder::InsertionGuard _{builder};
1266 builder.setInsertionPointToStart(mlirModule.getBody());
1267 mlir::Location loc = mlirModule.getLoc();
1269 return buildRuntimeFunction(builder,
"__tls_init", loc, fnType,
1270 cir::GlobalLinkageKind::InternalLinkage);
// Creates a new cir.global named `name` of type `guardTy` at the start of
// the module body. Unlike getOrCreateRuntimeVariable, no lookup for an
// existing symbol is performed in the visible code.
//
// builder - builder used for creation; its insertion point is restored on
//           return by the insertion guard.
// loc     - location attached to the new global.
// name    - symbol name of the new global.
// guardTy - integer type of the guard.
// linkage - linkage kind wrapped in a GlobalLinkageKindAttr.
1273cir::GlobalOp LoweringPreparePass::createGuardGlobalOp(
1274 CIRBaseBuilderTy &builder, mlir::Location loc, llvm::StringRef name,
1275 cir::IntType guardTy, cir::GlobalLinkageKind linkage) {
1276 mlir::OpBuilder::InsertionGuard guard(builder);
1277 builder.setInsertionPointToStart(mlirModule.getBody());
1278 cir::GlobalOp g = cir::GlobalOp::create(builder, loc, name, guardTy);
1280 cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage));
// MLIR symbol visibility is set to private.
1281 mlir::SymbolTable::setSymbolVisibility(
1282 g, mlir::SymbolTable::Visibility::Private);
// Expands the guarded initialization of a static local variable at the
// position of `localInitOp`: obtains the guard variable, loads it, compares
// the loaded value with zero, and emits the initialization under a
// conditional through emitCXXGuardedInitIf. Unsupported configurations are
// reported with "NYI" errors on `globalOp`.
//
// globalOp    - the global backing the static local; must carry an AST
//               variable declaration (asserted).
// localInitOp - the operation being lowered; supplies the constructor and
//               destructor regions, the TLS flag and locations.
1286void LoweringPreparePass::handleStaticLocal(cir::GlobalOp globalOp,
1287 cir::LocalInitOp localInitOp) {
1288 CIRBaseBuilderTy builder(getContext());
1290 std::optional<cir::ASTVarDeclInterface> astOption = globalOp.getAst();
1291 assert(astOption.has_value());
1292 cir::ASTVarDeclInterface
varDecl = astOption.value();
// Remember the block that contains `localInitOp` and that block's
// terminator; the terminator is re-appended further down.
1294 builder.setInsertionPointAfter(localInitOp);
1295 mlir::Block *localInitBlock = builder.getInsertionBlock();
1298 mlir::Operation *ret = localInitBlock->getTerminator();
1302 builder.setInsertionPointAfter(localInitOp);
1306 bool nonTemplateInline =
// Inline namespace-scope variables are not supported yet.
1312 if (nonTemplateInline) {
1313 globalOp->emitError(
1314 "NYI: guarded initialization for inline namespace-scope variables");
// Thread-safe initialization requires the ThreadsafeStatics language option
// and a local (or non-template inline) variable.
1321 bool threadsafe = astCtx->
getLangOpts().ThreadsafeStatics &&
1322 (
varDecl.isLocalVarDecl() || nonTemplateInline) &&
// An 8-bit guard is used for non-thread-safe initialization of variables
// with internal linkage.
1327 bool useInt8GuardVariable = !threadsafe && globalOp.hasInternalLinkage();
// The guard's symbol name comes from the global's static-local-guard
// attribute.
1330 cir::GlobalOp guard = getOrCreateStaticLocalDeclGuardAddress(
1331 builder, globalOp, globalOp.getStaticLocalGuard()->getName().getValue(),
1332 varDecl.isLocalVarDecl(), useInt8GuardVariable);
1335 localInitBlock->push_back(ret);
1339 mlir::Value guardPtr = builder.
createGetGlobal(guard, localInitOp.getTls());
1361 unsigned maxInlineWidthInBits =
// Inline guard check: taken when initialization is not thread-safe or when
// maxInlineWidthInBits is non-zero.
1364 if (!threadsafe || maxInlineWidthInBits) {
// The guard is accessed through a pointer to a signed 8-bit integer, using
// the guard's alignment.
1366 auto bytePtrTy = cir::PointerType::get(builder.
getSIntNTy(8));
1367 mlir::Value bytePtr = builder.
createBitcast(guardPtr, bytePtrTy);
1369 localInitOp.getLoc(), bytePtr, *guard.getAlignment());
// The guard load is given acquire ordering with system sync scope.
1378 auto loadOp = mlir::cast<cir::LoadOp>(guardLoad.getDefiningOp());
1379 loadOp.setMemOrder(cir::MemOrder::Acquire);
1380 loadOp.setSyncScope(cir::SyncScopeKind::System);
// Under the ARM guard ABI (and not with the 8-bit guard), the loaded value
// is masked with `one` before the comparison.
1403 if (useARMGuardVarABI() && !useInt8GuardVariable) {
1405 localInitOp.getLoc(), mlir::cast<cir::IntType>(guardLoad.getType()),
1407 guardLoad = builder.
createAnd(localInitOp.getLoc(), guardLoad, one);
// Initialization is needed when the (possibly masked) guard value is zero.
1412 localInitOp.getLoc(), mlir::cast<cir::IntType>(guardLoad.getType()), 0);
1413 auto needsInit = builder.
createCompare(localInitOp.getLoc(),
1414 cir::CmpOpKind::eq, guardLoad, zero);
// The guarded initialization is emitted from the builder callback of the
// conditional on `needsInit`.
1418 builder, globalOp.getLoc(), needsInit,
1419 false, [&](mlir::OpBuilder &, mlir::Location) {
1420 emitCXXGuardedInitIf(builder, globalOp, localInitOp.getCtorRegion(),
1421 localInitOp.getDtorRegion(), varDecl, guardPtr,
1422 builder.getPointerTo(guard.getSymType()),
// The path without an inline guard check is not supported yet.
1428 globalOp->emitError(
"NYI: guarded init without inline atomics support");
// Re-append the saved terminator to the block the builder currently
// inserts into.
1433 builder.getInsertionBlock()->push_back(ret);
// Lowers a cir::LocalInitOp: looks up the global the op refers to and passes
// both to handleStaticLocal.
1436void LoweringPreparePass::lowerLocalInitOp(cir::LocalInitOp initOp) {
// Special-cases an op whose ctor and dtor regions are both empty.
1439 if (initOp.getCtorRegion().empty() && initOp.getDtorRegion().empty()) {
// Resolve the referenced global through the pass's symbol tables; a missing
// global is treated as a programmer error (assert).
1444 cir::GlobalOp globalOp = initOp.getReferencedGlobal(symbolTables);
1445 assert(globalOp &&
"No global-op found");
1447 handleStaticLocal(globalOp, initOp);
1454 return tls == cir::TLS_Model::GeneralDynamic &&
// File-local helper that computes a cir::GlobalLinkageKind from `op`.
// Two of the visible paths return op's own linkage unchanged; the remaining
// path picks linkonce_odr when `op` is a declaration and weak_odr otherwise.
1458static cir::GlobalLinkageKind
1461 return op.getLinkage();
1466 return op.getLinkage();
1470 if (op.isDeclaration())
1471 return cir::GlobalLinkageKind::LinkOnceODRLinkage;
1472 return cir::GlobalLinkageKind::WeakODRLinkage;
// Returns the thread-local wrapper function for `op`, creating and caching it
// in `threadLocalWrappers` (keyed by wrapper name) on first request.
1476LoweringPreparePass::getOrCreateThreadLocalWrapper(CIRBaseBuilderTy &builder,
// The insertion guard is declared before the builder is moved to the start of
// the module body, where any new function is created.
1478 mlir::OpBuilder::InsertionGuard insertGuard(builder);
1479 builder.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
// The wrapper's symbol name comes from the global's dyn-TLS-refs attribute.
1481 mlir::StringAttr wrapperName = op.getDynTlsRefs()->getWrapperName();
// Cache hit: reuse the previously created wrapper.
1483 auto existingWrapperIter = threadLocalWrappers.find(wrapperName.getValue());
1484 if (existingWrapperIter != threadLocalWrappers.end())
1485 return existingWrapperIter->second;
// The wrapper takes no arguments and returns a pointer to the global's type.
1488 auto funcType = cir::FuncType::get({}, builder.
getPointerTo(op.getSymType()));
1490 cir::FuncOp::create(builder, op.getLoc(), wrapperName, funcType);
1492 cir::GlobalLinkageKind linkageKind =
1494 func.setLinkageAttr(
1495 cir::GlobalLinkageKindAttr::get(&getContext(), linkageKind));
// A wrapper that is weak for the linker is placed in a comdat.
1500 func.isWeakForLinker())
1501 func.setComdat(
true);
1503 mlir::SymbolTable::setSymbolVisibility(
1504 func, mlir::SymbolTable::Visibility::Private);
// Hidden visibility on the global is propagated to the wrapper.
1509 op.getGlobalVisibility() == cir::VisibilityKind::Hidden)
1510 func.setGlobalVisibility(cir::VisibilityKind::Hidden);
1513 op->emitError(
"Unhandled thread wrapper attributes for CC and Nounwind");
// Remember the wrapper so later requests for the same name hit the cache.
1515 threadLocalWrappers.insert({wrapperName.getValue(), func});
// Emits the body of the thread-local wrapper function for `op`.
//   op              - the thread-local global the wrapper returns.
//   initAlias       - init function/alias consulted before the return.
//   isVarDefinition - when false, the init alias is compared against null
//                     before it is used.
1519void LoweringPreparePass::defineGlobalThreadLocalWrapper(cir::GlobalOp op,
1520 cir::FuncOp initAlias,
1521 bool isVarDefinition) {
// Fetch (or create) the wrapper and start emitting into a new entry block.
1522 CIRBaseBuilderTy builder(getContext());
1523 cir::FuncOp wrapper = getOrCreateThreadLocalWrapper(builder, op);
1524 mlir::Block *entryBB = wrapper.addEntryBlock();
1525 builder.setInsertionPointToStart(entryBB);
1529 mlir::Location aliasLoc = initAlias.getLoc();
1530 if (!isVarDefinition) {
// Take the address of the init alias and compare it (ne) against
// `nullCheck`; the result is the condition of the cir.if built below.
1532 mlir::Value funcLoad = cir::GetGlobalOp::create(
1533 builder, aliasLoc, cir::PointerType::get(initAlias.getFunctionType()),
1534 initAlias.getSymName());
1535 mlir::Value nullCheck =
1537 mlir::Value cmp = cir::CmpOp::create(
1538 builder, aliasLoc, cir::CmpOpKind::ne, funcLoad, nullCheck);
1539 cir::IfOp::create(builder, aliasLoc, cmp,
false,
1540 [&](mlir::OpBuilder &, mlir::Location loc) {
1542 cir::YieldOp::create(builder, aliasLoc);
// The wrapper returns the value `get`.
1551 cir::ReturnOp::create(builder, op.getLoc(), {get});
// Returns the thread-local init alias function for `op`, creating it on first
// request and caching it in `threadLocalInitAliases` by alias name.
// `aliasee` is the function whose symbol name the alias is pointed at.
1555LoweringPreparePass::defineGlobalThreadLocalInitAlias(cir::GlobalOp op,
1556 cir::FuncOp aliasee) {
1557 CIRBaseBuilderTy builder(getContext());
// New functions are created at the start of the module body.
1558 mlir::OpBuilder::InsertionGuard insertGuard(builder);
1559 builder.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
// The alias name comes from the global's dyn-TLS-refs attribute.
1560 mlir::StringAttr aliasName = op.getDynTlsRefs()->getInitName();
1561 auto existingAliasIter = threadLocalInitAliases.find(aliasName.getValue());
// Cache hit: reuse the alias created earlier.
1563 if (existingAliasIter != threadLocalInitAliases.end())
1564 return existingAliasIter->second;
1568 cir::FuncOp::create(builder, op.getLoc(), aliasName, funcType);
// The alias starts out with the global's own linkage.
1569 alias.setLinkage(op.getLinkage());
1572 alias.setAliasee(aliasee.getSymName());
// Alternative path: extern-weak linkage with private symbol visibility.
// NOTE(review): presumably taken when no aliasee is supplied - confirm.
1577 alias.setLinkage(cir::GlobalLinkageKind::ExternalWeakLinkage);
1578 mlir::SymbolTable::setSymbolVisibility(
1579 alias, mlir::SymbolTable::Visibility::Private);
1582 threadLocalInitAliases.insert({aliasName.getValue(), alias});
// Lowers a cir::GlobalOp: when it has a ctor and/or dtor region, an init
// function is built for it and the regions are emptied; general-dynamic TLS
// globals additionally get an init alias and a wrapper function.
1586void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
// Globals carrying a static-local guard are special-cased here.
1588 if (op.getStaticLocalGuard())
1591 mlir::Region &ctorRegion = op.getCtorRegion();
1592 mlir::Region &dtorRegion = op.getDtorRegion();
1593 cir::FuncOp initAlias;
1595 if (!ctorRegion.empty() || !dtorRegion.empty()) {
// Build the init function, then drop the region contents from the global.
1598 cir::FuncOp f = buildCXXGlobalVarDeclInitFunc(op);
1601 ctorRegion.getBlocks().clear();
1602 dtorRegion.getBlocks().clear();
// General-dynamic TLS global that is not a guarded static local.
1605 if (op.getTlsModel() == TLS_Model::GeneralDynamic &&
1606 !op.getStaticLocalGuard().has_value()) {
// The alias targets either the per-variable init function `f` or the
// function returned by getTlsInitFn(), depending on the guard-name check.
1618 if (op.getDynTlsRefs()->getGuardName()) {
1620 initAlias = defineGlobalThreadLocalInitAlias(op, f);
1623 initAlias = defineGlobalThreadLocalInitAlias(op, getTlsInitFn());
// `f` is queued on the thread-local initializer list or on the ordinary
// dynamic initializer list.
1627 globalThreadLocalInitializers.push_back(f);
1630 dynamicInitializers.push_back(f);
1632 }
else if (op.getTlsModel() == TLS_Model::GeneralDynamic &&
1633 op.getDynTlsRefs() && op.isDeclaration()) {
// Declaration-only TLS global: the alias is created with an empty aliasee.
1636 initAlias = defineGlobalThreadLocalInitAlias(op, {});
// Every general-dynamic TLS global with dyn-TLS refs gets its wrapper defined.
1642 if (op.getTlsModel() == TLS_Model::GeneralDynamic && op.getDynTlsRefs())
1643 defineGlobalThreadLocalWrapper(op, initAlias, !op.isDeclaration());
// Lowers a cir::GetGlobalOp whose target is a general-dynamic TLS global with
// dyn-TLS refs: the op's uses are replaced by a call to the global's
// thread-local wrapper function.
1648void LoweringPreparePass::lowerGetGlobalOp(GetGlobalOp op) {
// Resolve the referenced global; mlir::cast asserts that it is a GlobalOp.
1651 auto globalOp = mlir::cast<cir::GlobalOp>(
1652 symbolTables.lookupNearestSymbolFrom(op, op.getNameAttr()));
// Only general-dynamic TLS globals with dyn-TLS refs are of interest.
1658 if (globalOp.getTlsModel() != TLS_Model::GeneralDynamic ||
1659 !globalOp.getDynTlsRefs())
// When this op lives directly inside the global it refers to, check whether
// it is the first operation of that global's ctor or dtor region.
1677 mlir::Operation *parentOp = op->getParentOp();
1678 if (parentOp == globalOp) {
1679 mlir::Region *ctorRegion = &globalOp.getCtorRegion();
1680 mlir::Region *dtorRegion = &globalOp.getDtorRegion();
1682 if (!ctorRegion->empty() && &*ctorRegion->op_begin() == op.getOperation())
1684 if (!dtorRegion->empty() && &*dtorRegion->op_begin() == op.getOperation())
// Emit the wrapper call immediately before the op and redirect all uses.
1688 CIRBaseBuilderTy builder(getContext());
1689 cir::FuncOp wrapperFunc = getOrCreateThreadLocalWrapper(builder, globalOp);
1691 builder.setInsertionPoint(op);
1693 wrapperFunc.getLoc(),
1694 mlir::FlatSymbolRefAttr::get(wrapperFunc.getSymNameAttr()),
1695 wrapperFunc.getFunctionType().getReturnType(), {});
1696 op->replaceAllUsesWith(call);
// Lowers a cir::CmpThreeWayOp into a chain of compare + select operations
// emitted right after the op, then redirects the op's uses to the result.
1700void LoweringPreparePass::lowerThreeWayCmpOp(CmpThreeWayOp op) {
1701 CIRBaseBuilderTy builder(getContext());
1702 builder.setInsertionPointAfter(op);
1704 mlir::Location loc = op->getLoc();
1705 cir::CmpThreeWayInfoAttr cmpInfo = op.getInfo();
1714 mlir::Value transformedResult;
// Non-partial ordering: result = eq ? eqRes : (lt ? ltRes : gtRes).
1715 if (cmpInfo.getOrdering() != CmpOrdering::Partial) {
1718 builder.
createCompare(loc, CmpOpKind::lt, op.getLhs(), op.getRhs());
1719 mlir::Value selectOnLt = builder.
createSelect(loc, lt, ltRes, gtRes);
1721 builder.
createCompare(loc, CmpOpKind::eq, op.getLhs(), op.getRhs());
1722 transformedResult = builder.
createSelect(loc, eq, eqRes, selectOnLt);
// Partial ordering: the "unordered" value from the info attribute is the
// fallback when none of eq / gt / lt holds:
//   result = lt ? ltRes : (gt ? gtRes : (eq ? eqRes : unorderedRes)).
1726 loc, op.getType(), cmpInfo.getUnordered().value());
1729 builder.
createCompare(loc, CmpOpKind::eq, op.getLhs(), op.getRhs());
1730 mlir::Value selectOnEq = builder.
createSelect(loc, eq, eqRes, unorderedRes);
1732 builder.
createCompare(loc, CmpOpKind::gt, op.getLhs(), op.getRhs());
1733 mlir::Value selectOnGt = builder.
createSelect(loc, gt, gtRes, selectOnEq);
1735 builder.
createCompare(loc, CmpOpKind::lt, op.getLhs(), op.getRhs());
1736 transformedResult = builder.
createSelect(loc, lt, ltRes, selectOnGt);
1739 op.replaceAllUsesWith(transformedResult);
// File-local template helper: turns each (name, priority) pair in `list` into
// an attribute built with AttributeTy::get(context, name, priority) and
// collects the results in a SmallVector of mlir::Attribute.
1743template <
typename AttributeTy>
1744static llvm::SmallVector<mlir::Attribute>
1748 for (
const auto &[name, priority] : list)
1749 attrs.push_back(AttributeTy::get(context, name, priority));
// Publishes the collected global constructors and destructors as module
// attributes. Each attribute is set only when its list is non-empty.
1753void LoweringPreparePass::buildGlobalCtorDtorList() {
1754 if (!globalCtorList.empty()) {
1755 llvm::SmallVector<mlir::Attribute> globalCtors =
// Stored under the dialect's global-ctors attribute name as an ArrayAttr.
1759 mlirModule->setAttr(cir::CIRDialect::getGlobalCtorsAttrName(),
1760 mlir::ArrayAttr::get(&getContext(), globalCtors));
1763 if (!globalDtorList.empty()) {
1764 llvm::SmallVector<mlir::Attribute> globalDtors =
// Stored under the dialect's global-dtors attribute name as an ArrayAttr.
1767 mlirModule->setAttr(cir::CIRDialect::getGlobalDtorsAttrName(),
1768 mlir::ArrayAttr::get(&getContext(), globalDtors));
// Creates the module-level `__tls_guard` global: a signed 8-bit integer with
// internal linkage, the general-dynamic TLS model and an initial value of 0.
1773LoweringPreparePass::createGlobalThreadLocalGuard(CIRBaseBuilderTy &builder,
1774 mlir::Location loc) {
// The insertion guard is declared before the builder is moved to the start of
// the module body.
1775 mlir::OpBuilder::InsertionGuard guard(builder);
1776 builder.setInsertionPointToStart(mlirModule.getBody());
1779 cir::IntType guardTy = builder.
getSIntNTy(8);
1780 auto g = cir::GlobalOp::create(builder, loc,
"__tls_guard", guardTy);
1781 g.setLinkageAttr(cir::GlobalLinkageKindAttr::get(
1782 builder.getContext(), cir::GlobalLinkageKind::InternalLinkage));
1786 g.setTlsModel(TLS_Model::GeneralDynamic);
1787 g.setInitialValueAttr(cir::IntAttr::get(guardTy, 0));
// Emits a check of the TLS guard variable `guard` and returns the cir::IfOp
// whose condition is "guard value == zero". Inside that if, a constant 1 of
// the guard's own type is combined with the guard address.
// NOTE(review): the visible operands suggest a store of 1 to the guard -
// confirm.
1791cir::IfOp LoweringPreparePass::buildGlobalTlsGuardCheck(
1792 CIRBaseBuilderTy &builder, mlir::Location loc, cir::GlobalOp guard) {
1794 mlir::Value getGuardValue = getGuard;
// A guard that is not a signed 8-bit integer is accessed through a pointer to
// a signed 8-bit integer.
1799 if (guard.getSymType() != builder.
getSIntNTy(8))
1801 getGuard, cir::PointerType::get(builder.
getSIntNTy(8)));
1803 mlir::Value guardLoad =
1807 builder.
createCompare(loc, cir::CmpOpKind::eq, guardLoad, zero);
1808 return cir::IfOp::create(
1810 false, [&](mlir::OpBuilder &, mlir::Location loc) {
1814 loc, builder.
getConstantInt(loc, guard.getSymType(), 1), getGuard);
// Fills in the body of the function returned by getTlsInitFn(): a guard check
// whose then-region handles every collected thread-local initializer,
// followed by a return.
1818void LoweringPreparePass::buildCXXGlobalTlsFunc() {
// Special-cases an empty thread-local initializer list.
1819 if (globalThreadLocalInitializers.empty())
1825 cir::FuncOp tlsInit = getTlsInitFn();
1826 mlir::Location loc = tlsInit.getLoc();
1827 CIRBaseBuilderTy builder(getContext());
1828 mlir::Block *entryBB = tlsInit.addEntryBlock();
1829 builder.setInsertionPointToStart(entryBB);
// Create the guard variable and the cir.if that tests it.
1831 cir::IfOp ifOperation = buildGlobalTlsGuardCheck(
1832 builder, loc, createGlobalThreadLocalGuard(builder, loc));
// Append to the then-region: one step per initializer, then the yield.
1835 builder.setInsertionPointToEnd(&ifOperation.getThenRegion().front());
1836 for (cir::FuncOp initFunc : globalThreadLocalInitializers)
1838 cir::YieldOp::create(builder, loc);
// The function returns right after the guarded region.
1840 builder.setInsertionPointAfter(ifOperation);
1841 cir::ReturnOp::create(builder, loc);
// Builds the module's global-initialization function, which handles every
// function in `dynamicInitializers`, and registers it in `globalCtorList`
// with the default constructor priority.
1844void LoweringPreparePass::buildCXXGlobalInitFunc() {
// Special-cases an empty initializer list.
1845 if (dynamicInitializers.empty())
1852 SmallString<256> fnName;
1853 cir::GlobalLinkageKind linkage;
// Two naming schemes are visible: one produced through an Itanium mangle
// context and paired with external linkage, and one starting with
// "_GLOBAL__sub_I_" and paired with internal linkage.
1861 llvm::raw_svector_ostream
out(fnName);
1862 std::unique_ptr<clang::MangleContext> mangleCtx(
1864 cast<clang::ItaniumMangleContext>(*mangleCtx)
1866 linkage = cir::GlobalLinkageKind::ExternalLinkage;
1868 fnName +=
"_GLOBAL__sub_I_";
1870 linkage = cir::GlobalLinkageKind::InternalLinkage;
// The function is created at the end of the module's last block with type
// `() -> void`.
1873 CIRBaseBuilderTy builder(getContext());
1874 builder.setInsertionPointToEnd(&mlirModule.getBodyRegion().back());
1875 auto fnType = cir::FuncType::get({}, builder.
getVoidTy());
1876 cir::FuncOp f = buildRuntimeFunction(builder, fnName, mlirModule.getLoc(),
1878 builder.setInsertionPointToStart(f.addEntryBlock());
// NOTE: the loop variable `f` shadows the outer `f` inside the loop.
1879 for (cir::FuncOp &f : dynamicInitializers)
1883 globalCtorList.emplace_back(fnName,
1884 cir::GlobalCtorAttr::getDefaultPriority());
1886 cir::ReturnOp::create(builder, f.getLoc());
// Emits the element-by-element loop for an array constructor or destructor
// op. Parameters visible here:
//   op          - the array ctor/dtor operation; its region 0 holds the
//                 per-element body.
//   eltTy       - type used for the element pointers created below.
//   numElements - element count as a value; non-null selects the dynamic path.
//   arrayLen    - compile-time element count.
//   isCtor      - true walks begin -> end, false walks end -> begin.
1895 mlir::Operation *op, mlir::Type eltTy,
1897 mlir::Value numElements,
1898 uint64_t arrayLen,
bool isCtor) {
1899 mlir::Location loc = op->getLoc();
1900 bool isDynamic = numElements !=
nullptr;
1904 const unsigned sizeTypeSize =
// Compute the [begin, end) element pointers. One path strides `begin` by
// numElements; the other decays the array address to an element pointer and
// strides it by a constant end offset.
1910 mlir::Value begin, end;
1913 end = cir::PtrStrideOp::create(builder, loc, eltTy, begin, numElements);
1915 mlir::Value endOffsetVal =
1917 begin = cir::CastOp::create(builder, loc, eltTy,
1918 cir::CastKind::array_to_ptrdecay, addr);
1919 end = cir::PtrStrideOp::create(builder, loc, eltTy, begin, endOffsetVal);
// Constructors iterate forward, destructors in reverse.
1922 mlir::Value start = isCtor ? begin : end;
1923 mlir::Value stop = isCtor ? end : begin;
// A "not equal" guard condition feeds a cir.if; the rest of the emission
// happens inside that if's then-region.
1929 mlir::Value guardCond;
1932 guardCond = cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne,
1938 cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne, start, stop);
1940 ifOp = cir::IfOp::create(builder, loc, guardCond,
1942 [&](mlir::OpBuilder &, mlir::Location) {});
1943 builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
// `tmpAddr` is the location the loop loads the current element pointer from.
1946 mlir::Value tmpAddr =
1951 mlir::Block *bodyBlock = &op->getRegion(0).front();
// Clones every non-yield operation of `srcBlock` at the builder's insertion
// point, with the block's first argument remapped to `replacement`.
// NOTE(review): `®ionOp` on the two lines below looks like a mis-encoded
// `&regionOp` (the clone call uses `regionOp`) - confirm against the
// canonical file.
1956 auto cloneRegionBodyInto = [&](mlir::Block *srcBlock,
1957 mlir::Value replacement) {
1958 mlir::IRMapping map;
1959 map.map(srcBlock->getArgument(0), replacement);
1960 for (mlir::Operation ®ionOp : *srcBlock) {
1961 if (!mlir::isa<cir::YieldOp>(®ionOp))
1962 builder.clone(regionOp, map);
// Pick the block used for partial destruction: an array ctor's partial-dtor
// region when it is non-empty, or the dtor body itself when the array dtor
// may throw.
1966 mlir::Block *partialDtorBlock =
nullptr;
1967 if (
auto arrayCtor = mlir::dyn_cast<cir::ArrayCtor>(op)) {
1968 mlir::Region &partialDtor = arrayCtor.getPartialDtor();
1969 if (!partialDtor.empty())
1970 partialDtorBlock = &partialDtor.front();
1971 }
else if (
auto arrayDtor = mlir::dyn_cast<cir::ArrayDtor>(op)) {
1980 if (arrayDtor.getDtorMayThrow())
1981 partialDtorBlock = bodyBlock;
// Main loop: the condition loads the current element pointer and compares it
// (ne) against `stop`; the body clones the per-element operations and steps
// the pointer by +1 or -1.
1984 auto emitCtorDtorLoop = [&]() {
1988 [&](mlir::OpBuilder &b, mlir::Location loc) {
1989 auto currentElement = cir::LoadOp::create(b, loc, eltTy, tmpAddr);
1990 auto cmp = cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne,
1991 currentElement, stop);
1995 [&](mlir::OpBuilder &b, mlir::Location loc) {
1996 auto currentElement = cir::LoadOp::create(b, loc, eltTy, tmpAddr);
// Forward step: run the body on the current element, then advance by +1.
1998 cloneRegionBodyInto(bodyBlock, currentElement);
1999 mlir::Value stride = builder.
getUnsignedInt(loc, 1, sizeTypeSize);
2000 auto nextElement = cir::PtrStrideOp::create(builder, loc, eltTy,
2001 currentElement, stride);
// Reverse step: move back by one first, then run the body on that element.
2004 mlir::Value stride = builder.
getSignedInt(loc, -1, sizeTypeSize);
2005 auto prevElement = cir::PtrStrideOp::create(builder, loc, eltTy,
2006 currentElement, stride);
2008 cloneRegionBodyInto(bodyBlock, prevElement);
2011 cir::YieldOp::create(b, loc);
// With a partial-dtor block, a cir::CleanupScopeOp of kind EH is created; its
// second region walks back from the current element towards `begin`, cloning
// the partial-dtor body for each previous element.
2015 if (partialDtorBlock) {
2016 cir::CleanupScopeOp::create(
2017 builder, loc, cir::CleanupKind::EH,
2019 [&](mlir::OpBuilder &b, mlir::Location loc) {
2021 cir::YieldOp::create(b, loc);
2024 [&](mlir::OpBuilder &b, mlir::Location loc) {
2025 auto cur = cir::LoadOp::create(b, loc, eltTy, tmpAddr);
2027 cir::CmpOp::create(builder, loc, cir::CmpOpKind::ne, cur, begin);
2029 builder, loc, cmp,
false,
2030 [&](mlir::OpBuilder &b, mlir::Location loc) {
2034 [&](mlir::OpBuilder &b, mlir::Location loc) {
2035 auto el = cir::LoadOp::create(b, loc, eltTy, tmpAddr);
2036 auto neq = cir::CmpOp::create(
2037 builder, loc, cir::CmpOpKind::ne, el, begin);
2041 [&](mlir::OpBuilder &b, mlir::Location loc) {
2042 auto el = cir::LoadOp::create(b, loc, eltTy, tmpAddr);
2043 mlir::Value negOne =
2045 auto prev = cir::PtrStrideOp::create(builder, loc, eltTy,
2048 cloneRegionBodyInto(partialDtorBlock, prev);
2051 cir::YieldOp::create(builder, loc);
2053 cir::YieldOp::create(b, loc);
2060 cir::YieldOp::create(builder, loc);
// Lowers a cir::ArrayDtor. New operations are emitted right after the op.
2065void LoweringPreparePass::lowerArrayDtor(cir::ArrayDtor op) {
2066 CIRBaseBuilderTy builder(getContext());
2067 builder.setInsertionPointAfter(op.getOperation());
// The element pointer type is that of region 0's first block argument.
2069 mlir::Type eltTy = op->getRegion(0).getArgument(0).getType();
// Dynamic count: the op's element-count value is forwarded together with a
// static length of 0.
2071 if (op.getNumElements()) {
2073 op.getNumElements(), 0,
// Static count: the length is read from the array type the address points to.
2079 mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize();
// Lowers a cir::ArrayCtor. New operations are emitted right after the op.
2085void LoweringPreparePass::lowerArrayCtor(cir::ArrayCtor op) {
2086 cir::CIRBaseBuilderTy builder(getContext());
2087 builder.setInsertionPointAfter(op.getOperation());
// The element pointer type is that of region 0's first block argument.
2089 mlir::Type eltTy = op->getRegion(0).getArgument(0).getType();
// Dynamic count: the op's element-count value is forwarded together with a
// static length of 0.
2091 if (op.getNumElements()) {
2093 op.getNumElements(), 0,
// Static count: the length is read from the array type the address points to.
2099 mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize();
// Resolves the cir::FuncOp a call refers to by symbol, using the pass's
// symbol tables.
2105cir::FuncOp LoweringPreparePass::getCalledFunction(cir::CallOp callOp) {
// dyn_cast_if_present yields a null attribute when the callee is not a symbol
// reference.
2106 mlir::SymbolRefAttr sym = llvm::dyn_cast_if_present<mlir::SymbolRefAttr>(
2107 callOp.getCallableForCallee());
2110 return symbolTables.lookupNearestSymbolFrom<cir::FuncOp>(callOp, sym);
// Handles a call to a trivial copy constructor: when the callee is marked as a
// copy constructor and as a trivial C++ member function, the call's first two
// operands are taken as destination and source, and the builder is positioned
// right before the call.
2113void LoweringPreparePass::lowerTrivialCopyCall(cir::CallOp op) {
2114 cir::FuncOp funcOp = getCalledFunction(op);
2118 std::optional<cir::CtorKind> ctorKind = funcOp.getCxxConstructorKind();
2119 if (ctorKind && *ctorKind == cir::CtorKind::Copy &&
2120 funcOp.isCxxTrivialMemberFunction()) {
2122 CIRBaseBuilderTy builder(getContext());
// Operand 0 is the destination, operand 1 the source.
2123 mlir::ValueRange operands = op.getOperands();
2124 mlir::Value dest = operands[0];
2125 mlir::Value src = operands[1];
2126 builder.setInsertionPoint(op);
// Returns a private global holding `constant` of type `ty`, reusing a global
// with the same type and initializer when one is known.
//   baseName - base symbol name; versioned names are formed as
//              "<baseName>.<version>".
// Known globals are tracked per base name in `constAggregateGlobals`.
2132cir::GlobalOp LoweringPreparePass::getOrCreateConstAggregateGlobal(
2133 CIRBaseBuilderTy &builder, mlir::Location loc, llvm::StringRef baseName,
2134 mlir::Type ty, mlir::TypedAttr constant) {
2136 llvm::SmallVector<cir::GlobalOp, 1> &versions =
2137 constAggregateGlobals[baseName];
// First scan the globals already recorded for this base name.
2140 for (cir::GlobalOp gv : versions) {
2141 if (gv.getSymType() == ty && gv.getInitialValue() == constant)
// Build candidate names; `name` is truncated back to the base before each
// version suffix is appended.
2149 llvm::SmallString<128>
name(baseName);
2150 size_t baseLen =
name.size();
2151 unsigned version = versions.size();
2153 name.resize(baseLen);
2155 name.push_back(
'.');
2156 llvm::Twine(version).toVector(name);
// A symbol that already exists in the module under the candidate name is
// recorded in `versions` and then compared by type and initializer.
2158 auto existingGv = symbolTables.lookupSymbolIn<cir::GlobalOp>(
2159 mlirModule, mlir::StringAttr::get(&getContext(), name));
2162 versions.push_back(existingGv);
2163 if (existingGv.getSymType() == ty &&
2164 existingGv.getInitialValue() == constant)
// Otherwise create the global at the start of the module body: default
// language address space, private linkage, private symbol visibility.
2170 mlir::OpBuilder::InsertionGuard guard(builder);
2171 builder.setInsertionPointToStart(mlirModule.getBody());
2173 cir::GlobalOp::create(builder, loc, name, ty,
2175 cir::LangAddressSpaceAttr::get(
2176 &getContext(), cir::LangAddressSpace::Default),
2177 cir::GlobalLinkageKind::PrivateLinkage);
2178 mlir::SymbolTable::setSymbolVisibility(
2179 gv, mlir::SymbolTable::Visibility::Private);
2180 gv.setInitialValueAttr(constant);
// Register the new symbol in the cached symbol table and in the version list.
2184 symbolTables.getSymbolTable(mlirModule).insert(gv);
2186 versions.push_back(gv);
// Handles a store whose value comes from a cir::ConstantOp of array or record
// type and whose address comes from a cir::AllocaOp: the constant is placed in
// a private global named "__const.<function>.<variable>" and the global's
// address is materialized right before the store.
2190void LoweringPreparePass::lowerStoreOfConstAggregate(cir::StoreOp op) {
2192 auto constOp = op.getValue().getDefiningOp<cir::ConstantOp>();
// Tests whether the constant's type is an array or record type.
2196 mlir::Type ty = constOp.getType();
2197 if (!mlir::isa<cir::ArrayType, cir::RecordType>(ty))
2203 auto alloca = op.getAddr().getDefiningOp<cir::AllocaOp>();
2207 mlir::TypedAttr constant = constOp.getValue();
// The global's base name combines the enclosing function's symbol name with
// the alloca's name.
2218 auto func = op->getParentOfType<cir::FuncOp>();
2221 llvm::StringRef funcName = func.getSymName();
2224 llvm::StringRef varName = alloca.getName();
2227 std::string baseName = (
"__const." + funcName +
"." + varName).str();
2228 CIRBaseBuilderTy builder(getContext());
2232 cir::GlobalOp gv = getOrCreateConstAggregateGlobal(builder, op.getLoc(),
2233 baseName, ty, constant);
// Take the address of the global immediately before the store.
2236 builder.setInsertionPoint(op);
2238 auto ptrTy = cir::PointerType::get(ty);
2239 mlir::Value globalPtr =
2240 cir::GetGlobalOp::create(builder, op.getLoc(), ptrTy, gv.getSymName());
// Checks whether the original constant op has any uses left.
2249 if (constOp.use_empty())
// Dispatches one operation to the matching lowering routine based on its
// concrete op type. Global ops and function ops additionally feed the
// CUDA / global ctor-dtor bookkeeping lists.
2253void LoweringPreparePass::runOnOp(mlir::Operation *op) {
2254 if (
auto arrayCtor = dyn_cast<cir::ArrayCtor>(op)) {
2255 lowerArrayCtor(arrayCtor);
2256 }
else if (
auto arrayDtor = dyn_cast<cir::ArrayDtor>(op)) {
2257 lowerArrayDtor(arrayDtor);
2258 }
else if (
auto cast = mlir::dyn_cast<cir::CastOp>(op)) {
2260 }
else if (
auto complexConj = mlir::dyn_cast<cir::ComplexConjOp>(op)) {
2261 lowerComplexConjOp(complexConj);
2262 }
else if (
auto complexDiv = mlir::dyn_cast<cir::ComplexDivOp>(op)) {
2263 lowerComplexDivOp(complexDiv);
2264 }
else if (
auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op)) {
2265 lowerComplexMulOp(complexMul);
2266 }
else if (
auto glob = mlir::dyn_cast<cir::GlobalOp>(op)) {
2267 lowerGlobalOp(glob);
// Globals carrying a CUDA variable-registration attribute are remembered in
// `cudaDeviceVars`.
2268 if (
auto regAttr = glob->getAttrOfType<CUDAVarRegistrationInfoAttr>(
2269 CUDAVarRegistrationInfoAttr::getMnemonic()))
2270 cudaDeviceVars.emplace_back(glob, regAttr);
2271 }
else if (
auto getGlob = mlir::dyn_cast<cir::GetGlobalOp>(op)) {
2272 lowerGetGlobalOp(getGlob);
2273 }
else if (
auto callOp = dyn_cast<cir::CallOp>(op)) {
2274 lowerTrivialCopyCall(callOp);
2275 }
else if (
auto storeOp = dyn_cast<cir::StoreOp>(op)) {
2276 lowerStoreOfConstAggregate(storeOp);
2277 }
else if (
auto fnOp = dyn_cast<cir::FuncOp>(op)) {
// Functions with a global ctor (or else dtor) priority are recorded with
// their name and priority.
2278 if (
auto globalCtor = fnOp.getGlobalCtorPriority())
2279 globalCtorList.emplace_back(fnOp.getName(), globalCtor.value());
2280 else if (
auto globalDtor = fnOp.getGlobalDtorPriority())
2281 globalDtorList.emplace_back(fnOp.getName(), globalDtor.value());
// Functions carrying a CUDA kernel-name attribute are indexed by kernel name.
// NOTE(review): the dyn_cast result is dereferenced without a null check -
// confirm the attribute under this mnemonic is always a CUDAKernelNameAttr.
2283 if (mlir::Attribute attr =
2284 fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) {
2285 auto kernelNameAttr = dyn_cast<CUDAKernelNameAttr>(attr);
2286 llvm::StringRef kernelName = kernelNameAttr.getKernelName();
2287 cudaKernelMap[kernelName] = fnOp;
2289 }
else if (
auto threeWayCmp = dyn_cast<cir::CmpThreeWayOp>(op)) {
2290 lowerThreeWayCmpOp(threeWayCmp);
2291 }
else if (
auto initOp = dyn_cast<cir::LocalInitOp>(op)) {
2292 lowerLocalInitOp(initOp);
2303 llvm::StringRef name) {
2304 return (
"__" + prefix + name).str();
// Builds the CUDA/HIP module constructor: embeds the GPU binary named by the
// module's CUDA binary-handle attribute as a private global, creates the
// fatbin wrapper record and the GPU binary handle global, declares the
// registration runtime function, and emits a module ctor that is added to
// `globalCtorList` with the default priority.
2326void LoweringPreparePass::buildCUDAModuleCtor() {
// Relocatable device code is not supported: hitting it is llvm_unreachable.
2329 if (astCtx->
getLangOpts().GPURelocatableDeviceCode)
2330 llvm_unreachable(
"GPU RDC NYI");
// Special-cases a module with no recorded kernels and no device variables.
2334 if (cudaKernelMap.empty() && cudaDeviceVars.empty())
2339 mlir::Attribute cudaBinaryHandleAttr =
2340 mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName());
2341 if (!cudaBinaryHandleAttr) {
2347 llvm::StringRef cudaGPUBinaryName =
2348 mlir::cast<CUDABinaryHandleAttr>(cudaBinaryHandleAttr)
// Read the GPU binary through the virtual file system; a read failure is
// reported as an error on the module.
2352 llvm::vfs::FileSystem &vfs =
2354 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> gpuBinaryOrErr =
2355 vfs.getBufferForFile(cudaGPUBinaryName);
2356 if (std::error_code ec = gpuBinaryOrErr.getError()) {
2357 mlirModule->emitError(
"cannot open GPU binary file: " + cudaGPUBinaryName +
2358 ": " + ec.message());
2361 std::unique_ptr<llvm::MemoryBuffer> gpuBinary =
2362 std::move(gpuBinaryOrErr.get());
2366 mlir::Location loc = mlirModule->getLoc();
2367 CIRBaseBuilderTy builder(getContext());
2368 builder.setInsertionPointToStart(mlirModule.getBody());
2372 PointerType voidPtrPtrTy = builder.
getPointerTo(voidPtrTy);
2374 IntType charTy = cir::IntType::get(&getContext(), astCtx->
getCharWidth(),
// Section names differ between HIP and CUDA.
2380 llvm::StringRef fatbinConstName =
2381 astCtx->
getLangOpts().HIP ?
".hip_fatbin" :
".nv_fatbin";
2383 llvm::StringRef fatbinSectionName =
2384 astCtx->
getLangOpts().HIP ?
".hipFatBinSegment" :
".nvFatBinSegment";
// The binary is stored as a char array of exactly the buffer's size, 8-byte
// aligned, private, in the fatbin constant section.
2388 ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size());
2390 GlobalOp fatbinStr = GlobalOp::create(builder, loc, fatbinStrName, fatbinType,
2392 GlobalLinkageKind::PrivateLinkage);
2393 fatbinStr.setAlignment(8);
2394 fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get(
2395 fatbinType, StringAttr::get(gpuBinary->getBuffer(), fatbinType)));
2396 fatbinStr.setSection(fatbinConstName);
2397 fatbinStr.setPrivate();
// Fatbin wrapper record: {int, int, void*, void*}, initialized below with
// {magic, version, fatbin address, unused}.
2401 auto fatbinWrapperType = cir::StructType::get(
2402 &getContext(), {intTy, intTy, voidPtrTy, voidPtrTy},
2403 false,
false,
false);
2404 std::string fatbinWrapperName =
2406 GlobalOp fatbinWrapper = GlobalOp::create(
2407 builder, loc, fatbinWrapperName, fatbinWrapperType,
2408 true, {}, GlobalLinkageKind::PrivateLinkage);
2409 fatbinWrapper.setSection(fatbinSectionName);
// The magic number in the wrapper depends on whether HIP is targeted.
2411 constexpr unsigned cudaFatMagic = 0x466243b1;
2412 constexpr unsigned hipFatMagic = 0x48495046;
2413 unsigned fatMagic =
isHIP ? hipFatMagic : cudaFatMagic;
2415 auto magicInit = IntAttr::get(intTy, fatMagic);
2416 auto versionInit = IntAttr::get(intTy, 1);
2417 auto fatbinStrSymbol =
2418 mlir::FlatSymbolRefAttr::get(fatbinStr.getSymNameAttr());
2419 auto fatbinInit = GlobalViewAttr::get(voidPtrTy, fatbinStrSymbol);
2421 fatbinWrapper.setInitialValueAttr(cir::ConstRecordAttr::get(
2423 mlir::ArrayAttr::get(&getContext(),
2424 {magicInit, versionInit, fatbinInit, unusedInit})));
// Internal, private global of type void** that holds the GPU binary handle.
2427 std::string gpubinHandleName =
2430 GlobalOp gpuBinHandle = GlobalOp::create(
2431 builder, loc, gpubinHandleName, voidPtrPtrTy,
2432 false, {}, cir::GlobalLinkageKind::InternalLinkage);
2434 gpuBinHandle.setPrivate();
// Registration runtime function: void** (void*).
2439 std::string regFuncName =
2441 FuncType regFuncType = FuncType::get({voidPtrTy}, voidPtrPtrTy);
2442 cir::FuncOp regFunc =
2443 buildRuntimeFunction(builder, regFuncName, loc, regFuncType);
// The module ctor itself: internal linkage, type `() -> void`, registered as a
// global constructor with default priority.
2446 cir::FuncOp moduleCtor = buildRuntimeFunction(
2447 builder, moduleCtorName, loc, FuncType::get({}, voidTy),
2448 GlobalLinkageKind::InternalLinkage);
2450 globalCtorList.emplace_back(moduleCtorName,
2451 cir::GlobalCtorAttr::getDefaultPriority());
2452 builder.setInsertionPointToStart(moduleCtor.addEntryBlock());
// Three-block path (entry / if / exit): the entry block compares a handle
// against null and conditionally branches with (ifBlock, exitBlock) as the
// two successors.
2460 mlir::Block *entryBlock = builder.getInsertionBlock();
2461 mlir::Region *parent = entryBlock->getParent();
2462 mlir::Block *ifBlock = builder.createBlock(parent);
2463 mlir::Block *exitBlock = builder.createBlock(parent);
2465 mlir::OpBuilder::InsertionGuard guard(builder);
2466 builder.setInsertionPointToEnd(entryBlock);
2467 mlir::Value handle =
2469 auto handlePtrTy = mlir::cast<cir::PointerType>(handle.getType());
2470 mlir::Value nullPtr = builder.
getNullPtr(handlePtrTy, loc);
2471 mlir::Value isNull =
2472 builder.
createCompare(loc, cir::CmpOpKind::eq, handle, nullPtr);
2473 cir::BrCondOp::create(builder, loc, isNull, ifBlock, exitBlock);
// ifBlock: pass the wrapper as void* to the registration call, store the
// returned handle into the handle global, then branch to the exit block.
2477 mlir::OpBuilder::InsertionGuard guard(builder);
2478 builder.setInsertionPointToStart(ifBlock);
2480 mlir::Value fatbinVoidPtr = builder.
createBitcast(wrapper, voidPtrTy);
2481 cir::CallOp gpuBinaryHandleCall =
2483 mlir::Value gpuBinaryHandle = gpuBinaryHandleCall.getResult();
2485 mlir::Value gpuBinaryHandleGlobal = builder.
createGetGlobal(gpuBinHandle);
2486 builder.
createStore(loc, gpuBinaryHandle, gpuBinaryHandleGlobal);
2487 cir::BrOp::create(builder, loc, exitBlock);
// exitBlock: uses the optional register-globals function and, when a HIP
// module dtor exists, declares `atexit` and takes the dtor's address.
2492 mlir::OpBuilder::InsertionGuard guard(builder);
2493 builder.setInsertionPointToStart(exitBlock);
2494 mlir::Value gHandle =
2497 if (std::optional<FuncOp> regGlobal = buildCUDARegisterGlobals())
2500 if (std::optional<FuncOp> dtor = buildHIPModuleDtor()) {
2501 cir::CIRBaseBuilderTy globalBuilder(getContext());
2502 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2503 FuncOp atexit = buildRuntimeFunction(
2504 globalBuilder,
"atexit", loc,
2505 FuncType::get(PointerType::get(dtor->getFunctionType()), intTy));
2506 mlir::Value dtorFunc = GetGlobalOp::create(
2507 builder, loc, PointerType::get(dtor->getFunctionType()),
2508 mlir::FlatSymbolRefAttr::get(dtor->getSymNameAttr()));
2511 cir::ReturnOp::create(builder, loc);
// Straight-line path for non-RDC compilation: register the fat binary, store
// the handle, call the register-globals function when one exists, and declare
// `__cudaRegisterFatBinaryEnd`.
2515 if (!astCtx->
getLangOpts().GPURelocatableDeviceCode) {
2523 mlir::Value fatbinVoidPtr = builder.
createBitcast(wrapper, voidPtrTy);
2524 cir::CallOp gpuBinaryHandleCall =
2526 mlir::Value gpuBinaryHandle = gpuBinaryHandleCall.getResult();
2528 mlir::Value gpuBinaryHandleGlobal = builder.
createGetGlobal(gpuBinHandle);
2529 builder.
createStore(loc, gpuBinaryHandle, gpuBinaryHandleGlobal);
2532 if (std::optional<FuncOp> regGlobal = buildCUDARegisterGlobals()) {
2533 builder.
createCallOp(loc, *regGlobal, gpuBinaryHandle);
2542 cir::CIRBaseBuilderTy globalBuilder(getContext());
2543 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2545 buildRuntimeFunction(globalBuilder,
"__cudaRegisterFatBinaryEnd", loc,
2546 FuncType::get({voidPtrPtrTy}, voidTy));
2550 llvm_unreachable(
"GPU RDC NYI");
// When a CUDA module dtor exists, declare `atexit` and take the dtor's
// address.
2555 if (std::optional<FuncOp> dtor = buildCUDAModuleDtor()) {
2558 cir::CIRBaseBuilderTy globalBuilder(getContext());
2559 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2560 FuncOp atexit = buildRuntimeFunction(
2561 globalBuilder,
"atexit", loc,
2562 FuncType::get(PointerType::get(dtor->getFunctionType()), intTy));
2563 mlir::Value dtorFunc = GetGlobalOp::create(
2564 builder, loc, PointerType::get(dtor->getFunctionType()),
2565 mlir::FlatSymbolRefAttr::get(dtor->getSymNameAttr()));
2568 cir::ReturnOp::create(builder, loc);
// Builds the CUDA module destructor: an internal `() -> void` function that
// loads the GPU binary handle and returns. Returns the function wrapped in
// std::optional.
2571std::optional<FuncOp> LoweringPreparePass::buildCUDAModuleDtor() {
// Special-cases a module that has no CUDA binary-handle attribute.
2572 if (!mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName()))
2577 VoidType voidTy = VoidType::get(&getContext());
2578 PointerType voidPtrPtrTy = PointerType::get(PointerType::get(voidTy));
2580 mlir::Location loc = mlirModule.getLoc();
2582 cir::CIRBaseBuilderTy builder(getContext());
2583 builder.setInsertionPointToStart(mlirModule.getBody());
// Unregister runtime function: void (void**).
2586 std::string unregisterFuncName =
2588 FuncOp unregisterFunc = buildRuntimeFunction(
2589 builder, unregisterFuncName, loc, FuncType::get({voidPtrPtrTy}, voidTy));
2598 buildRuntimeFunction(builder, dtorName, loc, FuncType::get({}, voidTy),
2599 GlobalLinkageKind::InternalLinkage);
2601 builder.setInsertionPointToStart(dtor.addEntryBlock());
// The handle global is looked up by name; cast<> asserts that it exists and
// is a GlobalOp.
2607 GlobalOp gpubinGlobal = cast<GlobalOp>(mlirModule.lookupSymbol(gpubinName));
2609 mlir::Value gpubin = builder.
createLoad(loc, gpubinAddress);
2611 ReturnOp::create(builder, loc);
// Builds the HIP module destructor: an internal `() -> void` function with
// three blocks (entry / if / exit). The entry block tests the GPU binary
// handle against null; the if block branches to the exit block, which returns.
2628std::optional<FuncOp> LoweringPreparePass::buildHIPModuleDtor() {
// Special-cases a module that has no CUDA binary-handle attribute.
2629 if (!mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName()))
2634 VoidType voidTy = VoidType::get(&getContext());
2635 PointerType voidPtrPtrTy = PointerType::get(PointerType::get(voidTy));
2637 mlir::Location loc = mlirModule.getLoc();
2639 cir::CIRBaseBuilderTy builder(getContext());
2640 builder.setInsertionPointToStart(mlirModule.getBody());
// Unregister runtime function: void (void**).
2643 std::string unregisterFuncName =
2645 FuncOp unregisterFunc = buildRuntimeFunction(
2646 builder, unregisterFuncName, loc, FuncType::get({voidPtrPtrTy}, voidTy));
2650 buildRuntimeFunction(builder, dtorName, loc, FuncType::get({}, voidTy),
2651 GlobalLinkageKind::InternalLinkage);
// The handle global is looked up by name; cast<> asserts that it exists and
// is a GlobalOp.
2654 GlobalOp gpuBinGlobal = cast<GlobalOp>(mlirModule.lookupSymbol(gpubinName));
2656 mlir::Block *entryBlock = dtor.addEntryBlock();
2657 mlir::Block *ifBlock = builder.createBlock(&dtor.getBody());
2658 mlir::Block *exitBlock = builder.createBlock(&dtor.getBody());
// Entry block: conditional branch on "handle != null" with
// (ifBlock, exitBlock) as the two successors.
2660 mlir::OpBuilder::InsertionGuard guard(builder);
2661 builder.setInsertionPointToEnd(entryBlock);
2662 mlir::Value handle =
2664 auto handlePtrTy = mlir::cast<cir::PointerType>(handle.getType());
2665 mlir::Value nullPtr = builder.
getNullPtr(handlePtrTy, loc);
2666 mlir::Value isNotNull =
2667 builder.
createCompare(loc, cir::CmpOpKind::ne, handle, nullPtr);
2668 cir::BrCondOp::create(builder, loc, isNotNull, ifBlock, exitBlock);
// If block: ends with an unconditional branch to the exit block.
2672 mlir::OpBuilder::InsertionGuard ifGuard(builder);
2673 builder.setInsertionPointToStart(ifBlock);
2676 cir::BrOp::create(builder, loc, exitBlock);
// Exit block: just returns.
2679 mlir::OpBuilder::InsertionGuard exitGuard(builder);
2680 builder.setInsertionPointToStart(exitBlock);
2681 cir::ReturnOp::create(builder, loc);
// Builds the internal function that registers CUDA kernels and device
// variables. It takes a void** argument and returns void; its body is filled
// by buildCUDARegisterGlobalFunctions and buildCUDARegisterVars. The function
// is returned wrapped in std::optional.
2687std::optional<FuncOp> LoweringPreparePass::buildCUDARegisterGlobals() {
// Special-cases a module with no recorded kernels and no device variables.
2688 if (cudaKernelMap.empty() && cudaDeviceVars.empty())
2691 cir::CIRBaseBuilderTy builder(getContext());
2692 builder.setInsertionPointToStart(mlirModule.getBody());
2694 mlir::Location loc = mlirModule.getLoc();
2697 auto voidTy = VoidType::get(&getContext());
2698 auto voidPtrTy = PointerType::get(voidTy);
2699 auto voidPtrPtrTy = PointerType::get(voidPtrTy);
2703 std::string regGlobalFuncName =
2705 auto regGlobalFuncTy = FuncType::get({voidPtrPtrTy}, voidTy);
2706 FuncOp regGlobalFunc =
2707 buildRuntimeFunction(builder, regGlobalFuncName, loc, regGlobalFuncTy,
2708 GlobalLinkageKind::InternalLinkage);
2709 builder.setInsertionPointToStart(regGlobalFunc.addEntryBlock());
// Kernels are registered first, then device variables, then the return.
2711 buildCUDARegisterGlobalFunctions(builder, regGlobalFunc);
2712 buildCUDARegisterVars(builder, regGlobalFunc);
2714 ReturnOp::create(builder, loc);
2715 return regGlobalFunc;
// Emits, inside `regGlobalFunc`, one registration call per kernel recorded in
// `cudaKernelMap`.
//   builder       - positioned inside regGlobalFunc's body.
//   regGlobalFunc - its first argument is used as the fat binary handle.
2718void LoweringPreparePass::buildCUDARegisterGlobalFunctions(
2719 cir::CIRBaseBuilderTy &builder, FuncOp regGlobalFunc) {
2720 mlir::Location loc = mlirModule.getLoc();
2722 cir::CIRDataLayout dataLayout(mlirModule);
2724 auto voidTy = VoidType::get(&getContext());
2725 auto voidPtrTy = PointerType::get(voidTy);
2726 auto voidPtrPtrTy = PointerType::get(voidPtrTy);
2728 IntType charTy = cir::IntType::get(&getContext(), astCtx->
getCharWidth(),
2732 mlir::Value fatbinHandle = *regGlobalFunc.args_begin();
// Module-level declarations are created with a separate builder positioned at
// the start of the module body.
2734 cir::CIRBaseBuilderTy globalBuilder(getContext());
2735 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
// The registration runtime function takes ten parameters:
// (void**, void*, void*, void*, int, void*, void*, void*, void*, void*).
2749 FuncOp cudaRegisterFunction = buildRuntimeFunction(
2751 FuncType::get({voidPtrPtrTy, voidPtrTy, voidPtrTy, voidPtrTy, intTy,
2752 voidPtrTy, voidPtrTy, voidPtrTy, voidPtrTy, voidPtrTy},
// Creates a private global named ".str<str>" whose array type has room for
// str.size() + 1 characters.
2755 auto makeConstantString = [&](llvm::StringRef str) -> GlobalOp {
2756 auto strType = ArrayType::get(&getContext(), charTy, 1 + str.size());
2757 auto tmpString = cir::GlobalOp::create(
2758 globalBuilder, loc, (
".str" + str).str(), strType,
2760 cir::GlobalLinkageKind::PrivateLinkage);
2763 tmpString.setInitialValueAttr(
2764 ConstArrayAttr::get(strType, StringAttr::get(str +
"\0", strType)));
2765 tmpString.setPrivate();
2769 cir::ConstantOp cirNullPtr = builder.
getNullPtr(voidPtrTy, loc);
// One registration call per kernel: the device stub is looked up by kernel
// name and the kernel name is materialized as a constant string.
2771 for (
auto kernelName : cudaKernelMap.keys()) {
2772 FuncOp deviceStub = cudaKernelMap[kernelName];
2773 GlobalOp deviceFuncStr = makeConstantString(kernelName);
2777 mlir::Value hostFunc;
2784 auto funcHandle = cast<GlobalOp>(mlirModule.lookupSymbol(kernelName));
2789 GetGlobalOp::create(
2790 builder, loc, PointerType::get(deviceStub.getFunctionType()),
2791 mlir::FlatSymbolRefAttr::get(deviceStub.getSymNameAttr())),
// Call arguments: handle, host function, device function (passed twice),
// the integer -1, then five null pointers.
2795 loc, cudaRegisterFunction,
2796 {fatbinHandle, hostFunc, deviceFunc, deviceFunc,
2797 ConstantOp::create(builder, loc, IntAttr::get(intTy, -1)), cirNullPtr,
2798 cirNullPtr, cirNullPtr, cirNullPtr, cirNullPtr});
/// Prepares, for every entry of `cudaDeviceVars`, the operands of a
/// `cudaRegisterVar` runtime call.
///
/// \param builder        Builder used for the types and integer constants
///                       created here.
/// \param regGlobalFunc  Function whose first argument is read as the fatbin
///                       handle passed as the first registration operand.
///
/// Surface, texture and managed variables are not implemented and hit
/// `llvm_unreachable`.
///
/// NOTE(review): several statements of this definition are elided in this
/// listing (e.g. the definitions of `voidPtrTy`, `intTy` and `sizeTy`, the
/// body of the empty-check, and the head of the call expression). The comments
/// below describe only the statements that are visible.
2805void LoweringPreparePass::buildCUDARegisterVars(cir::CIRBaseBuilderTy &builder,
2806 FuncOp regGlobalFunc) {
// Every op created here is tagged with the module's location.
2807 mlir::Location loc = mlirModule.getLoc();
// Data layout queried below for each variable's allocation size.
2809 cir::CIRDataLayout dataLayout(mlirModule);
2812 PointerType voidPtrPtrTy = builder.
getPointerTo(voidPtrTy);
// Integer type whose width is the AST context's char width; it is the element
// type of the string globals built by makeConstantString below.
2816 IntType charTy = cir::IntType::get(&getContext(), astCtx->
getCharWidth(),
// Guard for the no-device-variables case; the guarded statement is not shown
// here (presumably an early return -- confirm).
2819 if (cudaDeviceVars.empty())
// Separate builder positioned at the start of the module body; it is the one
// used to create the string globals.
2822 cir::CIRBaseBuilderTy globalBuilder(getContext());
2823 globalBuilder.setInsertionPointToStart(mlirModule.getBody());
2830 cir::VoidType voidTy = builder.
getVoidTy();
// Runtime function taking eight parameters: void**, three void*, an `intTy`,
// a `sizeTy`, then two `intTy`.
2831 FuncOp cudaRegisterVar = buildRuntimeFunction(
2833 FuncType::get({voidPtrPtrTy, voidPtrTy, voidPtrTy, voidPtrTy, intTy,
2834 sizeTy, intTy, intTy},
// Creates a private-linkage global named ".str" + str whose type is an array
// of `charTy` with one more element than `str` has characters.
2837 auto makeConstantString = [&](llvm::StringRef str) -> GlobalOp {
2838 auto strType = ArrayType::get(&getContext(), charTy, 1 + str.size());
2839 auto tmpString = cir::GlobalOp::create(
2840 globalBuilder, loc, (
".str" + str).str(), strType,
2842 cir::GlobalLinkageKind::PrivateLinkage);
// NOTE(review): "\0" reaches the concatenation as a `const char *`, i.e. an
// empty C string, so it likely appends nothing; the extra array element
// presumably supplies the terminator instead -- confirm.
2843 tmpString.setInitialValueAttr(
2844 ConstArrayAttr::get(strType, StringAttr::get(str +
"\0", strType)));
2845 tmpString.setPrivate();
// The fatbin handle is the first argument of the enclosing registration
// function.
2849 mlir::Value fatbinHandle = *regGlobalFunc.args_begin();
// One iteration per (global, registration attribute) pair.
2851 for (
auto &[global, regAttr] : cudaDeviceVars) {
// Only plain variables proceed; surface and texture kinds abort as NYI.
2852 switch (regAttr.getKind()) {
2853 case cir::CUDADeviceVarKind::Variable:
2855 case cir::CUDADeviceVarKind::Surface:
2856 llvm_unreachable(
"Surface registration NYI");
2857 case cir::CUDADeviceVarKind::Texture:
2858 llvm_unreachable(
"Texture registration NYI");
// Managed variables are likewise not implemented.
2861 if (regAttr.getIsManaged())
2862 llvm_unreachable(
"Managed variable registration NYI");
// String global holding the variable's device-side name.
2864 GlobalOp deviceNameStr = makeConstantString(regAttr.getDeviceSideName());
2867 mlir::Value hostVar =
// Integer operands: the extern flag as 0/1, the allocation size of the
// global's symbol type, the constant flag as 0/1, and a fixed 0 for
// `normalized`.
2870 auto isExtern = ConstantOp::create(
2871 builder, loc, IntAttr::get(intTy, regAttr.getIsExtern() ? 1 : 0));
2872 llvm::TypeSize size = dataLayout.getTypeAllocSize(global.getSymType());
2873 auto varSize = ConstantOp::create(
2874 builder, loc, IntAttr::get(sizeTy, size.getFixedValue()));
2875 auto isConstant = ConstantOp::create(
2876 builder, loc, IntAttr::get(intTy, regAttr.getIsConstant() ? 1 : 0));
2877 auto normalized = ConstantOp::create(builder, loc, IntAttr::get(intTy, 0));
// Operands: fatbin handle, host variable, the device name value twice, then
// the four integer constants above.
2879 {fatbinHandle, hostVar, deviceName, deviceName,
2880 isExtern, varSize, isConstant, normalized});
/// Pass entry point: gathers the operations of interest under the root
/// operation, iterates over them, and then runs the module-level build steps
/// (global init function, TLS function, CUDA module constructor, global
/// ctor/dtor list).
///
/// NOTE(review): the body of the loop over `opsToTransform` and any condition
/// guarding `buildCUDAModuleCtor()` are elided in this listing.
2884void LoweringPreparePass::runOnOperation() {
2885 mlir::Operation *op = getOperation();
// Remember the module when the root operation is one; the other methods of
// this pass read `mlirModule`.
2886 if (isa<::mlir::ModuleOp>(op))
2887 mlirModule = cast<::mlir::ModuleOp>(op);
2889 llvm::SmallVector<mlir::Operation *> opsToTransform;
// Collect matching ops into a list first and process them afterwards
// (presumably so that rewriting does not disturb the walk -- confirm).
2891 op->walk([&](mlir::Operation *op) {
2892 if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp,
2893 cir::ComplexConjOp, cir::ComplexMulOp, cir::ComplexDivOp,
2894 cir::DynamicCastOp, cir::FuncOp, cir::CallOp,
2895 cir::GetGlobalOp, cir::GlobalOp, cir::StoreOp,
2896 cir::CmpThreeWayOp, cir::LocalInitOp>(op))
2897 opsToTransform.push_back(op);
// Visit every collected operation (loop body not shown here).
2900 for (mlir::Operation *o : opsToTransform)
// Module-level steps, run after the per-operation loop.
2903 buildCXXGlobalInitFunc();
2904 buildCXXGlobalTlsFunc();
2906 buildCUDAModuleCtor();
2908 buildGlobalCtorDtorList();
// Returns a default-constructed LoweringPreparePass; no ASTContext is set on
// this path. (The enclosing signature is not shown in this listing.)
2912 return std::make_unique<LoweringPreparePass>();
/// Factory that creates a LoweringPreparePass, hands it `astCtx` through
/// `setASTContext`, and returns it as a `std::unique_ptr<Pass>`.
/// NOTE(review): the line carrying the function name and parameter list is
/// not shown in this listing.
2915std::unique_ptr<Pass>
2917 auto pass = std::make_unique<LoweringPreparePass>();
2918 pass->setASTContext(astCtx);
// The local is a unique_ptr<LoweringPreparePass> while the return type is
// unique_ptr<Pass>; std::move makes the converting move explicit.
2919 return std::move(pass);
Defines the clang::ASTContext interface.
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, int MaxLevel, int Level=0)
static llvm::FunctionCallee getGuardReleaseFn(CodeGenModule &CGM, llvm::PointerType *GuardPtrTy)
static llvm::FunctionCallee getGuardAcquireFn(CodeGenModule &CGM, llvm::PointerType *GuardPtrTy)
static mlir::Value buildRangeReductionComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static llvm::StringRef getComplexDivLibCallName(llvm::APFloat::Semantics semantics)
static llvm::SmallVector< mlir::Attribute > prepareCtorDtorAttrList(mlir::MLIRContext *context, llvm::ArrayRef< std::pair< std::string, uint32_t > > list)
static llvm::StringRef getComplexMulLibCallName(llvm::APFloat::Semantics semantics)
static cir::GlobalLinkageKind getThreadLocalWrapperLinkage(GlobalOp op, clang::ASTContext &astCtx)
static mlir::Value buildComplexBinOpLibCall(LoweringPreparePass &pass, CIRBaseBuilderTy &builder, llvm::StringRef(*libFuncNameGetter)(llvm::APFloat::Semantics), mlir::Location loc, cir::ComplexType ty, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static mlir::Value lowerComplexMul(LoweringPreparePass &pass, CIRBaseBuilderTy &builder, mlir::Location loc, cir::ComplexMulOp op, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static std::string addUnderscoredPrefix(llvm::StringRef prefix, llvm::StringRef name)
static SmallString< 128 > getTransformedFileName(mlir::ModuleOp mlirModule)
static mlir::Value lowerComplexToComplexCast(mlir::MLIRContext &ctx, cir::CastOp op, cir::CastKind scalarCastKind)
static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder, clang::ASTContext *astCtx, mlir::Operation *op, mlir::Type eltTy, mlir::Value addr, mlir::Value numElements, uint64_t arrayLen, bool isCtor)
Lower a cir.array.ctor or cir.array.dtor into a do-while loop that iterates over every element.
static bool isThreadWrapperReplaceable(cir::TLS_Model tls, clang::ASTContext &astCtx)
static mlir::Value lowerComplexToScalarCast(mlir::MLIRContext &ctx, cir::CastOp op, cir::CastKind elemToBoolKind)
static mlir::Value buildAlgebraicComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag)
static llvm::StringRef getCUDAPrefix(clang::ASTContext *astCtx)
static mlir::Type higherPrecisionElementTypeForComplexArithmetic(mlir::MLIRContext &context, clang::ASTContext &cc, CIRBaseBuilderTy &builder, mlir::Type elementType)
static mlir::Value lowerScalarToComplexCast(mlir::MLIRContext &ctx, cir::CastOp op)
static mlir::Value lowerComplexDiv(LoweringPreparePass &pass, CIRBaseBuilderTy &builder, mlir::Location loc, cir::ComplexDivOp op, mlir::Value lhsReal, mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag, mlir::MLIRContext &mlirCx, clang::ASTContext &cc)
Defines the clang::Module class, which describes a module in the source code.
static bool compare(const PathDiagnostic &X, const PathDiagnostic &Y)
Defines the SourceManager interface.
Defines various enumerations that describe declaration and type specifiers.
Defines the TargetCXXABI class, which abstracts details of the C++ ABI that we're targeting.
mlir::Value createDiv(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::TypedAttr getConstNullPtrAttr(mlir::Type t)
mlir::Value createLogicalOr(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createSub(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, OverflowBehavior ob=OverflowBehavior::None)
cir::ConditionOp createCondition(mlir::Value condition)
Create a loop condition.
cir::CopyOp createCopy(mlir::Value dst, mlir::Value src, bool isVolatile=false, bool skipTailPadding=false)
Create a copy with inferred length.
cir::VoidType getVoidTy()
cir::ConstantOp getNullValue(mlir::Type ty, mlir::Location loc)
mlir::Value createCast(mlir::Location loc, cir::CastKind kind, mlir::Value src, mlir::Type newTy)
cir::PointerType getVoidFnPtrTy(mlir::TypeRange argTypes={})
Returns void (*)(T...) as a cir::PointerType.
mlir::Value createFDiv(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createAdd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, OverflowBehavior ob=OverflowBehavior::None)
cir::PointerType getPointerTo(mlir::Type ty)
mlir::Value createFNeg(mlir::Location loc, mlir::Value operand)
mlir::Value createFAdd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createComplexImag(mlir::Location loc, mlir::Value operand)
cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc)
cir::IntType getUIntNTy(int n)
cir::DoWhileOp createDoWhile(mlir::Location loc, llvm::function_ref< void(mlir::OpBuilder &, mlir::Location)> condBuilder, llvm::function_ref< void(mlir::OpBuilder &, mlir::Location)> bodyBuilder)
Create a do-while operation.
cir::GetGlobalOp createGetGlobal(mlir::Location loc, cir::GlobalOp global, bool threadLocal=false)
mlir::Value createAlloca(mlir::Location loc, cir::PointerType addrType, llvm::StringRef name, mlir::IntegerAttr alignment, mlir::Value dynAllocSize)
mlir::Value getSignedInt(mlir::Location loc, int64_t val, unsigned numBits)
mlir::Value createAnd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createBitcast(mlir::Value src, mlir::Type newTy)
mlir::Value createFMul(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
cir::FuncType getVoidFnTy(mlir::TypeRange argTypes={})
Returns void (T...) as a cir::FuncType.
cir::CmpOp createCompare(mlir::Location loc, cir::CmpOpKind kind, mlir::Value lhs, mlir::Value rhs)
mlir::IntegerAttr getAlignmentAttr(clang::CharUnits alignment)
mlir::Value createSelect(mlir::Location loc, mlir::Value condition, mlir::Value trueValue, mlir::Value falseValue)
mlir::Value createMul(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, OverflowBehavior ob=OverflowBehavior::None)
cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr, bool isVolatile=false, uint64_t alignment=0)
mlir::Value createMinus(mlir::Location loc, mlir::Value input, bool nsw=false)
cir::ConstantOp getConstantInt(mlir::Location loc, mlir::Type ty, int64_t value)
mlir::Value createComplexCreate(mlir::Location loc, mlir::Value real, mlir::Value imag)
cir::PointerType getVoidPtrTy(clang::LangAS langAS=clang::LangAS::Default)
mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand)
cir::IntType getSIntNTy(int n)
mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr, uint64_t alignment)
cir::CallOp createCallOp(mlir::Location loc, mlir::SymbolRefAttr callee, mlir::Type returnType, mlir::ValueRange operands, llvm::ArrayRef< mlir::NamedAttribute > attrs={}, llvm::ArrayRef< mlir::NamedAttrList > argAttrs={}, llvm::ArrayRef< mlir::NamedAttribute > resAttrs={})
cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst, bool isVolatile=false, mlir::IntegerAttr align={}, cir::SyncScopeKindAttr scope={}, cir::MemOrderAttr order={})
cir::YieldOp createYield(mlir::Location loc, mlir::ValueRange value={})
Create a yield operation.
mlir::Value createLogicalAnd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
mlir::Value createFSub(mlir::Location loc, mlir::Value lhs, mlir::Value rhs)
cir::BoolType getBoolTy()
mlir::Value getUnsignedInt(mlir::Location loc, uint64_t val, unsigned numBits)
mlir::Value createComplexReal(mlir::Location loc, mlir::Value operand)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic analysis of a file.
SourceManager & getSourceManager()
MangleContext * createMangleContext(const TargetInfo *T=nullptr)
If T is null pointer, assume the target in ASTContext.
const LangOptions & getLangOpts() const
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
const TargetInfo & getTargetInfo() const
QualType getSignedSizeType() const
Return the unique signed counterpart of the integer type corresponding to size_t.
Module * getCurrentNamedModule() const
Get module under construction, nullptr if this is not a C++20 module.
uint64_t getCharWidth() const
Return the size of the character type, in bits.
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align. Beware: llvm::Align assumes power-of-two 8-bit bytes.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
static CharUnits One()
One - Construct a CharUnits quantity of one.
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
llvm::vfs::FileSystem & getVirtualFileSystem() const
bool isModuleImplementation() const
Is this a module implementation.
FileManager & getFileManager() const
Exposes information about the current target.
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
unsigned getMaxAtomicInlineWidth() const
Return the maximum width lock-free atomic operation which can be inlined given the supported features of the given target.
const llvm::fltSemantics & getDoubleFormat() const
const llvm::fltSemantics & getHalfFormat() const
const llvm::fltSemantics & getBFloat16Format() const
const llvm::fltSemantics & getLongDoubleFormat() const
const llvm::fltSemantics & getFloatFormat() const
virtual uint64_t getMaxPointerWidth() const
Return the maximum width of pointers on this target.
const llvm::fltSemantics & getFloat128Format() const
const llvm::VersionTuple & getSDKVersion() const
Defines the clang::TargetInfo interface.
static bool isLocalLinkage(GlobalLinkageKind linkage)
static bool isWeakODRLinkage(GlobalLinkageKind linkage)
static bool isLinkOnceLinkage(GlobalLinkageKind linkage)
const internal::VariadicDynCastAllOfMatcher< Decl, VarDecl > varDecl
Matches variable declarations.
bool isHIP(ID Id)
isHIP - Is this a HIP input.
bool isTemplateInstantiation(TemplateSpecializationKind Kind)
Determine whether this template specialization kind refers to an instantiation of an entity (as opposed to a non-template or an explicit specialization).
bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature)
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ CUDA_USES_FATBIN_REGISTER_END
std::unique_ptr< Pass > createLoweringPreparePass()
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 uint32_t
static bool hipModuleCtor()
static bool guardAbortOnException()
static bool opGlobalAnnotations()
static bool opGlobalCtorPriority()
static bool shouldSplitConstantStore()
static bool shouldUseMemSetToInitialize()
static bool opFuncExtraAttrs()
static bool shouldUseBZeroPlusStoresToInitialize()
static bool fastMathFlags()
static bool astVarDeclInterface()