38#include "llvm/ADT/APFloat.h"
39#include "llvm/ADT/APInt.h"
40#include "llvm/ADT/FloatingPointMode.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/StringExtras.h"
43#include "llvm/Analysis/ValueTracking.h"
44#include "llvm/IR/DataLayout.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsBPF.h"
51#include "llvm/IR/IntrinsicsDirectX.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/IntrinsicsNVPTX.h"
54#include "llvm/IR/IntrinsicsPowerPC.h"
55#include "llvm/IR/IntrinsicsR600.h"
56#include "llvm/IR/IntrinsicsRISCV.h"
57#include "llvm/IR/IntrinsicsS390.h"
58#include "llvm/IR/IntrinsicsWebAssembly.h"
59#include "llvm/IR/IntrinsicsX86.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/MatrixBuilder.h"
62#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
63#include "llvm/Support/AMDGPUAddrSpace.h"
64#include "llvm/Support/ConvertUTF.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/ScopedPrinter.h"
67#include "llvm/TargetParser/AArch64TargetParser.h"
68#include "llvm/TargetParser/RISCVISAInfo.h"
69#include "llvm/TargetParser/RISCVTargetParser.h"
70#include "llvm/TargetParser/X86TargetParser.h"
76using namespace CodeGen;
80 Align AlignmentInBytes) {
82 switch (CGF.
getLangOpts().getTrivialAutoVarInit()) {
83 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
86 case LangOptions::TrivialAutoVarInitKind::Zero:
87 Byte = CGF.
Builder.getInt8(0x00);
89 case LangOptions::TrivialAutoVarInitKind::Pattern: {
91 Byte = llvm::dyn_cast<llvm::ConstantInt>(
99 I->addAnnotationMetadata(
"auto-init");
105 Constant *FZeroConst = ConstantFP::getZero(CGF->
FloatTy);
110 FZeroConst = ConstantVector::getSplat(
111 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
112 auto *FCompInst = CGF->
Builder.CreateFCmpOLT(Op0, FZeroConst);
113 CMP = CGF->
Builder.CreateIntrinsic(
115 {FCompInst},
nullptr);
117 CMP = CGF->
Builder.CreateFCmpOLT(Op0, FZeroConst);
120 LastInstr = CGF->
Builder.CreateIntrinsic(
121 CGF->
VoidTy, llvm::Intrinsic::dx_discard, {CMP},
nullptr);
126 CGF->
Builder.CreateCondBr(CMP, LT0, End);
128 CGF->
Builder.SetInsertPoint(LT0);
130 CGF->
Builder.CreateIntrinsic(CGF->
VoidTy, llvm::Intrinsic::spv_discard, {},
133 LastInstr = CGF->
Builder.CreateBr(End);
135 CGF->
Builder.SetInsertPoint(End);
137 llvm_unreachable(
"Backend Codegen not supported.");
145 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(
E->getArg(1));
146 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(
E->getArg(2));
157 Value *LowBits =
nullptr;
158 Value *HighBits =
nullptr;
162 llvm::Type *RetElementTy = CGF->
Int32Ty;
164 RetElementTy = llvm::VectorType::get(
165 CGF->
Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
166 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
168 CallInst *CI = CGF->
Builder.CreateIntrinsic(
169 RetTy, Intrinsic::dx_splitdouble, {Op0},
nullptr,
"hlsl.splitdouble");
171 LowBits = CGF->
Builder.CreateExtractValue(CI, 0);
172 HighBits = CGF->
Builder.CreateExtractValue(CI, 1);
177 if (!Op0->
getType()->isVectorTy()) {
178 FixedVectorType *DestTy = FixedVectorType::get(CGF->
Int32Ty, 2);
179 Value *Bitcast = CGF->
Builder.CreateBitCast(Op0, DestTy);
181 LowBits = CGF->
Builder.CreateExtractElement(Bitcast, (uint64_t)0);
182 HighBits = CGF->
Builder.CreateExtractElement(Bitcast, 1);
185 if (
const auto *VecTy =
187 NumElements = VecTy->getNumElements();
189 FixedVectorType *Uint32VecTy =
190 FixedVectorType::get(CGF->
Int32Ty, NumElements * 2);
191 Value *Uint32Vec = CGF->
Builder.CreateBitCast(Op0, Uint32VecTy);
192 if (NumElements == 1) {
193 LowBits = CGF->
Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
194 HighBits = CGF->
Builder.CreateExtractElement(Uint32Vec, 1);
197 for (
int I = 0,
E = NumElements; I !=
E; ++I) {
198 EvenMask.push_back(I * 2);
199 OddMask.push_back(I * 2 + 1);
201 LowBits = CGF->
Builder.CreateShuffleVector(Uint32Vec, EvenMask);
202 HighBits = CGF->
Builder.CreateShuffleVector(Uint32Vec, OddMask);
216 "asdouble operands types mismatch");
220 llvm::Type *ResultType = CGF.
DoubleTy;
223 N = VTy->getNumElements();
224 ResultType = llvm::FixedVectorType::get(CGF.
DoubleTy, N);
228 return CGF.
Builder.CreateIntrinsic(
229 ResultType, Intrinsic::dx_asdouble,
233 OpLowBits = CGF.
Builder.CreateVectorSplat(1, OpLowBits);
234 OpHighBits = CGF.
Builder.CreateVectorSplat(1, OpHighBits);
238 for (
int i = 0; i < N; i++) {
240 Mask.push_back(i + N);
243 Value *BitVec = CGF.
Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
245 return CGF.
Builder.CreateBitCast(BitVec, ResultType);
252 llvm::Metadata *Ops[] = {llvm::MDString::get(Context,
"x18")};
253 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
254 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
257 llvm::Value *X18 = CGF.
Builder.CreateCall(F, Metadata);
264 unsigned BuiltinID) {
273 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
274 {Builtin::BI__builtin___fprintf_chk,
"__fprintf_chkieee128"},
275 {Builtin::BI__builtin___printf_chk,
"__printf_chkieee128"},
276 {Builtin::BI__builtin___snprintf_chk,
"__snprintf_chkieee128"},
277 {Builtin::BI__builtin___sprintf_chk,
"__sprintf_chkieee128"},
278 {Builtin::BI__builtin___vfprintf_chk,
"__vfprintf_chkieee128"},
279 {Builtin::BI__builtin___vprintf_chk,
"__vprintf_chkieee128"},
280 {Builtin::BI__builtin___vsnprintf_chk,
"__vsnprintf_chkieee128"},
281 {Builtin::BI__builtin___vsprintf_chk,
"__vsprintf_chkieee128"},
282 {Builtin::BI__builtin_fprintf,
"__fprintfieee128"},
283 {Builtin::BI__builtin_printf,
"__printfieee128"},
284 {Builtin::BI__builtin_snprintf,
"__snprintfieee128"},
285 {Builtin::BI__builtin_sprintf,
"__sprintfieee128"},
286 {Builtin::BI__builtin_vfprintf,
"__vfprintfieee128"},
287 {Builtin::BI__builtin_vprintf,
"__vprintfieee128"},
288 {Builtin::BI__builtin_vsnprintf,
"__vsnprintfieee128"},
289 {Builtin::BI__builtin_vsprintf,
"__vsprintfieee128"},
290 {Builtin::BI__builtin_fscanf,
"__fscanfieee128"},
291 {Builtin::BI__builtin_scanf,
"__scanfieee128"},
292 {Builtin::BI__builtin_sscanf,
"__sscanfieee128"},
293 {Builtin::BI__builtin_vfscanf,
"__vfscanfieee128"},
294 {Builtin::BI__builtin_vscanf,
"__vscanfieee128"},
295 {Builtin::BI__builtin_vsscanf,
"__vsscanfieee128"},
296 {Builtin::BI__builtin_nexttowardf128,
"__nexttowardieee128"},
302 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
303 {Builtin::BI__builtin_frexpl,
"frexp"},
304 {Builtin::BI__builtin_ldexpl,
"ldexp"},
305 {Builtin::BI__builtin_modfl,
"modf"},
311 if (FD->
hasAttr<AsmLabelAttr>())
317 &
getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
318 F128Builtins.contains(BuiltinID))
319 Name = F128Builtins[BuiltinID];
322 &llvm::APFloat::IEEEdouble() &&
323 AIXLongDouble64Builtins.contains(BuiltinID))
324 Name = AIXLongDouble64Builtins[BuiltinID];
329 llvm::FunctionType *Ty =
332 return GetOrCreateLLVMFunction(Name, Ty,
D,
false);
338 QualType T, llvm::IntegerType *IntType) {
341 if (
V->getType()->isPointerTy())
342 return CGF.
Builder.CreatePtrToInt(
V, IntType);
344 assert(
V->getType() == IntType);
352 if (ResultType->isPointerTy())
353 return CGF.
Builder.CreateIntToPtr(
V, ResultType);
355 assert(
V->getType() == ResultType);
366 if (Align % Bytes != 0) {
379 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
389 llvm::IntegerType *IntType = llvm::IntegerType::get(
393 llvm::Type *ValueType = Val->getType();
421 llvm::AtomicRMWInst::BinOp Kind,
430 llvm::AtomicRMWInst::BinOp Kind,
432 Instruction::BinaryOps Op,
433 bool Invert =
false) {
442 llvm::IntegerType *IntType = llvm::IntegerType::get(
446 llvm::Type *ValueType = Val->getType();
450 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
455 llvm::ConstantInt::getAllOnesValue(IntType));
479 llvm::IntegerType *IntType = llvm::IntegerType::get(
483 llvm::Type *ValueType = Cmp->getType();
488 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
489 llvm::AtomicOrdering::SequentiallyConsistent);
492 return CGF.
Builder.CreateZExt(CGF.
Builder.CreateExtractValue(Pair, 1),
515 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
527 auto *RTy = Exchange->getType();
531 if (RTy->isPointerTy()) {
537 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
538 AtomicOrdering::Monotonic :
546 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
547 CmpXchg->setVolatile(
true);
550 if (RTy->isPointerTy()) {
571 AtomicOrdering SuccessOrdering) {
572 assert(
E->getNumArgs() == 4);
578 assert(DestPtr->getType()->isPointerTy());
579 assert(!ExchangeHigh->getType()->isPointerTy());
580 assert(!ExchangeLow->getType()->isPointerTy());
583 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
584 ? AtomicOrdering::Monotonic
589 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.
getLLVMContext(), 128);
590 Address DestAddr(DestPtr, Int128Ty,
595 ExchangeHigh = CGF.
Builder.CreateZExt(ExchangeHigh, Int128Ty);
596 ExchangeLow = CGF.
Builder.CreateZExt(ExchangeLow, Int128Ty);
598 CGF.
Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
599 llvm::Value *Exchange = CGF.
Builder.CreateOr(ExchangeHigh, ExchangeLow);
605 SuccessOrdering, FailureOrdering);
611 CXI->setVolatile(
true);
623 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
629 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
630 return CGF.
Builder.CreateAdd(
Result, ConstantInt::get(IntTy, 1));
635 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
641 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
642 return CGF.
Builder.CreateSub(
Result, ConstantInt::get(IntTy, 1));
653 Load->setVolatile(
true);
663 llvm::StoreInst *Store =
665 Store->setVolatile(
true);
674 unsigned ConstrainedIntrinsicID) {
677 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF,
E);
678 if (CGF.
Builder.getIsFPConstrained()) {
680 return CGF.
Builder.CreateConstrainedFPCall(F, { Src0 });
683 return CGF.
Builder.CreateCall(F, Src0);
691 unsigned ConstrainedIntrinsicID) {
695 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF,
E);
696 if (CGF.
Builder.getIsFPConstrained()) {
698 return CGF.
Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
701 return CGF.
Builder.CreateCall(F, { Src0, Src1 });
708 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
712 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF,
E);
713 if (CGF.
Builder.getIsFPConstrained()) {
715 {Src0->getType(), Src1->getType()});
716 return CGF.
Builder.CreateConstrainedFPCall(F, {Src0, Src1});
721 return CGF.
Builder.CreateCall(F, {Src0, Src1});
728 unsigned ConstrainedIntrinsicID) {
733 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF,
E);
734 if (CGF.
Builder.getIsFPConstrained()) {
736 return CGF.
Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
739 return CGF.
Builder.CreateCall(F, { Src0, Src1, Src2 });
746 unsigned IntrinsicID,
747 unsigned ConstrainedIntrinsicID,
751 if (CGF.
Builder.getIsFPConstrained())
756 if (CGF.
Builder.getIsFPConstrained())
757 return CGF.
Builder.CreateConstrainedFPCall(F, Args);
759 return CGF.
Builder.CreateCall(F, Args);
768 unsigned IntrinsicID,
769 llvm::StringRef Name =
"") {
770 static_assert(N,
"expect non-empty argument");
772 for (
unsigned I = 0; I < N; ++I)
775 return CGF.
Builder.CreateCall(F, Args, Name);
781 unsigned IntrinsicID) {
786 return CGF.
Builder.CreateCall(F, {Src0, Src1});
792 unsigned IntrinsicID,
793 unsigned ConstrainedIntrinsicID) {
797 if (CGF.
Builder.getIsFPConstrained()) {
798 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF,
E);
800 {ResultType, Src0->getType()});
801 return CGF.
Builder.CreateConstrainedFPCall(F, {Src0});
805 return CGF.
Builder.CreateCall(F, Src0);
810 llvm::Intrinsic::ID IntrinsicID) {
818 llvm::Value *
Call = CGF.
Builder.CreateCall(F, Src0);
820 llvm::Value *Exp = CGF.
Builder.CreateExtractValue(
Call, 1);
828 llvm::Intrinsic::ID IntrinsicID) {
833 llvm::Function *F = CGF.
CGM.
getIntrinsic(IntrinsicID, {Val->getType()});
834 llvm::Value *
Call = CGF.
Builder.CreateCall(F, Val);
836 llvm::Value *SinResult = CGF.
Builder.CreateExtractValue(
Call, 0);
837 llvm::Value *CosResult = CGF.
Builder.CreateExtractValue(
Call, 1);
843 llvm::StoreInst *StoreSin =
845 llvm::StoreInst *StoreCos =
852 MDNode *
Domain = MDHelper.createAnonymousAliasScopeDomain();
853 MDNode *AliasScope = MDHelper.createAnonymousAliasScope(
Domain);
854 MDNode *AliasScopeList = MDNode::get(
Call->getContext(), AliasScope);
855 StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList);
856 StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
863 Call->setDoesNotAccessMemory();
872 llvm::Type *Ty =
V->getType();
873 int Width = Ty->getPrimitiveSizeInBits();
874 llvm::Type *IntTy = llvm::IntegerType::get(
C, Width);
876 if (Ty->isPPC_FP128Ty()) {
886 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
891 IntTy = llvm::IntegerType::get(
C, Width);
894 Value *Zero = llvm::Constant::getNullValue(IntTy);
895 return CGF.
Builder.CreateICmpSLT(
V, Zero);
904 auto IsIndirect = [&](
ABIArgInfo const &info) {
905 return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca();
910 return IsIndirect(ArgInfo.info);
915 const CallExpr *
E, llvm::Constant *calleeValue) {
916 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF,
E);
918 llvm::CallBase *callOrInvoke =
nullptr;
922 nullptr, &callOrInvoke, &FnInfo);
927 bool ConstWithoutErrnoAndExceptions =
931 if (ConstWithoutErrnoAndExceptions && CGF.
CGM.
getLangOpts().MathErrno &&
932 !CGF.
Builder.getIsFPConstrained() &&
Call.isScalar() &&
953 const llvm::Intrinsic::ID IntrinsicID,
954 llvm::Value *
X, llvm::Value *Y,
955 llvm::Value *&Carry) {
957 assert(
X->getType() == Y->getType() &&
958 "Arguments must be the same type. (Did you forget to make sure both "
959 "arguments have the same integer width?)");
962 llvm::Value *Tmp = CGF.
Builder.CreateCall(Callee, {
X, Y});
963 Carry = CGF.
Builder.CreateExtractValue(Tmp, 1);
964 return CGF.
Builder.CreateExtractValue(Tmp, 0);
971 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
972 Call->addRangeRetAttr(CR);
973 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
978 struct WidthAndSignedness {
984static WidthAndSignedness
996static struct WidthAndSignedness
998 assert(Types.size() > 0 &&
"Empty list of types.");
1002 for (
const auto &
Type : Types) {
1011 for (
const auto &
Type : Types) {
1013 if (Width < MinWidth) {
1022 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
1033 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
1038 return ConstantInt::get(ResType, (
Type & 2) ? 0 : -1,
true);
1042CodeGenFunction::evaluateOrEmitBuiltinObjectSize(
const Expr *
E,
unsigned Type,
1043 llvm::IntegerType *ResType,
1044 llvm::Value *EmittedE,
1048 return emitBuiltinObjectSize(
E,
Type, ResType, EmittedE, IsDynamic);
1049 return ConstantInt::get(ResType, ObjectSize,
true);
1063 if ((!FAMDecl || FD == FAMDecl) &&
1065 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
1093 if (FD->getType()->isCountAttributedType())
1105CodeGenFunction::emitFlexibleArrayMemberSize(
const Expr *
E,
unsigned Type,
1106 llvm::IntegerType *ResType) {
1135 const Expr *Idx =
nullptr;
1137 if (
const auto *UO = dyn_cast<UnaryOperator>(
Base);
1138 UO && UO->getOpcode() == UO_AddrOf) {
1140 if (
const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
1141 Base = ASE->getBase()->IgnoreParenImpCasts();
1144 if (
const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
1145 int64_t Val = IL->getValue().getSExtValue();
1162 if (
const auto *ME = dyn_cast<MemberExpr>(
Base)) {
1164 const ValueDecl *VD = ME->getMemberDecl();
1166 FAMDecl = dyn_cast<FieldDecl>(VD);
1169 }
else if (
const auto *DRE = dyn_cast<DeclRefExpr>(
Base)) {
1171 QualType Ty = DRE->getDecl()->getType();
1224 if (isa<DeclRefExpr>(
Base))
1248 CountedByInst =
Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1251 Value *IdxInst =
nullptr;
1259 IdxInst =
Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1264 Builder.CreateSub(CountedByInst, IdxInst,
"", !IsSigned, IsSigned);
1270 llvm::Constant *ElemSize =
1271 llvm::ConstantInt::get(ResType,
Size.getQuantity(), IsSigned);
1273 Builder.CreateMul(CountedByInst, ElemSize,
"", !IsSigned, IsSigned);
1274 Res =
Builder.CreateIntCast(Res, ResType, IsSigned);
1283 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1296CodeGenFunction::emitBuiltinObjectSize(
const Expr *
E,
unsigned Type,
1297 llvm::IntegerType *ResType,
1298 llvm::Value *EmittedE,
bool IsDynamic) {
1302 auto *Param = dyn_cast<ParmVarDecl>(
D->getDecl());
1303 auto *PS =
D->getDecl()->
getAttr<PassObjectSizeAttr>();
1304 if (Param !=
nullptr && PS !=
nullptr &&
1306 auto Iter = SizeArguments.find(Param);
1307 assert(
Iter != SizeArguments.end());
1310 auto DIter = LocalDeclMap.find(
D);
1311 assert(DIter != LocalDeclMap.end());
1321 if (
Value *
V = emitFlexibleArrayMemberSize(
E,
Type, ResType))
1332 assert(Ptr->
getType()->isPointerTy() &&
1333 "Non-pointer passed to __builtin_object_size?");
1349 enum ActionKind : uint8_t { TestOnly, Complement, Reset,
Set };
1350 enum InterlockingKind : uint8_t {
1359 InterlockingKind Interlocking;
1362 static BitTest decodeBitTestBuiltin(
unsigned BuiltinID);
1367BitTest BitTest::decodeBitTestBuiltin(
unsigned BuiltinID) {
1368 switch (BuiltinID) {
1370 case Builtin::BI_bittest:
1371 return {TestOnly, Unlocked,
false};
1372 case Builtin::BI_bittestandcomplement:
1373 return {Complement, Unlocked,
false};
1374 case Builtin::BI_bittestandreset:
1375 return {Reset, Unlocked,
false};
1376 case Builtin::BI_bittestandset:
1377 return {
Set, Unlocked,
false};
1378 case Builtin::BI_interlockedbittestandreset:
1379 return {Reset, Sequential,
false};
1380 case Builtin::BI_interlockedbittestandset:
1381 return {
Set, Sequential,
false};
1384 case Builtin::BI_bittest64:
1385 return {TestOnly, Unlocked,
true};
1386 case Builtin::BI_bittestandcomplement64:
1387 return {Complement, Unlocked,
true};
1388 case Builtin::BI_bittestandreset64:
1389 return {Reset, Unlocked,
true};
1390 case Builtin::BI_bittestandset64:
1391 return {
Set, Unlocked,
true};
1392 case Builtin::BI_interlockedbittestandreset64:
1393 return {Reset, Sequential,
true};
1394 case Builtin::BI_interlockedbittestandset64:
1395 return {
Set, Sequential,
true};
1398 case Builtin::BI_interlockedbittestandset_acq:
1399 return {
Set, Acquire,
false};
1400 case Builtin::BI_interlockedbittestandset_rel:
1401 return {
Set, Release,
false};
1402 case Builtin::BI_interlockedbittestandset_nf:
1403 return {
Set, NoFence,
false};
1404 case Builtin::BI_interlockedbittestandreset_acq:
1405 return {Reset, Acquire,
false};
1406 case Builtin::BI_interlockedbittestandreset_rel:
1407 return {Reset, Release,
false};
1408 case Builtin::BI_interlockedbittestandreset_nf:
1409 return {Reset, NoFence,
false};
1411 llvm_unreachable(
"expected only bittest intrinsics");
1416 case BitTest::TestOnly:
return '\0';
1417 case BitTest::Complement:
return 'c';
1418 case BitTest::Reset:
return 'r';
1419 case BitTest::Set:
return 's';
1421 llvm_unreachable(
"invalid action");
1429 char SizeSuffix = BT.Is64Bit ?
'q' :
'l';
1433 raw_svector_ostream AsmOS(
Asm);
1434 if (BT.Interlocking != BitTest::Unlocked)
1439 AsmOS << SizeSuffix <<
" $2, ($1)";
1442 std::string Constraints =
"={@ccc},r,r,~{cc},~{memory}";
1444 if (!MachineClobbers.empty()) {
1446 Constraints += MachineClobbers;
1448 llvm::IntegerType *IntType = llvm::IntegerType::get(
1451 llvm::FunctionType *FTy =
1452 llvm::FunctionType::get(CGF.
Int8Ty, {CGF.UnqualPtrTy, IntType},
false);
1454 llvm::InlineAsm *IA =
1455 llvm::InlineAsm::get(FTy,
Asm, Constraints,
true);
1456 return CGF.
Builder.CreateCall(IA, {BitBase, BitPos});
1459static llvm::AtomicOrdering
1462 case BitTest::Unlocked:
return llvm::AtomicOrdering::NotAtomic;
1463 case BitTest::Sequential:
return llvm::AtomicOrdering::SequentiallyConsistent;
1464 case BitTest::Acquire:
return llvm::AtomicOrdering::Acquire;
1465 case BitTest::Release:
return llvm::AtomicOrdering::Release;
1466 case BitTest::NoFence:
return llvm::AtomicOrdering::Monotonic;
1468 llvm_unreachable(
"invalid interlocking");
1481 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1493 BitPos, llvm::ConstantInt::get(BitPos->
getType(), 3),
"bittest.byteidx");
1495 "bittest.byteaddr"),
1499 llvm::ConstantInt::get(CGF.
Int8Ty, 0x7));
1502 Value *Mask =
nullptr;
1503 if (BT.Action != BitTest::TestOnly) {
1504 Mask = CGF.
Builder.CreateShl(llvm::ConstantInt::get(CGF.
Int8Ty, 1), PosLow,
1511 Value *OldByte =
nullptr;
1512 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1515 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1516 if (BT.Action == BitTest::Reset) {
1517 Mask = CGF.
Builder.CreateNot(Mask);
1518 RMWOp = llvm::AtomicRMWInst::And;
1524 Value *NewByte =
nullptr;
1525 switch (BT.Action) {
1526 case BitTest::TestOnly:
1529 case BitTest::Complement:
1530 NewByte = CGF.
Builder.CreateXor(OldByte, Mask);
1532 case BitTest::Reset:
1533 NewByte = CGF.
Builder.CreateAnd(OldByte, CGF.
Builder.CreateNot(Mask));
1536 NewByte = CGF.
Builder.CreateOr(OldByte, Mask);
1545 Value *ShiftedByte = CGF.
Builder.CreateLShr(OldByte, PosLow,
"bittest.shr");
1547 ShiftedByte, llvm::ConstantInt::get(CGF.
Int8Ty, 1),
"bittest.res");
1556 raw_svector_ostream AsmOS(
Asm);
1557 llvm::IntegerType *RetType = CGF.
Int32Ty;
1559 switch (BuiltinID) {
1560 case clang::PPC::BI__builtin_ppc_ldarx:
1564 case clang::PPC::BI__builtin_ppc_lwarx:
1568 case clang::PPC::BI__builtin_ppc_lharx:
1572 case clang::PPC::BI__builtin_ppc_lbarx:
1577 llvm_unreachable(
"Expected only PowerPC load reserve intrinsics");
1580 AsmOS <<
"$0, ${1:y}";
1582 std::string Constraints =
"=r,*Z,~{memory}";
1584 if (!MachineClobbers.empty()) {
1586 Constraints += MachineClobbers;
1590 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType},
false);
1592 llvm::InlineAsm *IA =
1593 llvm::InlineAsm::get(FTy,
Asm, Constraints,
true);
1594 llvm::CallInst *CI = CGF.
Builder.CreateCall(IA, {Addr});
1596 0, Attribute::get(CGF.
getLLVMContext(), Attribute::ElementType, RetType));
1601enum class MSVCSetJmpKind {
1613 llvm::Value *Arg1 =
nullptr;
1614 llvm::Type *Arg1Ty =
nullptr;
1616 bool IsVarArg =
false;
1617 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1620 Arg1 = llvm::ConstantInt::get(CGF.
IntTy, 0);
1623 Name = SJKind == MSVCSetJmpKind::_setjmp ?
"_setjmp" :
"_setjmpex";
1626 Arg1 = CGF.
Builder.CreateCall(
1629 Arg1 = CGF.
Builder.CreateCall(
1631 llvm::ConstantInt::get(CGF.
Int32Ty, 0));
1635 llvm::Type *ArgTypes[2] = {CGF.
Int8PtrTy, Arg1Ty};
1636 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1638 llvm::Attribute::ReturnsTwice);
1640 llvm::FunctionType::get(CGF.
IntTy, ArgTypes, IsVarArg), Name,
1641 ReturnsTwiceAttr,
true);
1643 llvm::Value *Buf = CGF.
Builder.CreateBitOrPointerCast(
1645 llvm::Value *Args[] = {Buf, Arg1};
1647 CB->setAttributes(ReturnsTwiceAttr);
1696static std::optional<CodeGenFunction::MSVCIntrin>
1699 switch (BuiltinID) {
1701 return std::nullopt;
1702 case clang::ARM::BI_BitScanForward:
1703 case clang::ARM::BI_BitScanForward64:
1704 return MSVCIntrin::_BitScanForward;
1705 case clang::ARM::BI_BitScanReverse:
1706 case clang::ARM::BI_BitScanReverse64:
1707 return MSVCIntrin::_BitScanReverse;
1708 case clang::ARM::BI_InterlockedAnd64:
1709 return MSVCIntrin::_InterlockedAnd;
1710 case clang::ARM::BI_InterlockedExchange64:
1711 return MSVCIntrin::_InterlockedExchange;
1712 case clang::ARM::BI_InterlockedExchangeAdd64:
1713 return MSVCIntrin::_InterlockedExchangeAdd;
1714 case clang::ARM::BI_InterlockedExchangeSub64:
1715 return MSVCIntrin::_InterlockedExchangeSub;
1716 case clang::ARM::BI_InterlockedOr64:
1717 return MSVCIntrin::_InterlockedOr;
1718 case clang::ARM::BI_InterlockedXor64:
1719 return MSVCIntrin::_InterlockedXor;
1720 case clang::ARM::BI_InterlockedDecrement64:
1721 return MSVCIntrin::_InterlockedDecrement;
1722 case clang::ARM::BI_InterlockedIncrement64:
1723 return MSVCIntrin::_InterlockedIncrement;
1724 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1725 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1726 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1727 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1728 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1729 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1730 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1731 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1732 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1733 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1734 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1735 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1736 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1737 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1738 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1739 case clang::ARM::BI_InterlockedExchange8_acq:
1740 case clang::ARM::BI_InterlockedExchange16_acq:
1741 case clang::ARM::BI_InterlockedExchange_acq:
1742 case clang::ARM::BI_InterlockedExchange64_acq:
1743 case clang::ARM::BI_InterlockedExchangePointer_acq:
1744 return MSVCIntrin::_InterlockedExchange_acq;
1745 case clang::ARM::BI_InterlockedExchange8_rel:
1746 case clang::ARM::BI_InterlockedExchange16_rel:
1747 case clang::ARM::BI_InterlockedExchange_rel:
1748 case clang::ARM::BI_InterlockedExchange64_rel:
1749 case clang::ARM::BI_InterlockedExchangePointer_rel:
1750 return MSVCIntrin::_InterlockedExchange_rel;
1751 case clang::ARM::BI_InterlockedExchange8_nf:
1752 case clang::ARM::BI_InterlockedExchange16_nf:
1753 case clang::ARM::BI_InterlockedExchange_nf:
1754 case clang::ARM::BI_InterlockedExchange64_nf:
1755 case clang::ARM::BI_InterlockedExchangePointer_nf:
1756 return MSVCIntrin::_InterlockedExchange_nf;
1757 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1758 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1759 case clang::ARM::BI_InterlockedCompareExchange_acq:
1760 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1761 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
1762 return MSVCIntrin::_InterlockedCompareExchange_acq;
1763 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1764 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1765 case clang::ARM::BI_InterlockedCompareExchange_rel:
1766 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1767 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
1768 return MSVCIntrin::_InterlockedCompareExchange_rel;
1769 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1770 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1771 case clang::ARM::BI_InterlockedCompareExchange_nf:
1772 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1773 return MSVCIntrin::_InterlockedCompareExchange_nf;
1774 case clang::ARM::BI_InterlockedOr8_acq:
1775 case clang::ARM::BI_InterlockedOr16_acq:
1776 case clang::ARM::BI_InterlockedOr_acq:
1777 case clang::ARM::BI_InterlockedOr64_acq:
1778 return MSVCIntrin::_InterlockedOr_acq;
1779 case clang::ARM::BI_InterlockedOr8_rel:
1780 case clang::ARM::BI_InterlockedOr16_rel:
1781 case clang::ARM::BI_InterlockedOr_rel:
1782 case clang::ARM::BI_InterlockedOr64_rel:
1783 return MSVCIntrin::_InterlockedOr_rel;
1784 case clang::ARM::BI_InterlockedOr8_nf:
1785 case clang::ARM::BI_InterlockedOr16_nf:
1786 case clang::ARM::BI_InterlockedOr_nf:
1787 case clang::ARM::BI_InterlockedOr64_nf:
1788 return MSVCIntrin::_InterlockedOr_nf;
1789 case clang::ARM::BI_InterlockedXor8_acq:
1790 case clang::ARM::BI_InterlockedXor16_acq:
1791 case clang::ARM::BI_InterlockedXor_acq:
1792 case clang::ARM::BI_InterlockedXor64_acq:
1793 return MSVCIntrin::_InterlockedXor_acq;
1794 case clang::ARM::BI_InterlockedXor8_rel:
1795 case clang::ARM::BI_InterlockedXor16_rel:
1796 case clang::ARM::BI_InterlockedXor_rel:
1797 case clang::ARM::BI_InterlockedXor64_rel:
1798 return MSVCIntrin::_InterlockedXor_rel;
1799 case clang::ARM::BI_InterlockedXor8_nf:
1800 case clang::ARM::BI_InterlockedXor16_nf:
1801 case clang::ARM::BI_InterlockedXor_nf:
1802 case clang::ARM::BI_InterlockedXor64_nf:
1803 return MSVCIntrin::_InterlockedXor_nf;
1804 case clang::ARM::BI_InterlockedAnd8_acq:
1805 case clang::ARM::BI_InterlockedAnd16_acq:
1806 case clang::ARM::BI_InterlockedAnd_acq:
1807 case clang::ARM::BI_InterlockedAnd64_acq:
1808 return MSVCIntrin::_InterlockedAnd_acq;
1809 case clang::ARM::BI_InterlockedAnd8_rel:
1810 case clang::ARM::BI_InterlockedAnd16_rel:
1811 case clang::ARM::BI_InterlockedAnd_rel:
1812 case clang::ARM::BI_InterlockedAnd64_rel:
1813 return MSVCIntrin::_InterlockedAnd_rel;
1814 case clang::ARM::BI_InterlockedAnd8_nf:
1815 case clang::ARM::BI_InterlockedAnd16_nf:
1816 case clang::ARM::BI_InterlockedAnd_nf:
1817 case clang::ARM::BI_InterlockedAnd64_nf:
1818 return MSVCIntrin::_InterlockedAnd_nf;
1819 case clang::ARM::BI_InterlockedIncrement16_acq:
1820 case clang::ARM::BI_InterlockedIncrement_acq:
1821 case clang::ARM::BI_InterlockedIncrement64_acq:
1822 return MSVCIntrin::_InterlockedIncrement_acq;
1823 case clang::ARM::BI_InterlockedIncrement16_rel:
1824 case clang::ARM::BI_InterlockedIncrement_rel:
1825 case clang::ARM::BI_InterlockedIncrement64_rel:
1826 return MSVCIntrin::_InterlockedIncrement_rel;
1827 case clang::ARM::BI_InterlockedIncrement16_nf:
1828 case clang::ARM::BI_InterlockedIncrement_nf:
1829 case clang::ARM::BI_InterlockedIncrement64_nf:
1830 return MSVCIntrin::_InterlockedIncrement_nf;
1831 case clang::ARM::BI_InterlockedDecrement16_acq:
1832 case clang::ARM::BI_InterlockedDecrement_acq:
1833 case clang::ARM::BI_InterlockedDecrement64_acq:
1834 return MSVCIntrin::_InterlockedDecrement_acq;
1835 case clang::ARM::BI_InterlockedDecrement16_rel:
1836 case clang::ARM::BI_InterlockedDecrement_rel:
1837 case clang::ARM::BI_InterlockedDecrement64_rel:
1838 return MSVCIntrin::_InterlockedDecrement_rel;
1839 case clang::ARM::BI_InterlockedDecrement16_nf:
1840 case clang::ARM::BI_InterlockedDecrement_nf:
1841 case clang::ARM::BI_InterlockedDecrement64_nf:
1842 return MSVCIntrin::_InterlockedDecrement_nf;
1844 llvm_unreachable(
"must return from switch");
// Translates an AArch64-specific MSVC builtin ID (clang::AArch64::BI_*) into
// the matching target-independent MSVCIntrin enumerator, returned inside a
// std::optional.
// NOTE(review): the function-name line and the label guarding the
// `return std::nullopt;` are not visible in this excerpt -- presumably that
// return is the switch's default for unrecognized IDs; confirm.
1847static std::optional<CodeGenFunction::MSVCIntrin>
1850 switch (BuiltinID) {
1852 return std::nullopt;
// Bit scans: the plain and the 64 builtin share a single enumerator.
1853 case clang::AArch64::BI_BitScanForward:
1854 case clang::AArch64::BI_BitScanForward64:
1855 return MSVCIntrin::_BitScanForward;
1856 case clang::AArch64::BI_BitScanReverse:
1857 case clang::AArch64::BI_BitScanReverse64:
1858 return MSVCIntrin::_BitScanReverse;
// Interlocked builtins with a 64 suffix and no ordering suffix map onto the
// unsuffixed enumerators.
1859 case clang::AArch64::BI_InterlockedAnd64:
1860 return MSVCIntrin::_InterlockedAnd;
1861 case clang::AArch64::BI_InterlockedExchange64:
1862 return MSVCIntrin::_InterlockedExchange;
1863 case clang::AArch64::BI_InterlockedExchangeAdd64:
1864 return MSVCIntrin::_InterlockedExchangeAdd;
1865 case clang::AArch64::BI_InterlockedExchangeSub64:
1866 return MSVCIntrin::_InterlockedExchangeSub;
1867 case clang::AArch64::BI_InterlockedOr64:
1868 return MSVCIntrin::_InterlockedOr;
1869 case clang::AArch64::BI_InterlockedXor64:
1870 return MSVCIntrin::_InterlockedXor;
1871 case clang::AArch64::BI_InterlockedDecrement64:
1872 return MSVCIntrin::_InterlockedDecrement;
1873 case clang::AArch64::BI_InterlockedIncrement64:
1874 return MSVCIntrin::_InterlockedIncrement;
// Ordering-suffixed builtins (_acq / _rel / _nf): the 8, 16, unsuffixed and 64
// forms of one operation all collapse onto one enumerator per suffix.
1875 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1876 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1877 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1878 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1879 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1880 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1881 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1882 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1883 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1884 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1885 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1886 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1887 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1888 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1889 return MSVCIntrin::_InterlockedExchangeAdd_nf;
// Exchange additionally folds the Pointer form into the same enumerator.
1890 case clang::AArch64::BI_InterlockedExchange8_acq:
1891 case clang::AArch64::BI_InterlockedExchange16_acq:
1892 case clang::AArch64::BI_InterlockedExchange_acq:
1893 case clang::AArch64::BI_InterlockedExchange64_acq:
1894 case clang::AArch64::BI_InterlockedExchangePointer_acq:
1895 return MSVCIntrin::_InterlockedExchange_acq;
1896 case clang::AArch64::BI_InterlockedExchange8_rel:
1897 case clang::AArch64::BI_InterlockedExchange16_rel:
1898 case clang::AArch64::BI_InterlockedExchange_rel:
1899 case clang::AArch64::BI_InterlockedExchange64_rel:
1900 case clang::AArch64::BI_InterlockedExchangePointer_rel:
1901 return MSVCIntrin::_InterlockedExchange_rel;
1902 case clang::AArch64::BI_InterlockedExchange8_nf:
1903 case clang::AArch64::BI_InterlockedExchange16_nf:
1904 case clang::AArch64::BI_InterlockedExchange_nf:
1905 case clang::AArch64::BI_InterlockedExchange64_nf:
1906 case clang::AArch64::BI_InterlockedExchangePointer_nf:
1907 return MSVCIntrin::_InterlockedExchange_nf;
// Compare-exchange: the _acq and _rel groups include the Pointer form.
1908 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1909 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1910 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1911 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1912 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
1913 return MSVCIntrin::_InterlockedCompareExchange_acq;
1914 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1915 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1916 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1917 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1918 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
1919 return MSVCIntrin::_InterlockedCompareExchange_rel;
// NOTE(review): unlike the _acq and _rel groups above, no
// ...CompareExchangePointer_nf label is listed here -- confirm it is handled
// elsewhere.
1920 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1921 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1922 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1923 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1924 return MSVCIntrin::_InterlockedCompareExchange_nf;
// 128-bit compare-exchange: one enumerator per ordering suffix.
1925 case clang::AArch64::BI_InterlockedCompareExchange128:
1926 return MSVCIntrin::_InterlockedCompareExchange128;
1927 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1928 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1929 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1930 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1931 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1932 return MSVCIntrin::_InterlockedCompareExchange128_rel;
// Bitwise Or / Xor / And with ordering suffixes.
1933 case clang::AArch64::BI_InterlockedOr8_acq:
1934 case clang::AArch64::BI_InterlockedOr16_acq:
1935 case clang::AArch64::BI_InterlockedOr_acq:
1936 case clang::AArch64::BI_InterlockedOr64_acq:
1937 return MSVCIntrin::_InterlockedOr_acq;
1938 case clang::AArch64::BI_InterlockedOr8_rel:
1939 case clang::AArch64::BI_InterlockedOr16_rel:
1940 case clang::AArch64::BI_InterlockedOr_rel:
1941 case clang::AArch64::BI_InterlockedOr64_rel:
1942 return MSVCIntrin::_InterlockedOr_rel;
1943 case clang::AArch64::BI_InterlockedOr8_nf:
1944 case clang::AArch64::BI_InterlockedOr16_nf:
1945 case clang::AArch64::BI_InterlockedOr_nf:
1946 case clang::AArch64::BI_InterlockedOr64_nf:
1947 return MSVCIntrin::_InterlockedOr_nf;
1948 case clang::AArch64::BI_InterlockedXor8_acq:
1949 case clang::AArch64::BI_InterlockedXor16_acq:
1950 case clang::AArch64::BI_InterlockedXor_acq:
1951 case clang::AArch64::BI_InterlockedXor64_acq:
1952 return MSVCIntrin::_InterlockedXor_acq;
1953 case clang::AArch64::BI_InterlockedXor8_rel:
1954 case clang::AArch64::BI_InterlockedXor16_rel:
1955 case clang::AArch64::BI_InterlockedXor_rel:
1956 case clang::AArch64::BI_InterlockedXor64_rel:
1957 return MSVCIntrin::_InterlockedXor_rel;
1958 case clang::AArch64::BI_InterlockedXor8_nf:
1959 case clang::AArch64::BI_InterlockedXor16_nf:
1960 case clang::AArch64::BI_InterlockedXor_nf:
1961 case clang::AArch64::BI_InterlockedXor64_nf:
1962 return MSVCIntrin::_InterlockedXor_nf;
1963 case clang::AArch64::BI_InterlockedAnd8_acq:
1964 case clang::AArch64::BI_InterlockedAnd16_acq:
1965 case clang::AArch64::BI_InterlockedAnd_acq:
1966 case clang::AArch64::BI_InterlockedAnd64_acq:
1967 return MSVCIntrin::_InterlockedAnd_acq;
1968 case clang::AArch64::BI_InterlockedAnd8_rel:
1969 case clang::AArch64::BI_InterlockedAnd16_rel:
1970 case clang::AArch64::BI_InterlockedAnd_rel:
1971 case clang::AArch64::BI_InterlockedAnd64_rel:
1972 return MSVCIntrin::_InterlockedAnd_rel;
1973 case clang::AArch64::BI_InterlockedAnd8_nf:
1974 case clang::AArch64::BI_InterlockedAnd16_nf:
1975 case clang::AArch64::BI_InterlockedAnd_nf:
1976 case clang::AArch64::BI_InterlockedAnd64_nf:
1977 return MSVCIntrin::_InterlockedAnd_nf;
// Increment / Decrement: only the 16, unsuffixed and 64 forms are listed (no
// 8-bit labels appear in these groups).
1978 case clang::AArch64::BI_InterlockedIncrement16_acq:
1979 case clang::AArch64::BI_InterlockedIncrement_acq:
1980 case clang::AArch64::BI_InterlockedIncrement64_acq:
1981 return MSVCIntrin::_InterlockedIncrement_acq;
1982 case clang::AArch64::BI_InterlockedIncrement16_rel:
1983 case clang::AArch64::BI_InterlockedIncrement_rel:
1984 case clang::AArch64::BI_InterlockedIncrement64_rel:
1985 return MSVCIntrin::_InterlockedIncrement_rel;
1986 case clang::AArch64::BI_InterlockedIncrement16_nf:
1987 case clang::AArch64::BI_InterlockedIncrement_nf:
1988 case clang::AArch64::BI_InterlockedIncrement64_nf:
1989 return MSVCIntrin::_InterlockedIncrement_nf;
1990 case clang::AArch64::BI_InterlockedDecrement16_acq:
1991 case clang::AArch64::BI_InterlockedDecrement_acq:
1992 case clang::AArch64::BI_InterlockedDecrement64_acq:
1993 return MSVCIntrin::_InterlockedDecrement_acq;
1994 case clang::AArch64::BI_InterlockedDecrement16_rel:
1995 case clang::AArch64::BI_InterlockedDecrement_rel:
1996 case clang::AArch64::BI_InterlockedDecrement64_rel:
1997 return MSVCIntrin::_InterlockedDecrement_rel;
1998 case clang::AArch64::BI_InterlockedDecrement16_nf:
1999 case clang::AArch64::BI_InterlockedDecrement_nf:
2000 case clang::AArch64::BI_InterlockedDecrement64_nf:
2001 return MSVCIntrin::_InterlockedDecrement_nf;
// Every path through the switch is expected to have returned already.
2003 llvm_unreachable(
"must return from switch");
// Translates an X86-specific MSVC builtin ID (clang::X86::BI_*) into the
// matching target-independent MSVCIntrin enumerator, returned inside a
// std::optional.
// NOTE(review): the function-name line and the label guarding the
// `return std::nullopt;` are not visible in this excerpt -- presumably that
// return is the switch's default for unrecognized IDs; confirm.
2006static std::optional<CodeGenFunction::MSVCIntrin>
2009 switch (BuiltinID) {
2011 return std::nullopt;
// Bit scans: the plain and the 64 builtin share a single enumerator.
2012 case clang::X86::BI_BitScanForward:
2013 case clang::X86::BI_BitScanForward64:
2014 return MSVCIntrin::_BitScanForward;
2015 case clang::X86::BI_BitScanReverse:
2016 case clang::X86::BI_BitScanReverse64:
2017 return MSVCIntrin::_BitScanReverse;
// Only 64-suffixed (and the 128-bit compare-exchange) interlocked builtins are
// listed for X86; each maps onto the unsuffixed enumerator of the same
// operation.
2018 case clang::X86::BI_InterlockedAnd64:
2019 return MSVCIntrin::_InterlockedAnd;
2020 case clang::X86::BI_InterlockedCompareExchange128:
2021 return MSVCIntrin::_InterlockedCompareExchange128;
2022 case clang::X86::BI_InterlockedExchange64:
2023 return MSVCIntrin::_InterlockedExchange;
2024 case clang::X86::BI_InterlockedExchangeAdd64:
2025 return MSVCIntrin::_InterlockedExchangeAdd;
2026 case clang::X86::BI_InterlockedExchangeSub64:
2027 return MSVCIntrin::_InterlockedExchangeSub;
2028 case clang::X86::BI_InterlockedOr64:
2029 return MSVCIntrin::_InterlockedOr;
2030 case clang::X86::BI_InterlockedXor64:
2031 return MSVCIntrin::_InterlockedXor;
2032 case clang::X86::BI_InterlockedDecrement64:
2033 return MSVCIntrin::_InterlockedDecrement;
2034 case clang::X86::BI_InterlockedIncrement64:
2035 return MSVCIntrin::_InterlockedIncrement;
// Every path through the switch is expected to have returned already.
2037 llvm_unreachable(
"must return from switch");
// Emits IR for a target-independent MSVCIntrin value by switching on it.
// NOTE(review): the function header and many statements of this switch are not
// visible in this excerpt; the comments below describe only what is shown.
2043 switch (BuiltinID) {
// Bit scans: the visible code compares the argument against zero, branches to
// End when it is zero and to NotZero otherwise, and merges the outcome through
// the two-input "bitscan_result" PHI.
2044 case MSVCIntrin::_BitScanForward:
2045 case MSVCIntrin::_BitScanReverse: {
2049 llvm::Type *ArgType = ArgValue->
getType();
2050 llvm::Type *IndexType = IndexAddress.getElementType();
2053 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
2054 Value *ResZero = llvm::Constant::getNullValue(ResultType);
2055 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
2060 PHINode *
Result =
Builder.CreatePHI(ResultType, 2,
"bitscan_result");
2063 Value *IsZero =
Builder.CreateICmpEQ(ArgValue, ArgZero);
2065 Builder.CreateCondBr(IsZero, End, NotZero);
2068 Builder.SetInsertPoint(NotZero);
// Forward scan: ZeroCount is converted to the index type as an unsigned
// value. NOTE(review): how ZeroCount is produced is not visible here.
2070 if (BuiltinID == MSVCIntrin::_BitScanForward) {
2073 ZeroCount =
Builder.CreateIntCast(ZeroCount, IndexType,
false);
// Reverse scan: the index is (bit width of the argument - 1) - ZeroCount.
2076 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
2077 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
2081 ZeroCount =
Builder.CreateIntCast(ZeroCount, IndexType,
false);
2082 Value *Index =
Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
// The non-zero path feeds ResOne into the result PHI.
2086 Result->addIncoming(ResOne, NotZero);
// Unsuffixed interlocked operations; their bodies are not visible in this
// excerpt.
2091 case MSVCIntrin::_InterlockedAnd:
2093 case MSVCIntrin::_InterlockedExchange:
2095 case MSVCIntrin::_InterlockedExchangeAdd:
2097 case MSVCIntrin::_InterlockedExchangeSub:
2099 case MSVCIntrin::_InterlockedOr:
2101 case MSVCIntrin::_InterlockedXor:
// Ordering suffixes map onto LLVM atomic orderings in the visible argument
// tails: _acq -> Acquire, _rel -> Release, _nf -> Monotonic.
2103 case MSVCIntrin::_InterlockedExchangeAdd_acq:
2105 AtomicOrdering::Acquire);
2106 case MSVCIntrin::_InterlockedExchangeAdd_rel:
2108 AtomicOrdering::Release);
2109 case MSVCIntrin::_InterlockedExchangeAdd_nf:
2111 AtomicOrdering::Monotonic);
2112 case MSVCIntrin::_InterlockedExchange_acq:
2114 AtomicOrdering::Acquire);
2115 case MSVCIntrin::_InterlockedExchange_rel:
2117 AtomicOrdering::Release);
2118 case MSVCIntrin::_InterlockedExchange_nf:
2120 AtomicOrdering::Monotonic);
// Compare-exchange variants; only the unsuffixed 128-bit form shows its
// ordering here (SequentiallyConsistent).
2121 case MSVCIntrin::_InterlockedCompareExchange:
2123 case MSVCIntrin::_InterlockedCompareExchange_acq:
2125 case MSVCIntrin::_InterlockedCompareExchange_rel:
2127 case MSVCIntrin::_InterlockedCompareExchange_nf:
2129 case MSVCIntrin::_InterlockedCompareExchange128:
2131 *
this,
E, AtomicOrdering::SequentiallyConsistent);
2132 case MSVCIntrin::_InterlockedCompareExchange128_acq:
2134 case MSVCIntrin::_InterlockedCompareExchange128_rel:
2136 case MSVCIntrin::_InterlockedCompareExchange128_nf:
2138 case MSVCIntrin::_InterlockedOr_acq:
2140 AtomicOrdering::Acquire);
2141 case MSVCIntrin::_InterlockedOr_rel:
2143 AtomicOrdering::Release);
2144 case MSVCIntrin::_InterlockedOr_nf:
2146 AtomicOrdering::Monotonic);
2147 case MSVCIntrin::_InterlockedXor_acq:
2149 AtomicOrdering::Acquire);
2150 case MSVCIntrin::_InterlockedXor_rel:
2152 AtomicOrdering::Release);
2153 case MSVCIntrin::_InterlockedXor_nf:
2155 AtomicOrdering::Monotonic);
2156 case MSVCIntrin::_InterlockedAnd_acq:
2158 AtomicOrdering::Acquire);
2159 case MSVCIntrin::_InterlockedAnd_rel:
2161 AtomicOrdering::Release);
2162 case MSVCIntrin::_InterlockedAnd_nf:
2164 AtomicOrdering::Monotonic);
// Increment / Decrement variants; their bodies are not visible in this
// excerpt.
2165 case MSVCIntrin::_InterlockedIncrement_acq:
2167 case MSVCIntrin::_InterlockedIncrement_rel:
2169 case MSVCIntrin::_InterlockedIncrement_nf:
2171 case MSVCIntrin::_InterlockedDecrement_acq:
2173 case MSVCIntrin::_InterlockedDecrement_rel:
2175 case MSVCIntrin::_InterlockedDecrement_nf:
2178 case MSVCIntrin::_InterlockedDecrement:
2180 case MSVCIntrin::_InterlockedIncrement:
// __fastfail: emitted as an inline-asm call taking one i32 operand, with the
// asm string and register constraint chosen per target architecture.
2183 case MSVCIntrin::__fastfail: {
2188 StringRef
Asm, Constraints;
// Visible constraints: x86/x86_64 -> {cx}, thumb -> {r0}, aarch64 -> {w0}.
// Only the aarch64 asm string ("brk #0xF003") is visible in this excerpt.
2193 case llvm::Triple::x86:
2194 case llvm::Triple::x86_64:
2196 Constraints =
"{cx}";
2198 case llvm::Triple::thumb:
2200 Constraints =
"{r0}";
2202 case llvm::Triple::aarch64:
2203 Asm =
"brk #0xF003";
2204 Constraints =
"{w0}";
// The asm is typed as void(i32); the trailing `true` passed to
// InlineAsm::get is presumably the has-side-effects flag -- confirm against
// the InlineAsm::get signature.
2206 llvm::FunctionType *FTy = llvm::FunctionType::get(
VoidTy, {
Int32Ty},
false);
2207 llvm::InlineAsm *IA =
2208 llvm::InlineAsm::get(FTy,
Asm, Constraints,
true);
// The resulting call instruction is given the NoReturn attribute.
2209 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2211 llvm::Attribute::NoReturn);
2213 CI->setAttributes(NoReturnAttr);
// Reached only when no case above returned.
2217 llvm_unreachable(
"Incorrect MSVC intrinsic!");
2223 CallObjCArcUse(llvm::Value *
object) : object(object) {}
2224 llvm::Value *object;
2233 BuiltinCheckKind Kind) {
2235 "Unsupported builtin check kind");
2241 SanitizerScope SanScope(
this);
2243 ArgValue, llvm::Constant::getNullValue(ArgValue->
getType()));
2244 EmitCheck(std::make_pair(Cond, SanitizerKind::SO_Builtin),
2245 SanitizerHandler::InvalidBuiltin,
2247 llvm::ConstantInt::get(
Builder.getInt8Ty(), Kind)},
2257 SanitizerScope SanScope(
this);
2259 std::make_pair(ArgValue, SanitizerKind::SO_Builtin),
2260 SanitizerHandler::InvalidBuiltin,
// Emits a call to the llvm.abs intrinsic on ArgValue; the second operand is an
// i1 constant built from HasNSW.
// NOTE(review): the signature line is not visible in this excerpt; the name
// EmitAbs is inferred from the call `EmitAbs(CGF, ArgValue, true)` that
// appears a few lines further down.
2268 return CGF.
Builder.CreateBinaryIntrinsic(
2269 Intrinsic::abs, ArgValue,
2270 ConstantInt::get(CGF.
Builder.getInt1Ty(), HasNSW));
2274 bool SanitizeOverflow) {
// Computes an absolute value with an explicit overflow check on the negation.
// NOTE(review): the start of the signature and several statements are not
// visible in this excerpt; comments describe only what is shown.
//
// Fast path: a constant argument that is not the minimum signed value cannot
// overflow, so it is lowered directly through EmitAbs with the flag set to
// true.
2278 if (
const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2279 if (!VCI->isMinSignedValue())
2280 return EmitAbs(CGF, ArgValue,
true);
2283 CodeGenFunction::SanitizerScope SanScope(&CGF);
// The negation is computed as 0 - ArgValue via ssub.with.overflow; element 1
// of the returned aggregate is the overflow bit.
2285 Constant *Zero = Constant::getNullValue(ArgValue->
getType());
2286 Value *ResultAndOverflow = CGF.
Builder.CreateBinaryIntrinsic(
2287 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2290 CGF.
Builder.CreateExtractValue(ResultAndOverflow, 1));
// With SanitizeOverflow set, a signed-integer-overflow sanitizer check using
// the NegateOverflow handler is emitted; the EmitTrapCheck call below
// presumably belongs to the other branch (its `else` line is not visible).
2293 if (SanitizeOverflow) {
2294 CGF.
EmitCheck({{NotOverflow, SanitizerKind::SO_SignedIntegerOverflow}},
2295 SanitizerHandler::NegateOverflow,
2300 CGF.
EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
// Select the negated value when the argument is negative, otherwise the
// argument itself.
2302 Value *CmpResult = CGF.
Builder.CreateICmpSLT(ArgValue, Zero,
"abscond");
2303 return CGF.
Builder.CreateSelect(CmpResult,
Result, ArgValue,
"abs");
2308 QualType UnsignedTy =
C.getIntTypeForBitwidth(Size * 8,
false);
2309 return C.getCanonicalType(UnsignedTy);
2319 raw_svector_ostream OS(Name);
2320 OS <<
"__os_log_helper";
2324 for (
const auto &Item : Layout.
Items)
2325 OS <<
"_" <<
int(Item.getSizeByte()) <<
"_"
2326 <<
int(Item.getDescriptorByte());
2329 if (llvm::Function *F =
CGM.
getModule().getFunction(Name))
2339 for (
unsigned int I = 0,
E = Layout.
Items.size(); I <
E; ++I) {
2340 char Size = Layout.
Items[I].getSizeByte();
2347 &Ctx.
Idents.
get(std::string(
"arg") + llvm::to_string(I)), ArgTy,
2349 ArgTys.emplace_back(ArgTy);
2360 llvm::Function *
Fn = llvm::Function::Create(
2361 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &
CGM.
getModule());
2362 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2365 Fn->setDoesNotThrow();
2369 Fn->addFnAttr(llvm::Attribute::NoInline);
2387 for (
const auto &Item : Layout.
Items) {
2389 Builder.getInt8(Item.getDescriptorByte()),
2392 Builder.getInt8(Item.getSizeByte()),
2396 if (!
Size.getQuantity())
2413 assert(
E.getNumArgs() >= 2 &&
2414 "__builtin_os_log_format takes at least 2 arguments");
2425 for (
const auto &Item : Layout.
Items) {
2426 int Size = Item.getSizeByte();
2430 llvm::Value *ArgVal;
2434 for (
unsigned I = 0,
E = Item.getMaskType().size(); I <
E; ++I)
2435 Val |= ((
uint64_t)Item.getMaskType()[I]) << I * 8;
2436 ArgVal = llvm::Constant::getIntegerValue(
Int64Ty, llvm::APInt(64, Val));
2437 }
else if (
const Expr *TheExpr = Item.getExpr()) {
2443 auto LifetimeExtendObject = [&](
const Expr *
E) {
2451 if (isa<CallExpr>(
E) || isa<ObjCMessageExpr>(
E))
2456 if (TheExpr->getType()->isObjCRetainableType() &&
2457 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2459 "Only scalar can be a ObjC retainable type");
2460 if (!isa<Constant>(ArgVal)) {
2474 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2478 ArgVal =
Builder.getInt32(Item.getConstValue().getQuantity());
2481 unsigned ArgValSize =
2485 ArgVal =
Builder.CreateBitOrPointerCast(ArgVal,
IntTy);
2501 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2502 WidthAndSignedness ResultInfo) {
// True only for __builtin_mul_overflow where both operands and the result have
// the same width, both operands are unsigned, and the result is signed.
2503 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2504 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2505 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2510 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2512 WidthAndSignedness ResultInfo) {
2514 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2515 "Cannot specialize this multiply");
// NOTE(review): the start of the signature, the head of the assert above and
// the statements producing V1, V2 and Result are not visible in this excerpt.
// The visible tail below passes umul.with.overflow and HasOverflow to a helper
// whose name is not shown.
2520 llvm::Value *HasOverflow;
2522 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
// The unsigned product must also fit the signed result type: an unsigned
// compare against the signed maximum of the result width is OR-ed into the
// overflow flag.
2527 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2528 llvm::Value *IntMaxValue = llvm::ConstantInt::get(
Result->getType(), IntMax);
2530 llvm::Value *IntMaxOverflow = CGF.
Builder.CreateICmpUGT(
Result, IntMaxValue);
2531 HasOverflow = CGF.
Builder.CreateOr(HasOverflow, IntMaxOverflow);
2543 WidthAndSignedness Op1Info,
2544 WidthAndSignedness Op2Info,
2545 WidthAndSignedness ResultInfo) {
// True only for __builtin_mul_overflow where the wider operand is at least as
// wide as the result and exactly one of the two operands is signed.
2546 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2547 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2548 Op1Info.Signed != Op2Info.Signed;
2555 WidthAndSignedness Op1Info,
const clang::Expr *Op2,
2556 WidthAndSignedness Op2Info,
2558 WidthAndSignedness ResultInfo) {
2560 Op2Info, ResultInfo) &&
2561 "Not a mixed-sign multipliction we can specialize");
// NOTE(review): the start of the signature and several statements of this
// function are not visible in this excerpt; comments describe only what is
// shown.
//
// Identify which operand is the signed one and which is the unsigned one.
2564 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2565 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2568 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2569 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
// Width mismatch handling; the bodies of these two branches are not visible
// (presumably the narrower operand is extended -- confirm).
2572 if (SignedOpWidth < UnsignedOpWidth)
2574 if (UnsignedOpWidth < SignedOpWidth)
2577 llvm::Type *OpTy =
Signed->getType();
2578 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2581 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
// Take the magnitude of the signed operand: 0 - Signed when it is negative,
// Signed itself otherwise.
2584 llvm::Value *IsNegative = CGF.
Builder.CreateICmpSLT(
Signed, Zero);
2585 llvm::Value *AbsOfNegative = CGF.
Builder.CreateSub(Zero,
Signed);
2586 llvm::Value *AbsSigned =
2587 CGF.
Builder.CreateSelect(IsNegative, AbsOfNegative,
Signed);
// Unsigned multiply of the magnitudes; the call that fills these in is not
// visible in this excerpt.
2590 llvm::Value *UnsignedOverflow;
2591 llvm::Value *UnsignedResult =
2595 llvm::Value *Overflow, *
Result;
// Signed result: the magnitude may not exceed the signed maximum of the result
// width, plus one when the product is negative (the zero-extended IsNegative
// bit is added to the limit).
2596 if (ResultInfo.Signed) {
2600 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2601 llvm::Value *MaxResult =
2602 CGF.
Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2603 CGF.
Builder.CreateZExt(IsNegative, OpTy));
2604 llvm::Value *SignedOverflow =
2605 CGF.
Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2606 Overflow = CGF.
Builder.CreateOr(UnsignedOverflow, SignedOverflow);
// Re-apply the sign to the magnitude.
2609 llvm::Value *NegativeResult = CGF.
Builder.CreateNeg(UnsignedResult);
2610 llvm::Value *SignedResult =
2611 CGF.
Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
// Unsigned result (presumably the `else` branch; its line is not visible): a
// negative signed operand with a non-zero product counts as underflow.
2615 llvm::Value *Underflow = CGF.
Builder.CreateAnd(
2616 IsNegative, CGF.
Builder.CreateIsNotNull(UnsignedResult));
2617 Overflow = CGF.
Builder.CreateOr(UnsignedOverflow, Underflow);
// When the result type is narrower than the operands, also flag products above
// the unsigned maximum of the result width.
2618 if (ResultInfo.Width < OpWidth) {
2620 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2621 llvm::Value *TruncOverflow = CGF.
Builder.CreateICmpUGT(
2622 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2623 Overflow = CGF.
Builder.CreateOr(Overflow, TruncOverflow);
2628 IsNegative, CGF.
Builder.CreateNeg(UnsignedResult), UnsignedResult);
// Both values must have been produced by one of the branches above.
2632 assert(Overflow &&
Result &&
"Missing overflow or result");
2643 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2652 if (!Seen.insert(
Record).second)
2655 assert(
Record->hasDefinition() &&
2656 "Incomplete types should already be diagnosed");
2658 if (
Record->isDynamicClass())
2683 llvm::Type *Ty = Src->getType();
2684 ShiftAmt =
Builder.CreateIntCast(ShiftAmt, Ty,
false);
2687 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2694 switch (BuiltinID) {
2695#define MUTATE_LDBL(func) \
2696 case Builtin::BI__builtin_##func##l: \
2697 return Builtin::BI__builtin_##func##f128;
2766 if (CGF.
Builder.getIsFPConstrained() &&
2767 CGF.
Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2779 auto UBF = CGF->
CGM.
getModule().getOrInsertFunction(Name, FnTy);
2782 for (
auto &&FormalTy : FnTy->params())
2783 Args.push_back(llvm::PoisonValue::get(FormalTy));
2792 "Should not codegen for consteval builtins");
2799 !
Result.hasSideEffects()) {
2803 if (
Result.Val.isFloat())
2812 if (
getTarget().getTriple().isPPC64() &&
2813 &
getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2820 const unsigned BuiltinIDIfNoAsmLabel =
2821 FD->
hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2823 std::optional<bool> ErrnoOverriden;
2827 if (
E->hasStoredFPFeatures()) {
2829 if (OP.hasMathErrnoOverride())
2830 ErrnoOverriden = OP.getMathErrnoOverride();
2839 bool ErrnoOverridenToFalseWithOpt =
2840 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2858 switch (BuiltinID) {
2859 case Builtin::BI__builtin_fma:
2860 case Builtin::BI__builtin_fmaf:
2861 case Builtin::BI__builtin_fmal:
2862 case Builtin::BI__builtin_fmaf16:
2863 case Builtin::BIfma:
2864 case Builtin::BIfmaf:
2865 case Builtin::BIfmal: {
2867 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2875 bool ConstWithoutErrnoAndExceptions =
2877 bool ConstWithoutExceptions =
2895 bool ConstWithoutErrnoOrExceptions =
2896 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2897 bool GenerateIntrinsics =
2898 (ConstAlways && !OptNone) ||
2900 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2901 if (!GenerateIntrinsics) {
2902 GenerateIntrinsics =
2903 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2904 if (!GenerateIntrinsics)
2905 GenerateIntrinsics =
2906 ConstWithoutErrnoOrExceptions &&
2908 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2909 if (!GenerateIntrinsics)
2910 GenerateIntrinsics =
2911 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2913 if (GenerateIntrinsics) {
2914 switch (BuiltinIDIfNoAsmLabel) {
2915 case Builtin::BIacos:
2916 case Builtin::BIacosf:
2917 case Builtin::BIacosl:
2918 case Builtin::BI__builtin_acos:
2919 case Builtin::BI__builtin_acosf:
2920 case Builtin::BI__builtin_acosf16:
2921 case Builtin::BI__builtin_acosl:
2922 case Builtin::BI__builtin_acosf128:
2924 *
this,
E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2926 case Builtin::BIasin:
2927 case Builtin::BIasinf:
2928 case Builtin::BIasinl:
2929 case Builtin::BI__builtin_asin:
2930 case Builtin::BI__builtin_asinf:
2931 case Builtin::BI__builtin_asinf16:
2932 case Builtin::BI__builtin_asinl:
2933 case Builtin::BI__builtin_asinf128:
2935 *
this,
E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2937 case Builtin::BIatan:
2938 case Builtin::BIatanf:
2939 case Builtin::BIatanl:
2940 case Builtin::BI__builtin_atan:
2941 case Builtin::BI__builtin_atanf:
2942 case Builtin::BI__builtin_atanf16:
2943 case Builtin::BI__builtin_atanl:
2944 case Builtin::BI__builtin_atanf128:
2946 *
this,
E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2948 case Builtin::BIatan2:
2949 case Builtin::BIatan2f:
2950 case Builtin::BIatan2l:
2951 case Builtin::BI__builtin_atan2:
2952 case Builtin::BI__builtin_atan2f:
2953 case Builtin::BI__builtin_atan2f16:
2954 case Builtin::BI__builtin_atan2l:
2955 case Builtin::BI__builtin_atan2f128:
2957 *
this,
E, Intrinsic::atan2,
2958 Intrinsic::experimental_constrained_atan2));
2960 case Builtin::BIceil:
2961 case Builtin::BIceilf:
2962 case Builtin::BIceill:
2963 case Builtin::BI__builtin_ceil:
2964 case Builtin::BI__builtin_ceilf:
2965 case Builtin::BI__builtin_ceilf16:
2966 case Builtin::BI__builtin_ceill:
2967 case Builtin::BI__builtin_ceilf128:
2970 Intrinsic::experimental_constrained_ceil));
2972 case Builtin::BIcopysign:
2973 case Builtin::BIcopysignf:
2974 case Builtin::BIcopysignl:
2975 case Builtin::BI__builtin_copysign:
2976 case Builtin::BI__builtin_copysignf:
2977 case Builtin::BI__builtin_copysignf16:
2978 case Builtin::BI__builtin_copysignl:
2979 case Builtin::BI__builtin_copysignf128:
2981 emitBuiltinWithOneOverloadedType<2>(*
this,
E, Intrinsic::copysign));
2983 case Builtin::BIcos:
2984 case Builtin::BIcosf:
2985 case Builtin::BIcosl:
2986 case Builtin::BI__builtin_cos:
2987 case Builtin::BI__builtin_cosf:
2988 case Builtin::BI__builtin_cosf16:
2989 case Builtin::BI__builtin_cosl:
2990 case Builtin::BI__builtin_cosf128:
2993 Intrinsic::experimental_constrained_cos));
2995 case Builtin::BIcosh:
2996 case Builtin::BIcoshf:
2997 case Builtin::BIcoshl:
2998 case Builtin::BI__builtin_cosh:
2999 case Builtin::BI__builtin_coshf:
3000 case Builtin::BI__builtin_coshf16:
3001 case Builtin::BI__builtin_coshl:
3002 case Builtin::BI__builtin_coshf128:
3004 *
this,
E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
3006 case Builtin::BIexp:
3007 case Builtin::BIexpf:
3008 case Builtin::BIexpl:
3009 case Builtin::BI__builtin_exp:
3010 case Builtin::BI__builtin_expf:
3011 case Builtin::BI__builtin_expf16:
3012 case Builtin::BI__builtin_expl:
3013 case Builtin::BI__builtin_expf128:
3016 Intrinsic::experimental_constrained_exp));
3018 case Builtin::BIexp2:
3019 case Builtin::BIexp2f:
3020 case Builtin::BIexp2l:
3021 case Builtin::BI__builtin_exp2:
3022 case Builtin::BI__builtin_exp2f:
3023 case Builtin::BI__builtin_exp2f16:
3024 case Builtin::BI__builtin_exp2l:
3025 case Builtin::BI__builtin_exp2f128:
3028 Intrinsic::experimental_constrained_exp2));
3029 case Builtin::BI__builtin_exp10:
3030 case Builtin::BI__builtin_exp10f:
3031 case Builtin::BI__builtin_exp10f16:
3032 case Builtin::BI__builtin_exp10l:
3033 case Builtin::BI__builtin_exp10f128: {
3035 if (
Builder.getIsFPConstrained())
3038 emitBuiltinWithOneOverloadedType<1>(*
this,
E, Intrinsic::exp10));
3040 case Builtin::BIfabs:
3041 case Builtin::BIfabsf:
3042 case Builtin::BIfabsl:
3043 case Builtin::BI__builtin_fabs:
3044 case Builtin::BI__builtin_fabsf:
3045 case Builtin::BI__builtin_fabsf16:
3046 case Builtin::BI__builtin_fabsl:
3047 case Builtin::BI__builtin_fabsf128:
3049 emitBuiltinWithOneOverloadedType<1>(*
this,
E, Intrinsic::fabs));
3051 case Builtin::BIfloor:
3052 case Builtin::BIfloorf:
3053 case Builtin::BIfloorl:
3054 case Builtin::BI__builtin_floor:
3055 case Builtin::BI__builtin_floorf:
3056 case Builtin::BI__builtin_floorf16:
3057 case Builtin::BI__builtin_floorl:
3058 case Builtin::BI__builtin_floorf128:
3061 Intrinsic::experimental_constrained_floor));
3063 case Builtin::BIfma:
3064 case Builtin::BIfmaf:
3065 case Builtin::BIfmal:
3066 case Builtin::BI__builtin_fma:
3067 case Builtin::BI__builtin_fmaf:
3068 case Builtin::BI__builtin_fmaf16:
3069 case Builtin::BI__builtin_fmal:
3070 case Builtin::BI__builtin_fmaf128:
3073 Intrinsic::experimental_constrained_fma));
3075 case Builtin::BIfmax:
3076 case Builtin::BIfmaxf:
3077 case Builtin::BIfmaxl:
3078 case Builtin::BI__builtin_fmax:
3079 case Builtin::BI__builtin_fmaxf:
3080 case Builtin::BI__builtin_fmaxf16:
3081 case Builtin::BI__builtin_fmaxl:
3082 case Builtin::BI__builtin_fmaxf128:
3085 Intrinsic::experimental_constrained_maxnum));
3087 case Builtin::BIfmin:
3088 case Builtin::BIfminf:
3089 case Builtin::BIfminl:
3090 case Builtin::BI__builtin_fmin:
3091 case Builtin::BI__builtin_fminf:
3092 case Builtin::BI__builtin_fminf16:
3093 case Builtin::BI__builtin_fminl:
3094 case Builtin::BI__builtin_fminf128:
3097 Intrinsic::experimental_constrained_minnum));
3099 case Builtin::BIfmaximum_num:
3100 case Builtin::BIfmaximum_numf:
3101 case Builtin::BIfmaximum_numl:
3102 case Builtin::BI__builtin_fmaximum_num:
3103 case Builtin::BI__builtin_fmaximum_numf:
3104 case Builtin::BI__builtin_fmaximum_numf16:
3105 case Builtin::BI__builtin_fmaximum_numl:
3106 case Builtin::BI__builtin_fmaximum_numf128:
3108 emitBuiltinWithOneOverloadedType<2>(*
this,
E, Intrinsic::maximumnum));
3110 case Builtin::BIfminimum_num:
3111 case Builtin::BIfminimum_numf:
3112 case Builtin::BIfminimum_numl:
3113 case Builtin::BI__builtin_fminimum_num:
3114 case Builtin::BI__builtin_fminimum_numf:
3115 case Builtin::BI__builtin_fminimum_numf16:
3116 case Builtin::BI__builtin_fminimum_numl:
3117 case Builtin::BI__builtin_fminimum_numf128:
3119 emitBuiltinWithOneOverloadedType<2>(*
this,
E, Intrinsic::minimumnum));
3123 case Builtin::BIfmod:
3124 case Builtin::BIfmodf:
3125 case Builtin::BIfmodl:
3126 case Builtin::BI__builtin_fmod:
3127 case Builtin::BI__builtin_fmodf:
3128 case Builtin::BI__builtin_fmodf16:
3129 case Builtin::BI__builtin_fmodl:
3130 case Builtin::BI__builtin_fmodf128:
3131 case Builtin::BI__builtin_elementwise_fmod: {
3132 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
3138 case Builtin::BIlog:
3139 case Builtin::BIlogf:
3140 case Builtin::BIlogl:
3141 case Builtin::BI__builtin_log:
3142 case Builtin::BI__builtin_logf:
3143 case Builtin::BI__builtin_logf16:
3144 case Builtin::BI__builtin_logl:
3145 case Builtin::BI__builtin_logf128:
3148 Intrinsic::experimental_constrained_log));
3150 case Builtin::BIlog10:
3151 case Builtin::BIlog10f:
3152 case Builtin::BIlog10l:
3153 case Builtin::BI__builtin_log10:
3154 case Builtin::BI__builtin_log10f:
3155 case Builtin::BI__builtin_log10f16:
3156 case Builtin::BI__builtin_log10l:
3157 case Builtin::BI__builtin_log10f128:
3160 Intrinsic::experimental_constrained_log10));
3162 case Builtin::BIlog2:
3163 case Builtin::BIlog2f:
3164 case Builtin::BIlog2l:
3165 case Builtin::BI__builtin_log2:
3166 case Builtin::BI__builtin_log2f:
3167 case Builtin::BI__builtin_log2f16:
3168 case Builtin::BI__builtin_log2l:
3169 case Builtin::BI__builtin_log2f128:
3172 Intrinsic::experimental_constrained_log2));
3174 case Builtin::BInearbyint:
3175 case Builtin::BInearbyintf:
3176 case Builtin::BInearbyintl:
3177 case Builtin::BI__builtin_nearbyint:
3178 case Builtin::BI__builtin_nearbyintf:
3179 case Builtin::BI__builtin_nearbyintl:
3180 case Builtin::BI__builtin_nearbyintf128:
3182 Intrinsic::nearbyint,
3183 Intrinsic::experimental_constrained_nearbyint));
3185 case Builtin::BIpow:
3186 case Builtin::BIpowf:
3187 case Builtin::BIpowl:
3188 case Builtin::BI__builtin_pow:
3189 case Builtin::BI__builtin_powf:
3190 case Builtin::BI__builtin_powf16:
3191 case Builtin::BI__builtin_powl:
3192 case Builtin::BI__builtin_powf128:
3195 Intrinsic::experimental_constrained_pow));
3197 case Builtin::BIrint:
3198 case Builtin::BIrintf:
3199 case Builtin::BIrintl:
3200 case Builtin::BI__builtin_rint:
3201 case Builtin::BI__builtin_rintf:
3202 case Builtin::BI__builtin_rintf16:
3203 case Builtin::BI__builtin_rintl:
3204 case Builtin::BI__builtin_rintf128:
3207 Intrinsic::experimental_constrained_rint));
3209 case Builtin::BIround:
3210 case Builtin::BIroundf:
3211 case Builtin::BIroundl:
3212 case Builtin::BI__builtin_round:
3213 case Builtin::BI__builtin_roundf:
3214 case Builtin::BI__builtin_roundf16:
3215 case Builtin::BI__builtin_roundl:
3216 case Builtin::BI__builtin_roundf128:
3219 Intrinsic::experimental_constrained_round));
3221 case Builtin::BIroundeven:
3222 case Builtin::BIroundevenf:
3223 case Builtin::BIroundevenl:
3224 case Builtin::BI__builtin_roundeven:
3225 case Builtin::BI__builtin_roundevenf:
3226 case Builtin::BI__builtin_roundevenf16:
3227 case Builtin::BI__builtin_roundevenl:
3228 case Builtin::BI__builtin_roundevenf128:
3230 Intrinsic::roundeven,
3231 Intrinsic::experimental_constrained_roundeven));
3233 case Builtin::BIsin:
3234 case Builtin::BIsinf:
3235 case Builtin::BIsinl:
3236 case Builtin::BI__builtin_sin:
3237 case Builtin::BI__builtin_sinf:
3238 case Builtin::BI__builtin_sinf16:
3239 case Builtin::BI__builtin_sinl:
3240 case Builtin::BI__builtin_sinf128:
3243 Intrinsic::experimental_constrained_sin));
3245 case Builtin::BIsinh:
3246 case Builtin::BIsinhf:
3247 case Builtin::BIsinhl:
3248 case Builtin::BI__builtin_sinh:
3249 case Builtin::BI__builtin_sinhf:
3250 case Builtin::BI__builtin_sinhf16:
3251 case Builtin::BI__builtin_sinhl:
3252 case Builtin::BI__builtin_sinhf128:
3254 *
this,
E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
3256 case Builtin::BI__builtin_sincos:
3257 case Builtin::BI__builtin_sincosf:
3258 case Builtin::BI__builtin_sincosf16:
3259 case Builtin::BI__builtin_sincosl:
3260 case Builtin::BI__builtin_sincosf128:
3264 case Builtin::BIsqrt:
3265 case Builtin::BIsqrtf:
3266 case Builtin::BIsqrtl:
3267 case Builtin::BI__builtin_sqrt:
3268 case Builtin::BI__builtin_sqrtf:
3269 case Builtin::BI__builtin_sqrtf16:
3270 case Builtin::BI__builtin_sqrtl:
3271 case Builtin::BI__builtin_sqrtf128:
3272 case Builtin::BI__builtin_elementwise_sqrt: {
3274 *
this,
E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
3279 case Builtin::BItan:
3280 case Builtin::BItanf:
3281 case Builtin::BItanl:
3282 case Builtin::BI__builtin_tan:
3283 case Builtin::BI__builtin_tanf:
3284 case Builtin::BI__builtin_tanf16:
3285 case Builtin::BI__builtin_tanl:
3286 case Builtin::BI__builtin_tanf128:
3288 *
this,
E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
3290 case Builtin::BItanh:
3291 case Builtin::BItanhf:
3292 case Builtin::BItanhl:
3293 case Builtin::BI__builtin_tanh:
3294 case Builtin::BI__builtin_tanhf:
3295 case Builtin::BI__builtin_tanhf16:
3296 case Builtin::BI__builtin_tanhl:
3297 case Builtin::BI__builtin_tanhf128:
3299 *
this,
E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3301 case Builtin::BItrunc:
3302 case Builtin::BItruncf:
3303 case Builtin::BItruncl:
3304 case Builtin::BI__builtin_trunc:
3305 case Builtin::BI__builtin_truncf:
3306 case Builtin::BI__builtin_truncf16:
3307 case Builtin::BI__builtin_truncl:
3308 case Builtin::BI__builtin_truncf128:
3311 Intrinsic::experimental_constrained_trunc));
3313 case Builtin::BIlround:
3314 case Builtin::BIlroundf:
3315 case Builtin::BIlroundl:
3316 case Builtin::BI__builtin_lround:
3317 case Builtin::BI__builtin_lroundf:
3318 case Builtin::BI__builtin_lroundl:
3319 case Builtin::BI__builtin_lroundf128:
3321 *
this,
E, Intrinsic::lround,
3322 Intrinsic::experimental_constrained_lround));
3324 case Builtin::BIllround:
3325 case Builtin::BIllroundf:
3326 case Builtin::BIllroundl:
3327 case Builtin::BI__builtin_llround:
3328 case Builtin::BI__builtin_llroundf:
3329 case Builtin::BI__builtin_llroundl:
3330 case Builtin::BI__builtin_llroundf128:
3332 *
this,
E, Intrinsic::llround,
3333 Intrinsic::experimental_constrained_llround));
3335 case Builtin::BIlrint:
3336 case Builtin::BIlrintf:
3337 case Builtin::BIlrintl:
3338 case Builtin::BI__builtin_lrint:
3339 case Builtin::BI__builtin_lrintf:
3340 case Builtin::BI__builtin_lrintl:
3341 case Builtin::BI__builtin_lrintf128:
3343 *
this,
E, Intrinsic::lrint,
3344 Intrinsic::experimental_constrained_lrint));
3346 case Builtin::BIllrint:
3347 case Builtin::BIllrintf:
3348 case Builtin::BIllrintl:
3349 case Builtin::BI__builtin_llrint:
3350 case Builtin::BI__builtin_llrintf:
3351 case Builtin::BI__builtin_llrintl:
3352 case Builtin::BI__builtin_llrintf128:
3354 *
this,
E, Intrinsic::llrint,
3355 Intrinsic::experimental_constrained_llrint));
3356 case Builtin::BI__builtin_ldexp:
3357 case Builtin::BI__builtin_ldexpf:
3358 case Builtin::BI__builtin_ldexpl:
3359 case Builtin::BI__builtin_ldexpf16:
3360 case Builtin::BI__builtin_ldexpf128: {
3362 *
this,
E, Intrinsic::ldexp,
3363 Intrinsic::experimental_constrained_ldexp));
3373 Value *Val = A.emitRawPointer(*
this);
3379 SkippedChecks.
set(SanitizerKind::All);
3380 SkippedChecks.
clear(SanitizerKind::Alignment);
3383 if (
auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3384 if (CE->getCastKind() == CK_BitCast)
3385 Arg = CE->getSubExpr();
3391 switch (BuiltinIDIfNoAsmLabel) {
3393 case Builtin::BI__builtin___CFStringMakeConstantString:
3394 case Builtin::BI__builtin___NSStringMakeConstantString:
3396 case Builtin::BI__builtin_stdarg_start:
3397 case Builtin::BI__builtin_va_start:
3398 case Builtin::BI__va_start:
3399 case Builtin::BI__builtin_va_end:
3403 BuiltinID != Builtin::BI__builtin_va_end);
3405 case Builtin::BI__builtin_va_copy: {
3412 case Builtin::BIabs:
3413 case Builtin::BIlabs:
3414 case Builtin::BIllabs:
3415 case Builtin::BI__builtin_abs:
3416 case Builtin::BI__builtin_labs:
3417 case Builtin::BI__builtin_llabs: {
3418 bool SanitizeOverflow =
SanOpts.
has(SanitizerKind::SignedIntegerOverflow);
3421 switch (
getLangOpts().getSignedOverflowBehavior()) {
3426 if (!SanitizeOverflow) {
3438 case Builtin::BI__builtin_complex: {
3443 case Builtin::BI__builtin_conj:
3444 case Builtin::BI__builtin_conjf:
3445 case Builtin::BI__builtin_conjl:
3446 case Builtin::BIconj:
3447 case Builtin::BIconjf:
3448 case Builtin::BIconjl: {
3450 Value *Real = ComplexVal.first;
3451 Value *Imag = ComplexVal.second;
3452 Imag =
Builder.CreateFNeg(Imag,
"neg");
3455 case Builtin::BI__builtin_creal:
3456 case Builtin::BI__builtin_crealf:
3457 case Builtin::BI__builtin_creall:
3458 case Builtin::BIcreal:
3459 case Builtin::BIcrealf:
3460 case Builtin::BIcreall: {
3465 case Builtin::BI__builtin_preserve_access_index: {
3486 case Builtin::BI__builtin_cimag:
3487 case Builtin::BI__builtin_cimagf:
3488 case Builtin::BI__builtin_cimagl:
3489 case Builtin::BIcimag:
3490 case Builtin::BIcimagf:
3491 case Builtin::BIcimagl: {
3496 case Builtin::BI__builtin_clrsb:
3497 case Builtin::BI__builtin_clrsbl:
3498 case Builtin::BI__builtin_clrsbll: {
3502 llvm::Type *ArgType = ArgValue->
getType();
3506 Value *
Zero = llvm::Constant::getNullValue(ArgType);
3507 Value *IsNeg =
Builder.CreateICmpSLT(ArgValue, Zero,
"isneg");
3509 Value *Tmp =
Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3516 case Builtin::BI__builtin_ctzs:
3517 case Builtin::BI__builtin_ctz:
3518 case Builtin::BI__builtin_ctzl:
3519 case Builtin::BI__builtin_ctzll:
3520 case Builtin::BI__builtin_ctzg: {
3521 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3522 E->getNumArgs() > 1;
3528 llvm::Type *ArgType = ArgValue->
getType();
3535 if (
Result->getType() != ResultType)
3541 Value *
Zero = Constant::getNullValue(ArgType);
3542 Value *IsZero =
Builder.CreateICmpEQ(ArgValue, Zero,
"iszero");
3544 Value *ResultOrFallback =
3545 Builder.CreateSelect(IsZero, FallbackValue,
Result,
"ctzg");
3548 case Builtin::BI__builtin_clzs:
3549 case Builtin::BI__builtin_clz:
3550 case Builtin::BI__builtin_clzl:
3551 case Builtin::BI__builtin_clzll:
3552 case Builtin::BI__builtin_clzg: {
3553 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3554 E->getNumArgs() > 1;
3560 llvm::Type *ArgType = ArgValue->
getType();
3567 if (
Result->getType() != ResultType)
3573 Value *
Zero = Constant::getNullValue(ArgType);
3574 Value *IsZero =
Builder.CreateICmpEQ(ArgValue, Zero,
"iszero");
3576 Value *ResultOrFallback =
3577 Builder.CreateSelect(IsZero, FallbackValue,
Result,
"clzg");
3580 case Builtin::BI__builtin_ffs:
3581 case Builtin::BI__builtin_ffsl:
3582 case Builtin::BI__builtin_ffsll: {
3586 llvm::Type *ArgType = ArgValue->
getType();
3591 Builder.CreateAdd(
Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3592 llvm::ConstantInt::get(ArgType, 1));
3593 Value *
Zero = llvm::Constant::getNullValue(ArgType);
3594 Value *IsZero =
Builder.CreateICmpEQ(ArgValue, Zero,
"iszero");
3596 if (
Result->getType() != ResultType)
3601 case Builtin::BI__builtin_parity:
3602 case Builtin::BI__builtin_parityl:
3603 case Builtin::BI__builtin_parityll: {
3607 llvm::Type *ArgType = ArgValue->
getType();
3613 if (
Result->getType() != ResultType)
3618 case Builtin::BI__lzcnt16:
3619 case Builtin::BI__lzcnt:
3620 case Builtin::BI__lzcnt64: {
3623 llvm::Type *ArgType = ArgValue->
getType();
3628 if (
Result->getType() != ResultType)
3633 case Builtin::BI__popcnt16:
3634 case Builtin::BI__popcnt:
3635 case Builtin::BI__popcnt64:
3636 case Builtin::BI__builtin_popcount:
3637 case Builtin::BI__builtin_popcountl:
3638 case Builtin::BI__builtin_popcountll:
3639 case Builtin::BI__builtin_popcountg: {
3642 llvm::Type *ArgType = ArgValue->
getType();
3647 if (
Result->getType() != ResultType)
3652 case Builtin::BI__builtin_unpredictable: {
3658 case Builtin::BI__builtin_expect: {
3660 llvm::Type *ArgType = ArgValue->
getType();
3671 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue},
"expval");
3674 case Builtin::BI__builtin_expect_with_probability: {
3676 llvm::Type *ArgType = ArgValue->
getType();
3679 llvm::APFloat Probability(0.0);
3680 const Expr *ProbArg =
E->getArg(2);
3682 assert(EvalSucceed &&
"probability should be able to evaluate as float");
3684 bool LoseInfo =
false;
3685 Probability.convert(llvm::APFloat::IEEEdouble(),
3686 llvm::RoundingMode::Dynamic, &LoseInfo);
3688 Constant *Confidence = ConstantFP::get(Ty, Probability);
3698 FnExpect, {ArgValue, ExpectedValue, Confidence},
"expval");
3701 case Builtin::BI__builtin_assume_aligned: {
3702 const Expr *Ptr =
E->getArg(0);
3704 Value *OffsetValue =
3708 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3709 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3710 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3711 llvm::Value::MaximumAlignment);
3715 AlignmentCI, OffsetValue);
3718 case Builtin::BI__assume:
3719 case Builtin::BI__builtin_assume: {
3725 Builder.CreateCall(FnAssume, ArgValue);
3728 case Builtin::BI__builtin_assume_separate_storage: {
3729 const Expr *Arg0 =
E->getArg(0);
3730 const Expr *Arg1 =
E->getArg(1);
3735 Value *Values[] = {Value0, Value1};
3736 OperandBundleDefT<Value *> OBD(
"separate_storage", Values);
3740 case Builtin::BI__builtin_allow_runtime_check: {
3744 llvm::Value *Allow =
Builder.CreateCall(
3746 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3749 case Builtin::BI__arithmetic_fence: {
3752 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
3753 llvm::FastMathFlags FMF =
Builder.getFastMathFlags();
3754 bool isArithmeticFenceEnabled =
3755 FMF.allowReassoc() &&
3759 if (isArithmeticFenceEnabled) {
3762 Value *Real =
Builder.CreateArithmeticFence(ComplexVal.first,
3764 Value *Imag =
Builder.CreateArithmeticFence(ComplexVal.second,
3769 Value *Real = ComplexVal.first;
3770 Value *Imag = ComplexVal.second;
3774 if (isArithmeticFenceEnabled)
3779 case Builtin::BI__builtin_bswap16:
3780 case Builtin::BI__builtin_bswap32:
3781 case Builtin::BI__builtin_bswap64:
3782 case Builtin::BI_byteswap_ushort:
3783 case Builtin::BI_byteswap_ulong:
3784 case Builtin::BI_byteswap_uint64: {
3786 emitBuiltinWithOneOverloadedType<1>(*
this,
E, Intrinsic::bswap));
3788 case Builtin::BI__builtin_bitreverse8:
3789 case Builtin::BI__builtin_bitreverse16:
3790 case Builtin::BI__builtin_bitreverse32:
3791 case Builtin::BI__builtin_bitreverse64: {
3793 emitBuiltinWithOneOverloadedType<1>(*
this,
E, Intrinsic::bitreverse));
3795 case Builtin::BI__builtin_rotateleft8:
3796 case Builtin::BI__builtin_rotateleft16:
3797 case Builtin::BI__builtin_rotateleft32:
3798 case Builtin::BI__builtin_rotateleft64:
3799 case Builtin::BI_rotl8:
3800 case Builtin::BI_rotl16:
3801 case Builtin::BI_rotl:
3802 case Builtin::BI_lrotl:
3803 case Builtin::BI_rotl64:
3806 case Builtin::BI__builtin_rotateright8:
3807 case Builtin::BI__builtin_rotateright16:
3808 case Builtin::BI__builtin_rotateright32:
3809 case Builtin::BI__builtin_rotateright64:
3810 case Builtin::BI_rotr8:
3811 case Builtin::BI_rotr16:
3812 case Builtin::BI_rotr:
3813 case Builtin::BI_lrotr:
3814 case Builtin::BI_rotr64:
3817 case Builtin::BI__builtin_constant_p: {
3820 const Expr *Arg =
E->getArg(0);
3828 return RValue::get(ConstantInt::get(ResultType, 0));
3833 return RValue::get(ConstantInt::get(ResultType, 0));
3845 if (
Result->getType() != ResultType)
3849 case Builtin::BI__builtin_dynamic_object_size:
3850 case Builtin::BI__builtin_object_size: {
3857 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3859 nullptr, IsDynamic));
3861 case Builtin::BI__builtin_counted_by_ref: {
3863 llvm::Value *
Result = llvm::ConstantPointerNull::get(
3868 if (
auto *UO = dyn_cast<UnaryOperator>(Arg);
3869 UO && UO->getOpcode() == UO_AddrOf) {
3872 if (
auto *ASE = dyn_cast<ArraySubscriptExpr>(Arg))
3876 if (
const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Arg)) {
3880 const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl());
3884 llvm::report_fatal_error(
"Cannot find the counted_by 'count' field");
3890 case Builtin::BI__builtin_prefetch: {
3894 llvm::ConstantInt::get(
Int32Ty, 0);
3896 llvm::ConstantInt::get(
Int32Ty, 3);
3902 case Builtin::BI__builtin_readcyclecounter: {
3906 case Builtin::BI__builtin_readsteadycounter: {
3910 case Builtin::BI__builtin___clear_cache: {
3916 case Builtin::BI__builtin_trap:
3919 case Builtin::BI__builtin_verbose_trap: {
3920 llvm::DILocation *TrapLocation =
Builder.getCurrentDebugLocation();
3931 case Builtin::BI__debugbreak:
3934 case Builtin::BI__builtin_unreachable: {
3943 case Builtin::BI__builtin_powi:
3944 case Builtin::BI__builtin_powif:
3945 case Builtin::BI__builtin_powil: {
3949 if (
Builder.getIsFPConstrained()) {
3952 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
3959 { Src0->getType(), Src1->getType() });
3962 case Builtin::BI__builtin_frexpl: {
3966 if (&
getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3970 case Builtin::BI__builtin_frexp:
3971 case Builtin::BI__builtin_frexpf:
3972 case Builtin::BI__builtin_frexpf128:
3973 case Builtin::BI__builtin_frexpf16:
3975 case Builtin::BI__builtin_isgreater:
3976 case Builtin::BI__builtin_isgreaterequal:
3977 case Builtin::BI__builtin_isless:
3978 case Builtin::BI__builtin_islessequal:
3979 case Builtin::BI__builtin_islessgreater:
3980 case Builtin::BI__builtin_isunordered: {
3983 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
3987 switch (BuiltinID) {
3988 default: llvm_unreachable(
"Unknown ordered comparison");
3989 case Builtin::BI__builtin_isgreater:
3990 LHS =
Builder.CreateFCmpOGT(LHS, RHS,
"cmp");
3992 case Builtin::BI__builtin_isgreaterequal:
3993 LHS =
Builder.CreateFCmpOGE(LHS, RHS,
"cmp");
3995 case Builtin::BI__builtin_isless:
3996 LHS =
Builder.CreateFCmpOLT(LHS, RHS,
"cmp");
3998 case Builtin::BI__builtin_islessequal:
3999 LHS =
Builder.CreateFCmpOLE(LHS, RHS,
"cmp");
4001 case Builtin::BI__builtin_islessgreater:
4002 LHS =
Builder.CreateFCmpONE(LHS, RHS,
"cmp");
4004 case Builtin::BI__builtin_isunordered:
4005 LHS =
Builder.CreateFCmpUNO(LHS, RHS,
"cmp");
4012 case Builtin::BI__builtin_isnan: {
4013 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
4022 case Builtin::BI__builtin_issignaling: {
4023 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
4030 case Builtin::BI__builtin_isinf: {
4031 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
4040 case Builtin::BIfinite:
4041 case Builtin::BI__finite:
4042 case Builtin::BIfinitef:
4043 case Builtin::BI__finitef:
4044 case Builtin::BIfinitel:
4045 case Builtin::BI__finitel:
4046 case Builtin::BI__builtin_isfinite: {
4047 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
4056 case Builtin::BI__builtin_isnormal: {
4057 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
4064 case Builtin::BI__builtin_issubnormal: {
4065 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
4068 Builder.CreateZExt(
Builder.createIsFPClass(
V, FPClassTest::fcSubnormal),
4072 case Builtin::BI__builtin_iszero: {
4073 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
4080 case Builtin::BI__builtin_isfpclass: {
4085 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
4091 case Builtin::BI__builtin_nondeterministic_value: {
4100 case Builtin::BI__builtin_elementwise_abs: {
4105 QT = VecTy->getElementType();
4109 Builder.getFalse(),
nullptr,
"elt.abs");
4111 Result = emitBuiltinWithOneOverloadedType<1>(
4112 *
this,
E, llvm::Intrinsic::fabs,
"elt.abs");
4116 case Builtin::BI__builtin_elementwise_acos:
4117 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4118 *
this,
E, llvm::Intrinsic::acos,
"elt.acos"));
4119 case Builtin::BI__builtin_elementwise_asin:
4120 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4121 *
this,
E, llvm::Intrinsic::asin,
"elt.asin"));
4122 case Builtin::BI__builtin_elementwise_atan:
4123 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4124 *
this,
E, llvm::Intrinsic::atan,
"elt.atan"));
4125 case Builtin::BI__builtin_elementwise_atan2:
4126 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4127 *
this,
E, llvm::Intrinsic::atan2,
"elt.atan2"));
4128 case Builtin::BI__builtin_elementwise_ceil:
4129 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4130 *
this,
E, llvm::Intrinsic::ceil,
"elt.ceil"));
4131 case Builtin::BI__builtin_elementwise_exp:
4132 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4133 *
this,
E, llvm::Intrinsic::exp,
"elt.exp"));
4134 case Builtin::BI__builtin_elementwise_exp2:
4135 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4136 *
this,
E, llvm::Intrinsic::exp2,
"elt.exp2"));
4137 case Builtin::BI__builtin_elementwise_log:
4138 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4139 *
this,
E, llvm::Intrinsic::log,
"elt.log"));
4140 case Builtin::BI__builtin_elementwise_log2:
4141 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4142 *
this,
E, llvm::Intrinsic::log2,
"elt.log2"));
4143 case Builtin::BI__builtin_elementwise_log10:
4144 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4145 *
this,
E, llvm::Intrinsic::log10,
"elt.log10"));
4146 case Builtin::BI__builtin_elementwise_pow: {
4148 emitBuiltinWithOneOverloadedType<2>(*
this,
E, llvm::Intrinsic::pow));
4150 case Builtin::BI__builtin_elementwise_bitreverse:
4151 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4152 *
this,
E, llvm::Intrinsic::bitreverse,
"elt.bitreverse"));
4153 case Builtin::BI__builtin_elementwise_cos:
4154 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4155 *
this,
E, llvm::Intrinsic::cos,
"elt.cos"));
4156 case Builtin::BI__builtin_elementwise_cosh:
4157 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4158 *
this,
E, llvm::Intrinsic::cosh,
"elt.cosh"));
4159 case Builtin::BI__builtin_elementwise_floor:
4160 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4161 *
this,
E, llvm::Intrinsic::floor,
"elt.floor"));
4162 case Builtin::BI__builtin_elementwise_popcount:
4163 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4164 *
this,
E, llvm::Intrinsic::ctpop,
"elt.ctpop"));
4165 case Builtin::BI__builtin_elementwise_roundeven:
4166 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4167 *
this,
E, llvm::Intrinsic::roundeven,
"elt.roundeven"));
4168 case Builtin::BI__builtin_elementwise_round:
4169 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4170 *
this,
E, llvm::Intrinsic::round,
"elt.round"));
4171 case Builtin::BI__builtin_elementwise_rint:
4172 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4173 *
this,
E, llvm::Intrinsic::rint,
"elt.rint"));
4174 case Builtin::BI__builtin_elementwise_nearbyint:
4175 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4176 *
this,
E, llvm::Intrinsic::nearbyint,
"elt.nearbyint"));
4177 case Builtin::BI__builtin_elementwise_sin:
4178 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4179 *
this,
E, llvm::Intrinsic::sin,
"elt.sin"));
4180 case Builtin::BI__builtin_elementwise_sinh:
4181 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4182 *
this,
E, llvm::Intrinsic::sinh,
"elt.sinh"));
4183 case Builtin::BI__builtin_elementwise_tan:
4184 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4185 *
this,
E, llvm::Intrinsic::tan,
"elt.tan"));
4186 case Builtin::BI__builtin_elementwise_tanh:
4187 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4188 *
this,
E, llvm::Intrinsic::tanh,
"elt.tanh"));
4189 case Builtin::BI__builtin_elementwise_trunc:
4190 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4191 *
this,
E, llvm::Intrinsic::trunc,
"elt.trunc"));
4192 case Builtin::BI__builtin_elementwise_canonicalize:
4193 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4194 *
this,
E, llvm::Intrinsic::canonicalize,
"elt.canonicalize"));
4195 case Builtin::BI__builtin_elementwise_copysign:
4196 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4197 *
this,
E, llvm::Intrinsic::copysign));
4198 case Builtin::BI__builtin_elementwise_fma:
4200 emitBuiltinWithOneOverloadedType<3>(*
this,
E, llvm::Intrinsic::fma));
4201 case Builtin::BI__builtin_elementwise_add_sat:
4202 case Builtin::BI__builtin_elementwise_sub_sat: {
4206 assert(Op0->
getType()->isIntOrIntVectorTy() &&
"integer type expected");
4209 Ty = VecTy->getElementType();
4212 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
4213 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
4215 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
4216 Result =
Builder.CreateBinaryIntrinsic(Opc, Op0, Op1,
nullptr,
"elt.sat");
4220 case Builtin::BI__builtin_elementwise_max: {
4224 if (Op0->
getType()->isIntOrIntVectorTy()) {
4227 Ty = VecTy->getElementType();
4229 ? llvm::Intrinsic::smax
4230 : llvm::Intrinsic::umax,
4231 Op0, Op1,
nullptr,
"elt.max");
4236 case Builtin::BI__builtin_elementwise_min: {
4240 if (Op0->
getType()->isIntOrIntVectorTy()) {
4243 Ty = VecTy->getElementType();
4245 ? llvm::Intrinsic::smin
4246 : llvm::Intrinsic::umin,
4247 Op0, Op1,
nullptr,
"elt.min");
4253 case Builtin::BI__builtin_elementwise_maximum: {
4257 Op1,
nullptr,
"elt.maximum");
4261 case Builtin::BI__builtin_elementwise_minimum: {
4265 Op1,
nullptr,
"elt.minimum");
4269 case Builtin::BI__builtin_reduce_max: {
4270 auto GetIntrinsicID = [
this](
QualType QT) {
4272 QT = VecTy->getElementType();
4277 return llvm::Intrinsic::vector_reduce_smax;
4279 return llvm::Intrinsic::vector_reduce_umax;
4281 return llvm::Intrinsic::vector_reduce_fmax;
4283 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4284 *
this,
E, GetIntrinsicID(
E->getArg(0)->
getType()),
"rdx.min"));
4287 case Builtin::BI__builtin_reduce_min: {
4288 auto GetIntrinsicID = [
this](
QualType QT) {
4290 QT = VecTy->getElementType();
4295 return llvm::Intrinsic::vector_reduce_smin;
4297 return llvm::Intrinsic::vector_reduce_umin;
4299 return llvm::Intrinsic::vector_reduce_fmin;
4302 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4303 *
this,
E, GetIntrinsicID(
E->getArg(0)->
getType()),
"rdx.min"));
4306 case Builtin::BI__builtin_reduce_add:
4307 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4308 *
this,
E, llvm::Intrinsic::vector_reduce_add,
"rdx.add"));
4309 case Builtin::BI__builtin_reduce_mul:
4310 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4311 *
this,
E, llvm::Intrinsic::vector_reduce_mul,
"rdx.mul"));
4312 case Builtin::BI__builtin_reduce_xor:
4313 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4314 *
this,
E, llvm::Intrinsic::vector_reduce_xor,
"rdx.xor"));
4315 case Builtin::BI__builtin_reduce_or:
4316 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4317 *
this,
E, llvm::Intrinsic::vector_reduce_or,
"rdx.or"));
4318 case Builtin::BI__builtin_reduce_and:
4319 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4320 *
this,
E, llvm::Intrinsic::vector_reduce_and,
"rdx.and"));
4321 case Builtin::BI__builtin_reduce_maximum:
4322 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4323 *
this,
E, llvm::Intrinsic::vector_reduce_fmaximum,
"rdx.maximum"));
4324 case Builtin::BI__builtin_reduce_minimum:
4325 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4326 *
this,
E, llvm::Intrinsic::vector_reduce_fminimum,
"rdx.minimum"));
4328 case Builtin::BI__builtin_matrix_transpose: {
4332 Value *
Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
4333 MatrixTy->getNumColumns());
4337 case Builtin::BI__builtin_matrix_column_major_load: {
4343 assert(PtrTy &&
"arg0 must be of pointer type");
4353 ResultTy->getNumRows(), ResultTy->getNumColumns(),
"matrix");
4357 case Builtin::BI__builtin_matrix_column_major_store: {
4365 assert(PtrTy &&
"arg1 must be of pointer type");
4374 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4378 case Builtin::BI__builtin_isinf_sign: {
4380 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
4385 AbsArg, ConstantFP::getInfinity(Arg->
getType()),
"isinf");
4391 Value *NegativeOne = ConstantInt::get(
IntTy, -1);
4392 Value *SignResult =
Builder.CreateSelect(IsNeg, NegativeOne, One);
4397 case Builtin::BI__builtin_flt_rounds: {
4402 if (
Result->getType() != ResultType)
4408 case Builtin::BI__builtin_set_flt_rounds: {
4416 case Builtin::BI__builtin_fpclassify: {
4417 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
4428 "fpclassify_result");
4432 Value *IsZero =
Builder.CreateFCmpOEQ(
V, Constant::getNullValue(Ty),
4436 Builder.CreateCondBr(IsZero, End, NotZero);
4440 Builder.SetInsertPoint(NotZero);
4444 Builder.CreateCondBr(IsNan, End, NotNan);
4445 Result->addIncoming(NanLiteral, NotZero);
4448 Builder.SetInsertPoint(NotNan);
4451 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(
V->getType()),
4455 Builder.CreateCondBr(IsInf, End, NotInf);
4456 Result->addIncoming(InfLiteral, NotNan);
4459 Builder.SetInsertPoint(NotInf);
4460 APFloat Smallest = APFloat::getSmallestNormalized(
4463 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(
V->getContext(), Smallest),
4465 Value *NormalResult =
4469 Result->addIncoming(NormalResult, NotInf);
4482 case Builtin::BIalloca:
4483 case Builtin::BI_alloca:
4484 case Builtin::BI__builtin_alloca_uninitialized:
4485 case Builtin::BI__builtin_alloca: {
4489 const Align SuitableAlignmentInBytes =
4493 AllocaInst *AI =
Builder.CreateAlloca(
Builder.getInt8Ty(), Size);
4494 AI->setAlignment(SuitableAlignmentInBytes);
4495 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4507 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4508 case Builtin::BI__builtin_alloca_with_align: {
4511 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4512 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4513 const Align AlignmentInBytes =
4515 AllocaInst *AI =
Builder.CreateAlloca(
Builder.getInt8Ty(), Size);
4516 AI->setAlignment(AlignmentInBytes);
4517 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4529 case Builtin::BIbzero:
4530 case Builtin::BI__builtin_bzero: {
4539 case Builtin::BIbcopy:
4540 case Builtin::BI__builtin_bcopy: {
4554 case Builtin::BImemcpy:
4555 case Builtin::BI__builtin_memcpy:
4556 case Builtin::BImempcpy:
4557 case Builtin::BI__builtin_mempcpy: {
4561 EmitArgCheck(
TCK_Store, Dest,
E->getArg(0), 0);
4562 EmitArgCheck(
TCK_Load, Src,
E->getArg(1), 1);
4564 if (BuiltinID == Builtin::BImempcpy ||
4565 BuiltinID == Builtin::BI__builtin_mempcpy)
4572 case Builtin::BI__builtin_memcpy_inline: {
4577 EmitArgCheck(
TCK_Store, Dest,
E->getArg(0), 0);
4578 EmitArgCheck(
TCK_Load, Src,
E->getArg(1), 1);
4583 case Builtin::BI__builtin_char_memchr:
4584 BuiltinID = Builtin::BI__builtin_memchr;
4587 case Builtin::BI__builtin___memcpy_chk: {
4594 llvm::APSInt DstSize = DstSizeResult.
Val.
getInt();
4595 if (
Size.ugt(DstSize))
4599 Value *SizeVal = llvm::ConstantInt::get(
Builder.getContext(), Size);
4604 case Builtin::BI__builtin_objc_memmove_collectable: {
4609 DestAddr, SrcAddr, SizeVal);
4613 case Builtin::BI__builtin___memmove_chk: {
4620 llvm::APSInt DstSize = DstSizeResult.
Val.
getInt();
4621 if (
Size.ugt(DstSize))
4625 Value *SizeVal = llvm::ConstantInt::get(
Builder.getContext(), Size);
4630 case Builtin::BImemmove:
4631 case Builtin::BI__builtin_memmove: {
4635 EmitArgCheck(
TCK_Store, Dest,
E->getArg(0), 0);
4636 EmitArgCheck(
TCK_Load, Src,
E->getArg(1), 1);
4640 case Builtin::BImemset:
4641 case Builtin::BI__builtin_memset: {
4651 case Builtin::BI__builtin_memset_inline: {
4663 case Builtin::BI__builtin___memset_chk: {
4670 llvm::APSInt DstSize = DstSizeResult.
Val.
getInt();
4671 if (
Size.ugt(DstSize))
4676 Value *SizeVal = llvm::ConstantInt::get(
Builder.getContext(), Size);
4680 case Builtin::BI__builtin_wmemchr: {
4683 if (!
getTarget().getTriple().isOSMSVCRT())
4691 BasicBlock *Entry =
Builder.GetInsertBlock();
4696 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4700 StrPhi->addIncoming(Str, Entry);
4702 SizePhi->addIncoming(Size, Entry);
4706 Value *FoundChr =
Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4708 Builder.CreateCondBr(StrEqChr, Exit, Next);
4711 Value *NextStr =
Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4713 Value *NextSizeEq0 =
4714 Builder.CreateICmpEQ(NextSize, ConstantInt::get(
SizeTy, 0));
4715 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4716 StrPhi->addIncoming(NextStr, Next);
4717 SizePhi->addIncoming(NextSize, Next);
4721 Ret->addIncoming(llvm::Constant::getNullValue(Str->
getType()), Entry);
4722 Ret->addIncoming(llvm::Constant::getNullValue(Str->
getType()), Next);
4723 Ret->addIncoming(FoundChr, CmpEq);
4726 case Builtin::BI__builtin_wmemcmp: {
4729 if (!
getTarget().getTriple().isOSMSVCRT())
4738 BasicBlock *Entry =
Builder.GetInsertBlock();
4744 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4748 DstPhi->addIncoming(Dst, Entry);
4750 SrcPhi->addIncoming(Src, Entry);
4752 SizePhi->addIncoming(Size, Entry);
4758 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4762 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4765 Value *NextDst =
Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4766 Value *NextSrc =
Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4768 Value *NextSizeEq0 =
4769 Builder.CreateICmpEQ(NextSize, ConstantInt::get(
SizeTy, 0));
4770 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4771 DstPhi->addIncoming(NextDst, Next);
4772 SrcPhi->addIncoming(NextSrc, Next);
4773 SizePhi->addIncoming(NextSize, Next);
4777 Ret->addIncoming(ConstantInt::get(
IntTy, 0), Entry);
4778 Ret->addIncoming(ConstantInt::get(
IntTy, 1), CmpGT);
4779 Ret->addIncoming(ConstantInt::get(
IntTy, -1), CmpLT);
4780 Ret->addIncoming(ConstantInt::get(
IntTy, 0), Next);
4783 case Builtin::BI__builtin_dwarf_cfa: {
4796 llvm::ConstantInt::get(
Int32Ty, Offset)));
4798 case Builtin::BI__builtin_return_address: {
4804 case Builtin::BI_ReturnAddress: {
4808 case Builtin::BI__builtin_frame_address: {
4814 case Builtin::BI__builtin_extract_return_addr: {
4819 case Builtin::BI__builtin_frob_return_addr: {
4824 case Builtin::BI__builtin_dwarf_sp_column: {
4825 llvm::IntegerType *Ty
4834 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4840 case Builtin::BI__builtin_eh_return: {
4844 llvm::IntegerType *
IntTy = cast<llvm::IntegerType>(
Int->getType());
4845 assert((
IntTy->getBitWidth() == 32 ||
IntTy->getBitWidth() == 64) &&
4846 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4849 : Intrinsic::eh_return_i64);
4858 case Builtin::BI__builtin_unwind_init: {
4863 case Builtin::BI__builtin_extend_pointer: {
4888 case Builtin::BI__builtin_setjmp: {
4892 if (
getTarget().getTriple().getArch() == llvm::Triple::systemz) {
4903 ConstantInt::get(
Int32Ty, 0));
4917 case Builtin::BI__builtin_longjmp: {
4931 case Builtin::BI__builtin_launder: {
4932 const Expr *Arg =
E->getArg(0);
4940 case Builtin::BI__sync_fetch_and_add:
4941 case Builtin::BI__sync_fetch_and_sub:
4942 case Builtin::BI__sync_fetch_and_or:
4943 case Builtin::BI__sync_fetch_and_and:
4944 case Builtin::BI__sync_fetch_and_xor:
4945 case Builtin::BI__sync_fetch_and_nand:
4946 case Builtin::BI__sync_add_and_fetch:
4947 case Builtin::BI__sync_sub_and_fetch:
4948 case Builtin::BI__sync_and_and_fetch:
4949 case Builtin::BI__sync_or_and_fetch:
4950 case Builtin::BI__sync_xor_and_fetch:
4951 case Builtin::BI__sync_nand_and_fetch:
4952 case Builtin::BI__sync_val_compare_and_swap:
4953 case Builtin::BI__sync_bool_compare_and_swap:
4954 case Builtin::BI__sync_lock_test_and_set:
4955 case Builtin::BI__sync_lock_release:
4956 case Builtin::BI__sync_swap:
4957 llvm_unreachable(
"Shouldn't make it through sema");
4958 case Builtin::BI__sync_fetch_and_add_1:
4959 case Builtin::BI__sync_fetch_and_add_2:
4960 case Builtin::BI__sync_fetch_and_add_4:
4961 case Builtin::BI__sync_fetch_and_add_8:
4962 case Builtin::BI__sync_fetch_and_add_16:
4964 case Builtin::BI__sync_fetch_and_sub_1:
4965 case Builtin::BI__sync_fetch_and_sub_2:
4966 case Builtin::BI__sync_fetch_and_sub_4:
4967 case Builtin::BI__sync_fetch_and_sub_8:
4968 case Builtin::BI__sync_fetch_and_sub_16:
4970 case Builtin::BI__sync_fetch_and_or_1:
4971 case Builtin::BI__sync_fetch_and_or_2:
4972 case Builtin::BI__sync_fetch_and_or_4:
4973 case Builtin::BI__sync_fetch_and_or_8:
4974 case Builtin::BI__sync_fetch_and_or_16:
4976 case Builtin::BI__sync_fetch_and_and_1:
4977 case Builtin::BI__sync_fetch_and_and_2:
4978 case Builtin::BI__sync_fetch_and_and_4:
4979 case Builtin::BI__sync_fetch_and_and_8:
4980 case Builtin::BI__sync_fetch_and_and_16:
4982 case Builtin::BI__sync_fetch_and_xor_1:
4983 case Builtin::BI__sync_fetch_and_xor_2:
4984 case Builtin::BI__sync_fetch_and_xor_4:
4985 case Builtin::BI__sync_fetch_and_xor_8:
4986 case Builtin::BI__sync_fetch_and_xor_16:
4988 case Builtin::BI__sync_fetch_and_nand_1:
4989 case Builtin::BI__sync_fetch_and_nand_2:
4990 case Builtin::BI__sync_fetch_and_nand_4:
4991 case Builtin::BI__sync_fetch_and_nand_8:
4992 case Builtin::BI__sync_fetch_and_nand_16:
4996 case Builtin::BI__sync_fetch_and_min:
4998 case Builtin::BI__sync_fetch_and_max:
5000 case Builtin::BI__sync_fetch_and_umin:
5002 case Builtin::BI__sync_fetch_and_umax:
5005 case Builtin::BI__sync_add_and_fetch_1:
5006 case Builtin::BI__sync_add_and_fetch_2:
5007 case Builtin::BI__sync_add_and_fetch_4:
5008 case Builtin::BI__sync_add_and_fetch_8:
5009 case Builtin::BI__sync_add_and_fetch_16:
5011 llvm::Instruction::Add);
5012 case Builtin::BI__sync_sub_and_fetch_1:
5013 case Builtin::BI__sync_sub_and_fetch_2:
5014 case Builtin::BI__sync_sub_and_fetch_4:
5015 case Builtin::BI__sync_sub_and_fetch_8:
5016 case Builtin::BI__sync_sub_and_fetch_16:
5018 llvm::Instruction::Sub);
5019 case Builtin::BI__sync_and_and_fetch_1:
5020 case Builtin::BI__sync_and_and_fetch_2:
5021 case Builtin::BI__sync_and_and_fetch_4:
5022 case Builtin::BI__sync_and_and_fetch_8:
5023 case Builtin::BI__sync_and_and_fetch_16:
5025 llvm::Instruction::And);
5026 case Builtin::BI__sync_or_and_fetch_1:
5027 case Builtin::BI__sync_or_and_fetch_2:
5028 case Builtin::BI__sync_or_and_fetch_4:
5029 case Builtin::BI__sync_or_and_fetch_8:
5030 case Builtin::BI__sync_or_and_fetch_16:
5032 llvm::Instruction::Or);
5033 case Builtin::BI__sync_xor_and_fetch_1:
5034 case Builtin::BI__sync_xor_and_fetch_2:
5035 case Builtin::BI__sync_xor_and_fetch_4:
5036 case Builtin::BI__sync_xor_and_fetch_8:
5037 case Builtin::BI__sync_xor_and_fetch_16:
5039 llvm::Instruction::Xor);
5040 case Builtin::BI__sync_nand_and_fetch_1:
5041 case Builtin::BI__sync_nand_and_fetch_2:
5042 case Builtin::BI__sync_nand_and_fetch_4:
5043 case Builtin::BI__sync_nand_and_fetch_8:
5044 case Builtin::BI__sync_nand_and_fetch_16:
5046 llvm::Instruction::And,
true);
5048 case Builtin::BI__sync_val_compare_and_swap_1:
5049 case Builtin::BI__sync_val_compare_and_swap_2:
5050 case Builtin::BI__sync_val_compare_and_swap_4:
5051 case Builtin::BI__sync_val_compare_and_swap_8:
5052 case Builtin::BI__sync_val_compare_and_swap_16:
5055 case Builtin::BI__sync_bool_compare_and_swap_1:
5056 case Builtin::BI__sync_bool_compare_and_swap_2:
5057 case Builtin::BI__sync_bool_compare_and_swap_4:
5058 case Builtin::BI__sync_bool_compare_and_swap_8:
5059 case Builtin::BI__sync_bool_compare_and_swap_16:
5062 case Builtin::BI__sync_swap_1:
5063 case Builtin::BI__sync_swap_2:
5064 case Builtin::BI__sync_swap_4:
5065 case Builtin::BI__sync_swap_8:
5066 case Builtin::BI__sync_swap_16:
5069 case Builtin::BI__sync_lock_test_and_set_1:
5070 case Builtin::BI__sync_lock_test_and_set_2:
5071 case Builtin::BI__sync_lock_test_and_set_4:
5072 case Builtin::BI__sync_lock_test_and_set_8:
5073 case Builtin::BI__sync_lock_test_and_set_16:
5076 case Builtin::BI__sync_lock_release_1:
5077 case Builtin::BI__sync_lock_release_2:
5078 case Builtin::BI__sync_lock_release_4:
5079 case Builtin::BI__sync_lock_release_8:
5080 case Builtin::BI__sync_lock_release_16: {
5086 llvm::StoreInst *
Store =
5088 Store->setAtomic(llvm::AtomicOrdering::Release);
5092 case Builtin::BI__sync_synchronize: {
5100 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
5104 case Builtin::BI__builtin_nontemporal_load:
5106 case Builtin::BI__builtin_nontemporal_store:
5108 case Builtin::BI__c11_atomic_is_lock_free:
5109 case Builtin::BI__atomic_is_lock_free: {
5113 const char *LibCallName =
"__atomic_is_lock_free";
5117 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
5131 case Builtin::BI__atomic_thread_fence:
5132 case Builtin::BI__atomic_signal_fence:
5133 case Builtin::BI__c11_atomic_thread_fence:
5134 case Builtin::BI__c11_atomic_signal_fence: {
5135 llvm::SyncScope::ID SSID;
5136 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
5137 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
5138 SSID = llvm::SyncScope::SingleThread;
5140 SSID = llvm::SyncScope::System;
5142 if (isa<llvm::ConstantInt>(Order)) {
5143 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5150 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5153 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5156 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5159 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5165 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
5172 Order =
Builder.CreateIntCast(Order,
Builder.getInt32Ty(),
false);
5173 llvm::SwitchInst *SI =
Builder.CreateSwitch(Order, ContBB);
5175 Builder.SetInsertPoint(AcquireBB);
5176 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5178 SI->addCase(
Builder.getInt32(1), AcquireBB);
5179 SI->addCase(
Builder.getInt32(2), AcquireBB);
5181 Builder.SetInsertPoint(ReleaseBB);
5182 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5184 SI->addCase(
Builder.getInt32(3), ReleaseBB);
5186 Builder.SetInsertPoint(AcqRelBB);
5187 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5189 SI->addCase(
Builder.getInt32(4), AcqRelBB);
5191 Builder.SetInsertPoint(SeqCstBB);
5192 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5194 SI->addCase(
Builder.getInt32(5), SeqCstBB);
5196 Builder.SetInsertPoint(ContBB);
5199 case Builtin::BI__scoped_atomic_thread_fence: {
5204 auto Ord = dyn_cast<llvm::ConstantInt>(Order);
5205 auto Scp = dyn_cast<llvm::ConstantInt>(
Scope);
5207 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5208 ? ScopeModel->map(Scp->getZExtValue())
5209 : ScopeModel->map(ScopeModel->getFallBackValue());
5210 switch (Ord->getZExtValue()) {
5217 llvm::AtomicOrdering::Acquire,
5219 llvm::AtomicOrdering::Acquire,
5224 llvm::AtomicOrdering::Release,
5226 llvm::AtomicOrdering::Release,
5230 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
5233 llvm::AtomicOrdering::AcquireRelease,
5237 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5240 llvm::AtomicOrdering::SequentiallyConsistent,
5252 switch (Ord->getZExtValue()) {
5255 ContBB->eraseFromParent();
5259 OrderBBs.emplace_back(
Builder.GetInsertBlock(),
5260 llvm::AtomicOrdering::Acquire);
5263 OrderBBs.emplace_back(
Builder.GetInsertBlock(),
5264 llvm::AtomicOrdering::Release);
5267 OrderBBs.emplace_back(
Builder.GetInsertBlock(),
5268 llvm::AtomicOrdering::AcquireRelease);
5271 OrderBBs.emplace_back(
Builder.GetInsertBlock(),
5272 llvm::AtomicOrdering::SequentiallyConsistent);
5281 Order =
Builder.CreateIntCast(Order,
Builder.getInt32Ty(),
false);
5282 llvm::SwitchInst *SI =
Builder.CreateSwitch(Order, ContBB);
5283 SI->addCase(
Builder.getInt32(1), AcquireBB);
5284 SI->addCase(
Builder.getInt32(2), AcquireBB);
5285 SI->addCase(
Builder.getInt32(3), ReleaseBB);
5286 SI->addCase(
Builder.getInt32(4), AcqRelBB);
5287 SI->addCase(
Builder.getInt32(5), SeqCstBB);
5289 OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire);
5290 OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release);
5291 OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
5292 OrderBBs.emplace_back(SeqCstBB,
5293 llvm::AtomicOrdering::SequentiallyConsistent);
5296 for (
auto &[OrderBB, Ordering] : OrderBBs) {
5297 Builder.SetInsertPoint(OrderBB);
5299 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5300 ? ScopeModel->map(Scp->getZExtValue())
5301 : ScopeModel->map(ScopeModel->getFallBackValue());
5307 llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
5308 for (
unsigned Scp : ScopeModel->getRuntimeValues())
5312 llvm::SwitchInst *SI =
Builder.CreateSwitch(SC, ContBB);
5313 for (
unsigned Scp : ScopeModel->getRuntimeValues()) {
5315 SI->addCase(
Builder.getInt32(Scp), B);
5326 Builder.SetInsertPoint(ContBB);
5330 case Builtin::BI__builtin_signbit:
5331 case Builtin::BI__builtin_signbitf:
5332 case Builtin::BI__builtin_signbitl: {
5337 case Builtin::BI__warn_memset_zero_len:
5339 case Builtin::BI__annotation: {
5342 for (
const Expr *Arg :
E->arguments()) {
5344 assert(Str->getCharByteWidth() == 2);
5345 StringRef WideBytes = Str->getBytes();
5346 std::string StrUtf8;
5347 if (!convertUTF16ToUTF8String(
5348 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5352 Strings.push_back(llvm::MDString::get(
getLLVMContext(), StrUtf8));
5362 case Builtin::BI__builtin_annotation: {
5371 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5375 case Builtin::BI__builtin_addcb:
5376 case Builtin::BI__builtin_addcs:
5377 case Builtin::BI__builtin_addc:
5378 case Builtin::BI__builtin_addcl:
5379 case Builtin::BI__builtin_addcll:
5380 case Builtin::BI__builtin_subcb:
5381 case Builtin::BI__builtin_subcs:
5382 case Builtin::BI__builtin_subc:
5383 case Builtin::BI__builtin_subcl:
5384 case Builtin::BI__builtin_subcll: {
5410 llvm::Intrinsic::ID IntrinsicId;
5411 switch (BuiltinID) {
5412 default: llvm_unreachable(
"Unknown multiprecision builtin id.");
5413 case Builtin::BI__builtin_addcb:
5414 case Builtin::BI__builtin_addcs:
5415 case Builtin::BI__builtin_addc:
5416 case Builtin::BI__builtin_addcl:
5417 case Builtin::BI__builtin_addcll:
5418 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5420 case Builtin::BI__builtin_subcb:
5421 case Builtin::BI__builtin_subcs:
5422 case Builtin::BI__builtin_subc:
5423 case Builtin::BI__builtin_subcl:
5424 case Builtin::BI__builtin_subcll:
5425 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5430 llvm::Value *Carry1;
5433 llvm::Value *Carry2;
5435 Sum1, Carryin, Carry2);
5436 llvm::Value *CarryOut =
Builder.CreateZExt(
Builder.CreateOr(Carry1, Carry2),
5442 case Builtin::BI__builtin_add_overflow:
5443 case Builtin::BI__builtin_sub_overflow:
5444 case Builtin::BI__builtin_mul_overflow: {
5452 WidthAndSignedness LeftInfo =
5454 WidthAndSignedness RightInfo =
5456 WidthAndSignedness ResultInfo =
5463 RightInfo, ResultArg, ResultQTy,
5469 *
this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5472 WidthAndSignedness EncompassingInfo =
5475 llvm::Type *EncompassingLLVMTy =
5480 llvm::Intrinsic::ID IntrinsicId;
5481 switch (BuiltinID) {
5483 llvm_unreachable(
"Unknown overflow builtin id.");
5484 case Builtin::BI__builtin_add_overflow:
5485 IntrinsicId = EncompassingInfo.Signed
5486 ? llvm::Intrinsic::sadd_with_overflow
5487 : llvm::Intrinsic::uadd_with_overflow;
5489 case Builtin::BI__builtin_sub_overflow:
5490 IntrinsicId = EncompassingInfo.Signed
5491 ? llvm::Intrinsic::ssub_with_overflow
5492 : llvm::Intrinsic::usub_with_overflow;
5494 case Builtin::BI__builtin_mul_overflow:
5495 IntrinsicId = EncompassingInfo.Signed
5496 ? llvm::Intrinsic::smul_with_overflow
5497 : llvm::Intrinsic::umul_with_overflow;
5506 Left =
Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5507 Right =
Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5510 llvm::Value *Overflow, *
Result;
5513 if (EncompassingInfo.Width > ResultInfo.Width) {
5516 llvm::Value *ResultTrunc =
Builder.CreateTrunc(
Result, ResultLLVMTy);
5520 llvm::Value *ResultTruncExt =
Builder.CreateIntCast(
5521 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5522 llvm::Value *TruncationOverflow =
5525 Overflow =
Builder.CreateOr(Overflow, TruncationOverflow);
5537 case Builtin::BI__builtin_uadd_overflow:
5538 case Builtin::BI__builtin_uaddl_overflow:
5539 case Builtin::BI__builtin_uaddll_overflow:
5540 case Builtin::BI__builtin_usub_overflow:
5541 case Builtin::BI__builtin_usubl_overflow:
5542 case Builtin::BI__builtin_usubll_overflow:
5543 case Builtin::BI__builtin_umul_overflow:
5544 case Builtin::BI__builtin_umull_overflow:
5545 case Builtin::BI__builtin_umulll_overflow:
5546 case Builtin::BI__builtin_sadd_overflow:
5547 case Builtin::BI__builtin_saddl_overflow:
5548 case Builtin::BI__builtin_saddll_overflow:
5549 case Builtin::BI__builtin_ssub_overflow:
5550 case Builtin::BI__builtin_ssubl_overflow:
5551 case Builtin::BI__builtin_ssubll_overflow:
5552 case Builtin::BI__builtin_smul_overflow:
5553 case Builtin::BI__builtin_smull_overflow:
5554 case Builtin::BI__builtin_smulll_overflow: {
5564 llvm::Intrinsic::ID IntrinsicId;
5565 switch (BuiltinID) {
5566 default: llvm_unreachable(
"Unknown overflow builtin id.");
5567 case Builtin::BI__builtin_uadd_overflow:
5568 case Builtin::BI__builtin_uaddl_overflow:
5569 case Builtin::BI__builtin_uaddll_overflow:
5570 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5572 case Builtin::BI__builtin_usub_overflow:
5573 case Builtin::BI__builtin_usubl_overflow:
5574 case Builtin::BI__builtin_usubll_overflow:
5575 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5577 case Builtin::BI__builtin_umul_overflow:
5578 case Builtin::BI__builtin_umull_overflow:
5579 case Builtin::BI__builtin_umulll_overflow:
5580 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5582 case Builtin::BI__builtin_sadd_overflow:
5583 case Builtin::BI__builtin_saddl_overflow:
5584 case Builtin::BI__builtin_saddll_overflow:
5585 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5587 case Builtin::BI__builtin_ssub_overflow:
5588 case Builtin::BI__builtin_ssubl_overflow:
5589 case Builtin::BI__builtin_ssubll_overflow:
5590 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5592 case Builtin::BI__builtin_smul_overflow:
5593 case Builtin::BI__builtin_smull_overflow:
5594 case Builtin::BI__builtin_smulll_overflow:
5595 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5606 case Builtin::BIaddressof:
5607 case Builtin::BI__addressof:
5608 case Builtin::BI__builtin_addressof:
5610 case Builtin::BI__builtin_function_start:
5613 case Builtin::BI__builtin_operator_new:
5616 case Builtin::BI__builtin_operator_delete:
5621 case Builtin::BI__builtin_is_aligned:
5623 case Builtin::BI__builtin_align_up:
5625 case Builtin::BI__builtin_align_down:
5628 case Builtin::BI__noop:
5631 case Builtin::BI__builtin_call_with_static_chain: {
5633 const Expr *Chain =
E->getArg(1);
5638 case Builtin::BI_InterlockedExchange8:
5639 case Builtin::BI_InterlockedExchange16:
5640 case Builtin::BI_InterlockedExchange:
5641 case Builtin::BI_InterlockedExchangePointer:
5644 case Builtin::BI_InterlockedCompareExchangePointer:
5647 case Builtin::BI_InterlockedCompareExchangePointer_nf:
5650 case Builtin::BI_InterlockedCompareExchange8:
5651 case Builtin::BI_InterlockedCompareExchange16:
5652 case Builtin::BI_InterlockedCompareExchange:
5653 case Builtin::BI_InterlockedCompareExchange64:
5655 case Builtin::BI_InterlockedIncrement16:
5656 case Builtin::BI_InterlockedIncrement:
5659 case Builtin::BI_InterlockedDecrement16:
5660 case Builtin::BI_InterlockedDecrement:
5663 case Builtin::BI_InterlockedAnd8:
5664 case Builtin::BI_InterlockedAnd16:
5665 case Builtin::BI_InterlockedAnd:
5667 case Builtin::BI_InterlockedExchangeAdd8:
5668 case Builtin::BI_InterlockedExchangeAdd16:
5669 case Builtin::BI_InterlockedExchangeAdd:
5672 case Builtin::BI_InterlockedExchangeSub8:
5673 case Builtin::BI_InterlockedExchangeSub16:
5674 case Builtin::BI_InterlockedExchangeSub:
5677 case Builtin::BI_InterlockedOr8:
5678 case Builtin::BI_InterlockedOr16:
5679 case Builtin::BI_InterlockedOr:
5681 case Builtin::BI_InterlockedXor8:
5682 case Builtin::BI_InterlockedXor16:
5683 case Builtin::BI_InterlockedXor:
5686 case Builtin::BI_bittest64:
5687 case Builtin::BI_bittest:
5688 case Builtin::BI_bittestandcomplement64:
5689 case Builtin::BI_bittestandcomplement:
5690 case Builtin::BI_bittestandreset64:
5691 case Builtin::BI_bittestandreset:
5692 case Builtin::BI_bittestandset64:
5693 case Builtin::BI_bittestandset:
5694 case Builtin::BI_interlockedbittestandreset:
5695 case Builtin::BI_interlockedbittestandreset64:
5696 case Builtin::BI_interlockedbittestandset64:
5697 case Builtin::BI_interlockedbittestandset:
5698 case Builtin::BI_interlockedbittestandset_acq:
5699 case Builtin::BI_interlockedbittestandset_rel:
5700 case Builtin::BI_interlockedbittestandset_nf:
5701 case Builtin::BI_interlockedbittestandreset_acq:
5702 case Builtin::BI_interlockedbittestandreset_rel:
5703 case Builtin::BI_interlockedbittestandreset_nf:
5708 case Builtin::BI__iso_volatile_load8:
5709 case Builtin::BI__iso_volatile_load16:
5710 case Builtin::BI__iso_volatile_load32:
5711 case Builtin::BI__iso_volatile_load64:
5713 case Builtin::BI__iso_volatile_store8:
5714 case Builtin::BI__iso_volatile_store16:
5715 case Builtin::BI__iso_volatile_store32:
5716 case Builtin::BI__iso_volatile_store64:
5719 case Builtin::BI__builtin_ptrauth_sign_constant:
5722 case Builtin::BI__builtin_ptrauth_auth:
5723 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5724 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5725 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5726 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5727 case Builtin::BI__builtin_ptrauth_strip: {
5730 for (
auto argExpr :
E->arguments())
5734 llvm::Type *OrigValueType = Args[0]->getType();
5735 if (OrigValueType->isPointerTy())
5738 switch (BuiltinID) {
5739 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5740 if (Args[4]->getType()->isPointerTy())
5744 case Builtin::BI__builtin_ptrauth_auth:
5745 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5746 if (Args[2]->getType()->isPointerTy())
5750 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5751 if (Args[1]->getType()->isPointerTy())
5755 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5756 case Builtin::BI__builtin_ptrauth_strip:
5761 auto IntrinsicID = [&]() ->
unsigned {
5762 switch (BuiltinID) {
5763 case Builtin::BI__builtin_ptrauth_auth:
5764 return llvm::Intrinsic::ptrauth_auth;
5765 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5766 return llvm::Intrinsic::ptrauth_resign;
5767 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5768 return llvm::Intrinsic::ptrauth_blend;
5769 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5770 return llvm::Intrinsic::ptrauth_sign_generic;
5771 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5772 return llvm::Intrinsic::ptrauth_sign;
5773 case Builtin::BI__builtin_ptrauth_strip:
5774 return llvm::Intrinsic::ptrauth_strip;
5776 llvm_unreachable(
"bad ptrauth intrinsic");
5781 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5782 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5783 OrigValueType->isPointerTy()) {
5789 case Builtin::BI__exception_code:
5790 case Builtin::BI_exception_code:
5792 case Builtin::BI__exception_info:
5793 case Builtin::BI_exception_info:
5795 case Builtin::BI__abnormal_termination:
5796 case Builtin::BI_abnormal_termination:
5798 case Builtin::BI_setjmpex:
5799 if (
getTarget().getTriple().isOSMSVCRT() &&
E->getNumArgs() == 1 &&
5803 case Builtin::BI_setjmp:
5804 if (
getTarget().getTriple().isOSMSVCRT() &&
E->getNumArgs() == 1 &&
5806 if (
getTarget().getTriple().getArch() == llvm::Triple::x86)
5808 else if (
getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5815 case Builtin::BImove:
5816 case Builtin::BImove_if_noexcept:
5817 case Builtin::BIforward:
5818 case Builtin::BIforward_like:
5819 case Builtin::BIas_const:
5821 case Builtin::BI__GetExceptionInfo: {
5822 if (llvm::GlobalVariable *GV =
5828 case Builtin::BI__fastfail:
5831 case Builtin::BI__builtin_coro_id:
5833 case Builtin::BI__builtin_coro_promise:
5835 case Builtin::BI__builtin_coro_resume:
5838 case Builtin::BI__builtin_coro_frame:
5840 case Builtin::BI__builtin_coro_noop:
5842 case Builtin::BI__builtin_coro_free:
5844 case Builtin::BI__builtin_coro_destroy:
5847 case Builtin::BI__builtin_coro_done:
5849 case Builtin::BI__builtin_coro_alloc:
5851 case Builtin::BI__builtin_coro_begin:
5853 case Builtin::BI__builtin_coro_end:
5855 case Builtin::BI__builtin_coro_suspend:
5857 case Builtin::BI__builtin_coro_size:
5859 case Builtin::BI__builtin_coro_align:
5863 case Builtin::BIread_pipe:
5864 case Builtin::BIwrite_pipe: {
5868 Value *PacketSize = OpenCLRT.getPipeElemSize(
E->getArg(0));
5869 Value *PacketAlign = OpenCLRT.getPipeElemAlign(
E->getArg(0));
5872 unsigned GenericAS =
5874 llvm::Type *I8PTy = llvm::PointerType::get(
getLLVMContext(), GenericAS);
5877 if (2U ==
E->getNumArgs()) {
5878 const char *Name = (BuiltinID == Builtin::BIread_pipe) ?
"__read_pipe_2"
5883 llvm::FunctionType *FTy = llvm::FunctionType::get(
5888 {Arg0, ACast, PacketSize, PacketAlign}));
5890 assert(4 ==
E->getNumArgs() &&
5891 "Illegal number of parameters to pipe function");
5892 const char *Name = (BuiltinID == Builtin::BIread_pipe) ?
"__read_pipe_4"
5899 llvm::FunctionType *FTy = llvm::FunctionType::get(
5908 {Arg0, Arg1, Arg2, ACast, PacketSize, PacketAlign}));
5913 case Builtin::BIreserve_read_pipe:
5914 case Builtin::BIreserve_write_pipe:
5915 case Builtin::BIwork_group_reserve_read_pipe:
5916 case Builtin::BIwork_group_reserve_write_pipe:
5917 case Builtin::BIsub_group_reserve_read_pipe:
5918 case Builtin::BIsub_group_reserve_write_pipe: {
5921 if (BuiltinID == Builtin::BIreserve_read_pipe)
5922 Name =
"__reserve_read_pipe";
5923 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5924 Name =
"__reserve_write_pipe";
5925 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5926 Name =
"__work_group_reserve_read_pipe";
5927 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5928 Name =
"__work_group_reserve_write_pipe";
5929 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5930 Name =
"__sub_group_reserve_read_pipe";
5932 Name =
"__sub_group_reserve_write_pipe";
5938 Value *PacketSize = OpenCLRT.getPipeElemSize(
E->getArg(0));
5939 Value *PacketAlign = OpenCLRT.getPipeElemAlign(
E->getArg(0));
5943 llvm::FunctionType *FTy = llvm::FunctionType::get(
5950 {Arg0, Arg1, PacketSize, PacketAlign}));
5954 case Builtin::BIcommit_read_pipe:
5955 case Builtin::BIcommit_write_pipe:
5956 case Builtin::BIwork_group_commit_read_pipe:
5957 case Builtin::BIwork_group_commit_write_pipe:
5958 case Builtin::BIsub_group_commit_read_pipe:
5959 case Builtin::BIsub_group_commit_write_pipe: {
5961 if (BuiltinID == Builtin::BIcommit_read_pipe)
5962 Name =
"__commit_read_pipe";
5963 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5964 Name =
"__commit_write_pipe";
5965 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5966 Name =
"__work_group_commit_read_pipe";
5967 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5968 Name =
"__work_group_commit_write_pipe";
5969 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5970 Name =
"__sub_group_commit_read_pipe";
5972 Name =
"__sub_group_commit_write_pipe";
5977 Value *PacketSize = OpenCLRT.getPipeElemSize(
E->getArg(0));
5978 Value *PacketAlign = OpenCLRT.getPipeElemAlign(
E->getArg(0));
5982 llvm::FunctionType *FTy =
5987 {Arg0, Arg1, PacketSize, PacketAlign}));
5990 case Builtin::BIget_pipe_num_packets:
5991 case Builtin::BIget_pipe_max_packets: {
5992 const char *BaseName;
5994 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5995 BaseName =
"__get_pipe_num_packets";
5997 BaseName =
"__get_pipe_max_packets";
5998 std::string Name = std::string(BaseName) +
5999 std::string(PipeTy->isReadOnly() ?
"_ro" :
"_wo");
6004 Value *PacketSize = OpenCLRT.getPipeElemSize(
E->getArg(0));
6005 Value *PacketAlign = OpenCLRT.getPipeElemAlign(
E->getArg(0));
6007 llvm::FunctionType *FTy = llvm::FunctionType::get(
6011 {Arg0, PacketSize, PacketAlign}));
6015 case Builtin::BIto_global:
6016 case Builtin::BIto_local:
6017 case Builtin::BIto_private: {
6019 auto NewArgT = llvm::PointerType::get(
6022 auto NewRetT = llvm::PointerType::get(
6026 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT},
false);
6027 llvm::Value *NewArg;
6028 if (Arg0->
getType()->getPointerAddressSpace() !=
6029 NewArgT->getPointerAddressSpace())
6032 NewArg =
Builder.CreateBitOrPointerCast(Arg0, NewArgT);
6033 auto NewName = std::string(
"__") +
E->getDirectCallee()->getName().str();
6048 case Builtin::BIenqueue_kernel: {
6050 unsigned NumArgs =
E->getNumArgs();
6053 llvm::Type *GenericVoidPtrTy =
Builder.getPtrTy(
6065 Name =
"__enqueue_kernel_basic";
6066 llvm::Type *ArgTys[] = {QueueTy,
Int32Ty, RangeTy, GenericVoidPtrTy,
6068 llvm::FunctionType *FTy = llvm::FunctionType::get(
6074 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6075 llvm::Value *
Block =
6076 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6079 {Queue, Flags, Range, Kernel, Block});
6082 assert(NumArgs >= 5 &&
"Invalid enqueue_kernel signature");
6086 auto CreateArrayForSizeVar = [=](
unsigned First)
6087 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
6088 llvm::APInt ArraySize(32, NumArgs -
First);
6090 getContext().getSizeType(), ArraySize,
nullptr,
6094 llvm::Value *TmpPtr = Tmp.getPointer();
6099 llvm::Value *Alloca = TmpPtr->stripPointerCasts();
6102 llvm::Value *ElemPtr;
6105 auto *
Zero = llvm::ConstantInt::get(
IntTy, 0);
6106 for (
unsigned I =
First; I < NumArgs; ++I) {
6107 auto *Index = llvm::ConstantInt::get(
IntTy, I -
First);
6119 return std::tie(ElemPtr, TmpSize, Alloca);
6125 Name =
"__enqueue_kernel_varargs";
6129 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6130 auto *
Block =
Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6131 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6132 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
6136 llvm::Value *
const Args[] = {Queue, Flags,
6140 llvm::Type *
const ArgTys[] = {
6141 QueueTy,
IntTy, RangeTy, GenericVoidPtrTy,
6142 GenericVoidPtrTy,
IntTy, ElemPtr->getType()};
6144 llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, ArgTys,
false);
6153 llvm::PointerType *PtrTy = llvm::PointerType::get(
6157 llvm::Value *NumEvents =
6163 llvm::Value *EventWaitList =
nullptr;
6166 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
6173 EventWaitList =
Builder.CreatePointerCast(EventWaitList, PtrTy);
6175 llvm::Value *EventRet =
nullptr;
6178 EventRet = llvm::ConstantPointerNull::get(PtrTy);
6187 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6188 llvm::Value *
Block =
6189 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6191 std::vector<llvm::Type *> ArgTys = {
6193 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
6195 std::vector<llvm::Value *> Args = {Queue, Flags,
Range,
6196 NumEvents, EventWaitList, EventRet,
6201 Name =
"__enqueue_kernel_basic_events";
6202 llvm::FunctionType *FTy = llvm::FunctionType::get(
6210 Args.push_back(ConstantInt::get(
Int32Ty, NumArgs - 7));
6212 Name =
"__enqueue_kernel_events_varargs";
6214 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6215 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
6216 Args.push_back(ElemPtr);
6217 ArgTys.push_back(ElemPtr->getType());
6219 llvm::FunctionType *FTy = llvm::FunctionType::get(
6228 llvm_unreachable(
"Unexpected enqueue_kernel signature");
6232 case Builtin::BIget_kernel_work_group_size: {
6233 llvm::Type *GenericVoidPtrTy =
Builder.getPtrTy(
6238 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6239 Value *Arg =
Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6242 llvm::FunctionType::get(
IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6244 "__get_kernel_work_group_size_impl"),
6247 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
6248 llvm::Type *GenericVoidPtrTy =
Builder.getPtrTy(
6253 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6254 Value *Arg =
Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6257 llvm::FunctionType::get(
IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6259 "__get_kernel_preferred_work_group_size_multiple_impl"),
6262 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
6263 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
6264 llvm::Type *GenericVoidPtrTy =
Builder.getPtrTy(
6271 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6274 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
6275 ?
"__get_kernel_max_sub_group_size_for_ndrange_impl"
6276 :
"__get_kernel_sub_group_count_for_ndrange_impl";
6279 llvm::FunctionType::get(
6280 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
6283 {NDRange, Kernel, Block}));
6285 case Builtin::BI__builtin_store_half:
6286 case Builtin::BI__builtin_store_halff: {
6293 case Builtin::BI__builtin_load_half: {
6298 case Builtin::BI__builtin_load_halff: {
6303 case Builtin::BI__builtin_printf:
6304 case Builtin::BIprintf:
6305 if (
getTarget().getTriple().isNVPTX() ||
6308 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
6311 if ((
getTarget().getTriple().isAMDGCN() ||
6318 case Builtin::BI__builtin_canonicalize:
6319 case Builtin::BI__builtin_canonicalizef:
6320 case Builtin::BI__builtin_canonicalizef16:
6321 case Builtin::BI__builtin_canonicalizel:
6323 emitBuiltinWithOneOverloadedType<1>(*
this,
E, Intrinsic::canonicalize));
6325 case Builtin::BI__builtin_thread_pointer: {
6326 if (!
getContext().getTargetInfo().isTLSSupported())
6331 case Builtin::BI__builtin_os_log_format:
6334 case Builtin::BI__xray_customevent: {
6347 auto FTy = F->getFunctionType();
6348 auto Arg0 =
E->getArg(0);
6350 auto Arg0Ty = Arg0->
getType();
6351 auto PTy0 = FTy->getParamType(0);
6352 if (PTy0 != Arg0Val->getType()) {
6353 if (Arg0Ty->isArrayType())
6356 Arg0Val =
Builder.CreatePointerCast(Arg0Val, PTy0);
6359 auto PTy1 = FTy->getParamType(1);
6361 Arg1 =
Builder.CreateTruncOrBitCast(Arg1, PTy1);
6365 case Builtin::BI__xray_typedevent: {
6381 auto FTy = F->getFunctionType();
6383 auto PTy0 = FTy->getParamType(0);
6385 Arg0 =
Builder.CreateTruncOrBitCast(Arg0, PTy0);
6386 auto Arg1 =
E->getArg(1);
6388 auto Arg1Ty = Arg1->
getType();
6389 auto PTy1 = FTy->getParamType(1);
6390 if (PTy1 != Arg1Val->getType()) {
6391 if (Arg1Ty->isArrayType())
6394 Arg1Val =
Builder.CreatePointerCast(Arg1Val, PTy1);
6397 auto PTy2 = FTy->getParamType(2);
6399 Arg2 =
Builder.CreateTruncOrBitCast(Arg2, PTy2);
6403 case Builtin::BI__builtin_ms_va_start:
6404 case Builtin::BI__builtin_ms_va_end:
6407 BuiltinID == Builtin::BI__builtin_ms_va_start));
6409 case Builtin::BI__builtin_ms_va_copy: {
6426 case Builtin::BI__builtin_get_device_side_mangled_name: {
6454 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6458 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6460 llvm::Triple::getArchTypePrefix(
getTarget().getTriple().getArch());
6461 if (!Prefix.empty()) {
6462 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6463 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix ==
"spv" &&
6464 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6465 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(
"amdgcn", Name);
6469 if (IntrinsicID == Intrinsic::not_intrinsic)
6470 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6473 if (IntrinsicID != Intrinsic::not_intrinsic) {
6478 unsigned ICEArguments = 0;
6484 llvm::FunctionType *FTy = F->getFunctionType();
6486 for (
unsigned i = 0, e =
E->getNumArgs(); i != e; ++i) {
6490 llvm::Type *PTy = FTy->getParamType(i);
6491 if (PTy != ArgValue->
getType()) {
6493 if (
auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6494 if (PtrTy->getAddressSpace() !=
6495 ArgValue->
getType()->getPointerAddressSpace()) {
6498 PtrTy->getAddressSpace()));
6504 if (PTy->isX86_AMXTy())
6505 ArgValue =
Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6506 {ArgValue->
getType()}, {ArgValue});
6508 ArgValue =
Builder.CreateBitCast(ArgValue, PTy);
6511 Args.push_back(ArgValue);
6517 llvm::Type *RetTy =
VoidTy;
6521 if (RetTy !=
V->getType()) {
6523 if (
auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6524 if (PtrTy->getAddressSpace() !=
V->getType()->getPointerAddressSpace()) {
6527 PtrTy->getAddressSpace()));
6533 if (
V->getType()->isX86_AMXTy())
6534 V =
Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6540 if (RetTy->isVoidTy())
6560 if (
V->getType()->isVoidTy())
6567 llvm_unreachable(
"No current target builtin returns complex");
6569 llvm_unreachable(
"Bad evaluation kind in EmitBuiltinExpr");
6576 if (
V->getType()->isVoidTy())
6583 llvm_unreachable(
"No current hlsl builtin returns complex");
6585 llvm_unreachable(
"Bad evaluation kind in EmitBuiltinExpr");
6600 llvm::Triple::ArchType Arch) {
6612 case llvm::Triple::arm:
6613 case llvm::Triple::armeb:
6614 case llvm::Triple::thumb:
6615 case llvm::Triple::thumbeb:
6617 case llvm::Triple::aarch64:
6618 case llvm::Triple::aarch64_32:
6619 case llvm::Triple::aarch64_be:
6621 case llvm::Triple::bpfeb:
6622 case llvm::Triple::bpfel:
6624 case llvm::Triple::x86:
6625 case llvm::Triple::x86_64:
6627 case llvm::Triple::ppc:
6628 case llvm::Triple::ppcle:
6629 case llvm::Triple::ppc64:
6630 case llvm::Triple::ppc64le:
6632 case llvm::Triple::r600:
6633 case llvm::Triple::amdgcn:
6635 case llvm::Triple::systemz:
6637 case llvm::Triple::nvptx:
6638 case llvm::Triple::nvptx64:
6640 case llvm::Triple::wasm32:
6641 case llvm::Triple::wasm64:
6643 case llvm::Triple::hexagon:
6645 case llvm::Triple::riscv32:
6646 case llvm::Triple::riscv64:
6648 case llvm::Triple::spirv:
6650 case llvm::Triple::spirv64:
6663 assert(
getContext().getAuxTargetInfo() &&
"Missing aux target info");
6675 bool HasLegalHalfType =
true,
6677 bool AllowBFloatArgsAndRet =
true) {
6678 int IsQuad = TypeFlags.
isQuad();
6682 return llvm::FixedVectorType::get(CGF->
Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6685 return llvm::FixedVectorType::get(CGF->
Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6687 if (AllowBFloatArgsAndRet)
6688 return llvm::FixedVectorType::get(CGF->
BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6690 return llvm::FixedVectorType::get(CGF->
Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6692 if (HasLegalHalfType)
6693 return llvm::FixedVectorType::get(CGF->
HalfTy, V1Ty ? 1 : (4 << IsQuad));
6695 return llvm::FixedVectorType::get(CGF->
Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6697 return llvm::FixedVectorType::get(CGF->
Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6700 return llvm::FixedVectorType::get(CGF->
Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6705 return llvm::FixedVectorType::get(CGF->
Int8Ty, 16);
6707 return llvm::FixedVectorType::get(CGF->
FloatTy, V1Ty ? 1 : (2 << IsQuad));
6709 return llvm::FixedVectorType::get(CGF->
DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6711 llvm_unreachable(
"Unknown vector element type!");
6716 int IsQuad = IntTypeFlags.
isQuad();
6719 return llvm::FixedVectorType::get(CGF->
HalfTy, (4 << IsQuad));
6721 return llvm::FixedVectorType::get(CGF->
FloatTy, (2 << IsQuad));
6723 return llvm::FixedVectorType::get(CGF->
DoubleTy, (1 << IsQuad));
6725 llvm_unreachable(
"Type can't be converted to floating-point!");
6730 const ElementCount &Count) {
6731 Value *SV = llvm::ConstantVector::getSplat(Count,
C);
6732 return Builder.CreateShuffleVector(
V,
V, SV,
"lane");
6736 ElementCount EC = cast<llvm::VectorType>(
V->getType())->getElementCount();
6742 unsigned shift,
bool rightshift) {
// NOTE(review): fragment; the start of the signature and several statements
// inside the loop are not visible in this view.
//
// Walk the formal parameters of F in step with the operand index j.
6744 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6745 ai != ae; ++ai, ++j) {
// For constrained-FP intrinsics, metadata-typed parameters are singled out
// here; the action taken for them is not visible.
6746 if (F->isConstrainedFPIntrinsic())
6747 if (ai->getType()->isMetadataTy())
// The operand at index `shift` (when shift is non-zero) gets special
// treatment. The statement guarded by this test is not visible, so the bitcast
// below should not be read as its body -- TODO confirm against the full file.
6749 if (shift > 0 && shift == j)
// Bitcast the operand to the type the intrinsic declares for this parameter.
6752 Ops[j] =
Builder.CreateBitCast(Ops[j], ai->getType(), name);
// Constrained-FP intrinsics are emitted through the dedicated IRBuilder entry
// point; everything else is a plain call.
6755 if (F->isConstrainedFPIntrinsic())
6756 return Builder.CreateConstrainedFPCall(F, Ops, name);
6758 return Builder.CreateCall(F, Ops, name);
// NOTE(review): fragment; the signature is not visible. V must be a
// ConstantInt (cast<> asserts otherwise). Its sign-extended value is narrowed
// to int, negated when `neg` is set, and turned into a constant of type Ty.
6763 int SV = cast<ConstantInt>(
V)->getSExtValue();
6764 return ConstantInt::get(Ty, neg ? -SV : SV);
6769 llvm::Type *Ty,
bool usgn,
// NOTE(review): fragment; the start and end of the signature and several
// branch conditions are not visible in this view.
//
// Ty must be a vector type and Shift a ConstantInt (both cast<> calls assert).
6771 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6773 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6774 int EltSize = VTy->getScalarSizeInBits();
6776 Vec =
Builder.CreateBitCast(Vec, Ty);
// Special handling when the shift amount equals the element width. The
// visible lines show an all-zero vector being returned; the conditions
// separating that return from the statements after it are not visible.
6780 if (ShiftAmt == EltSize) {
6783 return llvm::ConstantAggregateZero::get(VTy);
// Rebuild Shift as a constant of the vector's element type from ShiftAmt
// (any adjustment made to ShiftAmt before this point is not visible).
6788 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
// Logical vs. arithmetic right shift; presumably selected by `usgn`, but the
// selecting condition is not visible -- TODO confirm.
6794 return Builder.CreateLShr(Vec, Shift, name);
6796 return Builder.CreateAShr(Vec, Shift, name);
// One row of a builtin-to-intrinsic table. The NEONMAP*/SVEMAP*/SMEMAP* macros
// in this file expand to initializers for this struct.
// NOTE(review): some members (including the BuiltinID used by the comparisons
// below) are declared on lines not visible in this view.
6822struct ARMVectorIntrinsicInfo {
// Name string; the table macros fill it with the stringized builtin base name.
6823 const char *NameHint;
// Primary and alternate LLVM intrinsic IDs; the macros store 0 when absent.
6825 unsigned LLVMIntrinsic;
6826 unsigned AltLLVMIntrinsic;
// Order a row against a raw builtin ID; this is the comparison used when the
// tables are searched with llvm::lower_bound(IntrinsicMap, BuiltinID).
6829 bool operator<(
unsigned RHSBuiltinID)
const {
6830 return BuiltinID < RHSBuiltinID;
// Order two rows by builtin ID; this is the comparison used by
// llvm::is_sorted on a table.
6832 bool operator<(
const ARMVectorIntrinsicInfo &TE)
const {
6833 return BuiltinID < TE.BuiltinID;
// NEONMAP0: table row for a NEON builtin with no intrinsic mapping. It expands
// to the stringized name, the NEON::BI__builtin_neon_<NameBase> ID, then three
// zeros.
6838#define NEONMAP0(NameBase) \
6839 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
// NEONMAP1: table row with a single intrinsic. The fourth initializer (the
// alternate-intrinsic position) is 0 and TypeModifier is passed through.
6841#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6842 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6843 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
// NEONMAP2: table row with both a primary and an alternate intrinsic.
// NOTE(review): the final line of this macro is not visible in this view.
6845#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6846 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6847 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6851 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6858 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6859 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6863 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6864 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6865 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6866 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6867 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6868 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6869 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6870 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6871 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6884 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6885 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6886 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6887 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6888 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6889 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6890 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6891 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6908 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6911 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6913 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6914 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6915 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6916 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6917 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6918 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6919 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6920 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6921 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6928 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6929 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6930 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6931 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6932 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6933 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6934 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6935 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6936 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6937 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6938 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6939 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6940 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6941 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6942 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6943 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6944 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6945 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6946 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6947 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6948 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6949 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6950 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6951 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6952 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6953 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6954 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6955 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6956 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6957 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6958 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6959 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6960 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6961 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6962 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6963 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6964 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6965 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6966 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6967 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6968 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6969 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6970 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6971 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6972 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6973 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6974 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6975 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6976 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6980 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6981 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6982 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6983 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6984 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6985 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6986 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6987 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6988 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6995 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6996 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6997 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6998 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
7008 NEONMAP1(vld1_v, arm_neon_vld1, 0),
7009 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
7010 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
7011 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
7013 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
7014 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
7015 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
7016 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
7017 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
7018 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
7019 NEONMAP1(vld2_v, arm_neon_vld2, 0),
7020 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
7021 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
7022 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
7023 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
7024 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
7025 NEONMAP1(vld3_v, arm_neon_vld3, 0),
7026 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
7027 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
7028 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
7029 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
7030 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
7031 NEONMAP1(vld4_v, arm_neon_vld4, 0),
7032 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
7033 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
7034 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
7043 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
7044 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
7062 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
7063 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
7087 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
7088 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
7092 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7093 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7116 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7117 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7121 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
7122 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
7123 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
7124 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
7125 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
7126 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
7135 NEONMAP1(vst1_v, arm_neon_vst1, 0),
7136 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
7137 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
7138 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
7139 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
7140 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
7141 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
7142 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
7143 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
7144 NEONMAP1(vst2_v, arm_neon_vst2, 0),
7145 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
7146 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
7147 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
7148 NEONMAP1(vst3_v, arm_neon_vst3, 0),
7149 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
7150 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
7151 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
7152 NEONMAP1(vst4_v, arm_neon_vst4, 0),
7153 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
7154 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
7160 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
7161 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
7162 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
7174 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
7175 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
7180 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
7181 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
7182 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
7183 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
7192 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
7193 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
7194 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
7195 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
7196 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
7207 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
7208 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
7209 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
7210 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
7211 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
7212 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
7213 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
7214 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
7251 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
7254 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
7256 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7257 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7258 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7259 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7260 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7261 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7262 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7263 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7264 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7265 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7271 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7272 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7273 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7274 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7275 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7276 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7277 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7278 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7279 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7280 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7282 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
7283 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
7284 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
7285 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
7298 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
7299 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
7300 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
7301 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
7302 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
7303 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
7304 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
7305 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
7310 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
7311 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
7312 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
7313 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
7314 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
7315 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
7316 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
7317 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
7330 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
7331 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7332 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7333 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7335 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7336 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7351 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7352 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7354 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7355 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7363 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7364 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7368 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7369 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7370 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7397 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7398 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7402 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7403 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7404 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7405 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7406 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7407 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7408 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7409 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7410 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7411 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7420 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7421 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7422 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7423 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7424 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7425 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7426 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7427 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7428 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7429 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7430 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7431 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7432 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7433 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7434 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7438 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7439 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7440 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7441 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7498 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7519 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7547 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7628 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7629 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7630 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7631 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7685 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7686 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7687 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7688 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7689 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7690 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7691 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7692 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7693 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7694 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7695 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7696 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7697 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7698 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7699 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7700 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7701 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7702 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7703 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7704 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7705 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7706 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7707 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7708 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7709 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7710 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7711 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7712 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7713 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7714 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7715 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7716 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7717 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7718 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7719 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7720 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7721 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7722 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7723 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7724 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7725 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7726 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7727 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7728 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7729 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7730 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7731 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7732 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7733 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7734 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7735 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7736 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7737 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7738 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7739 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7740 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7741 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7742 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7743 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7744 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7745 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7746 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7747 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7748 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7749 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7750 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7751 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7752 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7753 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7754 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7755 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7756 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7757 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7758 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7759 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7760 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7761 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7762 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7763 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7764 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7765 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7766 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7767 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7768 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7769 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7770 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7771 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7772 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7773 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7774 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7775 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7776 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7777 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7778 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7779 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7780 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7781 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7782 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7783 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7784 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7785 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7786 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7787 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7788 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7789 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7790 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7791 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7792 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7793 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7794 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7795 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7796 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7797 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7798 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7799 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7800 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7801 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7802 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7803 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7804 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7805 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7806 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7807 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7808 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7809 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7810 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7811 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7812 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7816 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7817 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7818 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7819 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7820 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7821 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7822 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7823 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7824 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7825 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7826 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7827 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
// SVE counterparts of the NEON table-row macros, keyed on
// SVE::BI__builtin_sve_<NameBase>. SVEMAP1 carries one intrinsic followed by a
// 0; SVEMAP2 carries no intrinsic (two zeros) and only a TypeModifier.
// NOTE(review): parts of SVEMAP1 are on lines not visible in this view.
7834#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7836 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7840#define SVEMAP2(NameBase, TypeModifier) \
7841 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
// GET_SVE_LLVM_INTRINSIC_MAP is defined only for the duration of these two
// includes; presumably it selects the table-row section of the included
// files -- TODO confirm against the generated .inc.
7843#define GET_SVE_LLVM_INTRINSIC_MAP
7844#include "clang/Basic/arm_sve_builtin_cg.inc"
7845#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7846#undef GET_SVE_LLVM_INTRINSIC_MAP
// SME counterparts of the table-row macros, keyed on
// SME::BI__builtin_sme_<NameBase>. SMEMAP1 carries one intrinsic followed by a
// 0; SMEMAP2 carries no intrinsic (two zeros) and only a TypeModifier.
// NOTE(review): parts of SMEMAP1 are on lines not visible in this view.
7852#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7854 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7858#define SMEMAP2(NameBase, TypeModifier) \
7859 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
// GET_SME_LLVM_INTRINSIC_MAP is defined only for the duration of this include;
// presumably it selects the table-row section of the included file -- TODO
// confirm against the generated .inc.
7861#define GET_SME_LLVM_INTRINSIC_MAP
7862#include "clang/Basic/arm_sme_builtin_cg.inc"
7863#undef GET_SME_LLVM_INTRINSIC_MAP
// Looks up BuiltinID in IntrinsicMap by binary search and yields a pointer to
// a table row.
// NOTE(review): the function name, its first parameter and both return
// statements are on lines not visible in this view.
7876static const ARMVectorIntrinsicInfo *
7878 unsigned BuiltinID,
bool &MapProvenSorted) {
// lower_bound requires a sorted table. Check that once per table: the caller's
// flag is set after the first check so later lookups skip it. The check itself
// is an assert.
7881 if (!MapProvenSorted) {
7882 assert(llvm::is_sorted(IntrinsicMap));
7883 MapProvenSorted =
true;
// First row whose BuiltinID is not less than the requested one.
7887 const ARMVectorIntrinsicInfo *Builtin =
7888 llvm::lower_bound(IntrinsicMap, BuiltinID);
// lower_bound may land on end() or on a row with a larger ID, so an exact
// match has to be verified before the row is used.
7890 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7898 llvm::Type *ArgType,
// NOTE(review): fragment; the conditions guarding each statement below are not
// visible in this view.
//
// Wrap Ty in a fixed vector with VectorSize / bit-width(Ty) lanes, or a single
// lane when VectorSize is 0.
7911 Ty = llvm::FixedVectorType::get(
7912 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
// Same widening for ArgType.
7919 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7920 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
// ArgType is appended to the type list Tys in two places; the conditions that
// guard each append are not visible.
7924 Tys.push_back(ArgType);
7927 Tys.push_back(ArgType);
// NOTE(review): fragment; the signature and several statements are not visible
// in this view.
//
// Unpack the table row describing this builtin.
7938 unsigned BuiltinID = SISDInfo.BuiltinID;
7939 unsigned int Int = SISDInfo.LLVMIntrinsic;
7940 unsigned Modifier = SISDInfo.TypeModifier;
7941 const char *
s = SISDInfo.NameHint;
// The listed vcle*/vclt*/vcale*/vcalt* builtins have their first two operands
// swapped; presumably so they can be emitted through the mirrored comparison
// intrinsic -- TODO confirm.
7943 switch (BuiltinID) {
7944 case NEON::BI__builtin_neon_vcled_s64:
7945 case NEON::BI__builtin_neon_vcled_u64:
7946 case NEON::BI__builtin_neon_vcles_f32:
7947 case NEON::BI__builtin_neon_vcled_f64:
7948 case NEON::BI__builtin_neon_vcltd_s64:
7949 case NEON::BI__builtin_neon_vcltd_u64:
7950 case NEON::BI__builtin_neon_vclts_f32:
7951 case NEON::BI__builtin_neon_vcltd_f64:
7952 case NEON::BI__builtin_neon_vcales_f32:
7953 case NEON::BI__builtin_neon_vcaled_f64:
7954 case NEON::BI__builtin_neon_vcalts_f32:
7955 case NEON::BI__builtin_neon_vcaltd_f64:
7959 std::swap(Ops[0], Ops[1]);
// A row with a zero intrinsic ID must not reach the generic path below.
7963 assert(Int &&
"Generic code assumes a valid intrinsic");
7966 const Expr *Arg =
E->getArg(0);
// Constant 0 of type SizeTy, used as the insertelement lane index below.
7971 ConstantInt *C0 = ConstantInt::get(CGF.
SizeTy, 0);
// Walk the intrinsic's formal parameters in step with the operand index j.
7972 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7973 ai != ae; ++ai, ++j) {
7974 llvm::Type *ArgTy = ai->getType();
// Operands whose bit width already matches the parameter are singled out; the
// action taken for them is not visible.
7975 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7976 ArgTy->getPrimitiveSizeInBits())
// Otherwise the parameter must be a vector and the operand a scalar.
7979 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
// Truncate or bitcast the scalar to the vector's element type, then place it
// in lane 0 of an otherwise-poison vector of the parameter type.
7982 Ops[j] = CGF.
Builder.CreateTruncOrBitCast(
7983 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7985 CGF.
Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
// The call result is wider than the expected result type; the statement that
// handles this case is not visible.
7990 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7991 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7998 unsigned BuiltinID,
unsigned LLVMIntrinsic,
unsigned AltLLVMIntrinsic,
7999 const char *NameHint,
unsigned Modifier,
const CallExpr *
E,
8001 llvm::Triple::ArchType Arch) {
8003 const Expr *Arg =
E->getArg(
E->getNumArgs() - 1);
8004 std::optional<llvm::APSInt> NeonTypeConst =
8011 bool Usgn =
Type.isUnsigned();
8012 bool Quad =
Type.isQuad();
8014 const bool AllowBFloatArgsAndRet =
8017 llvm::FixedVectorType *VTy =
8018 GetNeonType(
this,
Type, HasLegalHalfType,
false, AllowBFloatArgsAndRet);
8019 llvm::Type *Ty = VTy;
8023 auto getAlignmentValue32 = [&](
Address addr) ->
Value* {
8024 return Builder.getInt32(addr.getAlignment().getQuantity());
8027 unsigned Int = LLVMIntrinsic;
8029 Int = AltLLVMIntrinsic;
8031 switch (BuiltinID) {
8033 case NEON::BI__builtin_neon_splat_lane_v:
8034 case NEON::BI__builtin_neon_splat_laneq_v:
8035 case NEON::BI__builtin_neon_splatq_lane_v:
8036 case NEON::BI__builtin_neon_splatq_laneq_v: {
8037 auto NumElements = VTy->getElementCount();
8038 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
8039 NumElements = NumElements * 2;
8040 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
8041 NumElements = NumElements.divideCoefficientBy(2);
8043 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
8044 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
8046 case NEON::BI__builtin_neon_vpadd_v:
8047 case NEON::BI__builtin_neon_vpaddq_v:
8049 if (VTy->getElementType()->isFloatingPointTy() &&
8050 Int == Intrinsic::aarch64_neon_addp)
8051 Int = Intrinsic::aarch64_neon_faddp;
8053 case NEON::BI__builtin_neon_vabs_v:
8054 case NEON::BI__builtin_neon_vabsq_v:
8055 if (VTy->getElementType()->isFloatingPointTy())
8058 case NEON::BI__builtin_neon_vadd_v:
8059 case NEON::BI__builtin_neon_vaddq_v: {
8060 llvm::Type *VTy = llvm::FixedVectorType::get(
Int8Ty, Quad ? 16 : 8);
8061 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
8062 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
8063 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
8064 return Builder.CreateBitCast(Ops[0], Ty);
8066 case NEON::BI__builtin_neon_vaddhn_v: {
8067 llvm::FixedVectorType *SrcTy =
8068 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8071 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
8072 Ops[1] =
Builder.CreateBitCast(Ops[1], SrcTy);
8073 Ops[0] =
Builder.CreateAdd(Ops[0], Ops[1],
"vaddhn");
8076 Constant *ShiftAmt =
8077 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8078 Ops[0] =
Builder.CreateLShr(Ops[0], ShiftAmt,
"vaddhn");
8081 return Builder.CreateTrunc(Ops[0], VTy,
"vaddhn");
8083 case NEON::BI__builtin_neon_vcale_v:
8084 case NEON::BI__builtin_neon_vcaleq_v:
8085 case NEON::BI__builtin_neon_vcalt_v:
8086 case NEON::BI__builtin_neon_vcaltq_v:
8087 std::swap(Ops[0], Ops[1]);
8089 case NEON::BI__builtin_neon_vcage_v:
8090 case NEON::BI__builtin_neon_vcageq_v:
8091 case NEON::BI__builtin_neon_vcagt_v:
8092 case NEON::BI__builtin_neon_vcagtq_v: {
8094 switch (VTy->getScalarSizeInBits()) {
8095 default: llvm_unreachable(
"unexpected type");
8106 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
8107 llvm::Type *Tys[] = { VTy, VecFlt };
8111 case NEON::BI__builtin_neon_vceqz_v:
8112 case NEON::BI__builtin_neon_vceqzq_v:
8114 ICmpInst::ICMP_EQ,
"vceqz");
8115 case NEON::BI__builtin_neon_vcgez_v:
8116 case NEON::BI__builtin_neon_vcgezq_v:
8118 ICmpInst::ICMP_SGE,
"vcgez");
8119 case NEON::BI__builtin_neon_vclez_v:
8120 case NEON::BI__builtin_neon_vclezq_v:
8122 ICmpInst::ICMP_SLE,
"vclez");
8123 case NEON::BI__builtin_neon_vcgtz_v:
8124 case NEON::BI__builtin_neon_vcgtzq_v:
8126 ICmpInst::ICMP_SGT,
"vcgtz");
8127 case NEON::BI__builtin_neon_vcltz_v:
8128 case NEON::BI__builtin_neon_vcltzq_v:
8130 ICmpInst::ICMP_SLT,
"vcltz");
8131 case NEON::BI__builtin_neon_vclz_v:
8132 case NEON::BI__builtin_neon_vclzq_v:
8137 case NEON::BI__builtin_neon_vcvt_f32_v:
8138 case NEON::BI__builtin_neon_vcvtq_f32_v:
8139 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
8142 return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
8143 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
8144 case NEON::BI__builtin_neon_vcvt_f16_s16:
8145 case NEON::BI__builtin_neon_vcvt_f16_u16:
8146 case NEON::BI__builtin_neon_vcvtq_f16_s16:
8147 case NEON::BI__builtin_neon_vcvtq_f16_u16:
8148 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
8151 return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
8152 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
8153 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
8154 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
8155 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
8156 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
8161 case NEON::BI__builtin_neon_vcvt_n_f32_v:
8162 case NEON::BI__builtin_neon_vcvt_n_f64_v:
8163 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
8164 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
8166 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
8170 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
8171 case NEON::BI__builtin_neon_vcvt_n_s32_v:
8172 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
8173 case NEON::BI__builtin_neon_vcvt_n_u32_v:
8174 case NEON::BI__builtin_neon_vcvt_n_s64_v:
8175 case NEON::BI__builtin_neon_vcvt_n_u64_v:
8176 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
8177 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
8178 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
8179 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
8180 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
8181 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
8186 case NEON::BI__builtin_neon_vcvt_s32_v:
8187 case NEON::BI__builtin_neon_vcvt_u32_v:
8188 case NEON::BI__builtin_neon_vcvt_s64_v:
8189 case NEON::BI__builtin_neon_vcvt_u64_v:
8190 case NEON::BI__builtin_neon_vcvt_s16_f16:
8191 case NEON::BI__builtin_neon_vcvt_u16_f16:
8192 case NEON::BI__builtin_neon_vcvtq_s32_v:
8193 case NEON::BI__builtin_neon_vcvtq_u32_v:
8194 case NEON::BI__builtin_neon_vcvtq_s64_v:
8195 case NEON::BI__builtin_neon_vcvtq_u64_v:
8196 case NEON::BI__builtin_neon_vcvtq_s16_f16:
8197 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
8199 return Usgn ?
Builder.CreateFPToUI(Ops[0], Ty,
"vcvt")
8200 :
Builder.CreateFPToSI(Ops[0], Ty,
"vcvt");
8202 case NEON::BI__builtin_neon_vcvta_s16_f16:
8203 case NEON::BI__builtin_neon_vcvta_s32_v:
8204 case NEON::BI__builtin_neon_vcvta_s64_v:
8205 case NEON::BI__builtin_neon_vcvta_u16_f16:
8206 case NEON::BI__builtin_neon_vcvta_u32_v:
8207 case NEON::BI__builtin_neon_vcvta_u64_v:
8208 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
8209 case NEON::BI__builtin_neon_vcvtaq_s32_v:
8210 case NEON::BI__builtin_neon_vcvtaq_s64_v:
8211 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
8212 case NEON::BI__builtin_neon_vcvtaq_u32_v:
8213 case NEON::BI__builtin_neon_vcvtaq_u64_v:
8214 case NEON::BI__builtin_neon_vcvtn_s16_f16:
8215 case NEON::BI__builtin_neon_vcvtn_s32_v:
8216 case NEON::BI__builtin_neon_vcvtn_s64_v:
8217 case NEON::BI__builtin_neon_vcvtn_u16_f16:
8218 case NEON::BI__builtin_neon_vcvtn_u32_v:
8219 case NEON::BI__builtin_neon_vcvtn_u64_v:
8220 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
8221 case NEON::BI__builtin_neon_vcvtnq_s32_v:
8222 case NEON::BI__builtin_neon_vcvtnq_s64_v:
8223 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
8224 case NEON::BI__builtin_neon_vcvtnq_u32_v:
8225 case NEON::BI__builtin_neon_vcvtnq_u64_v:
8226 case NEON::BI__builtin_neon_vcvtp_s16_f16:
8227 case NEON::BI__builtin_neon_vcvtp_s32_v:
8228 case NEON::BI__builtin_neon_vcvtp_s64_v:
8229 case NEON::BI__builtin_neon_vcvtp_u16_f16:
8230 case NEON::BI__builtin_neon_vcvtp_u32_v:
8231 case NEON::BI__builtin_neon_vcvtp_u64_v:
8232 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
8233 case NEON::BI__builtin_neon_vcvtpq_s32_v:
8234 case NEON::BI__builtin_neon_vcvtpq_s64_v:
8235 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
8236 case NEON::BI__builtin_neon_vcvtpq_u32_v:
8237 case NEON::BI__builtin_neon_vcvtpq_u64_v:
8238 case NEON::BI__builtin_neon_vcvtm_s16_f16:
8239 case NEON::BI__builtin_neon_vcvtm_s32_v:
8240 case NEON::BI__builtin_neon_vcvtm_s64_v:
8241 case NEON::BI__builtin_neon_vcvtm_u16_f16:
8242 case NEON::BI__builtin_neon_vcvtm_u32_v:
8243 case NEON::BI__builtin_neon_vcvtm_u64_v:
8244 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
8245 case NEON::BI__builtin_neon_vcvtmq_s32_v:
8246 case NEON::BI__builtin_neon_vcvtmq_s64_v:
8247 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
8248 case NEON::BI__builtin_neon_vcvtmq_u32_v:
8249 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8253 case NEON::BI__builtin_neon_vcvtx_f32_v: {
8254 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
8258 case NEON::BI__builtin_neon_vext_v:
8259 case NEON::BI__builtin_neon_vextq_v: {
8260 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
8262 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8263 Indices.push_back(i+CV);
8265 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
8266 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
8267 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices,
"vext");
8269 case NEON::BI__builtin_neon_vfma_v:
8270 case NEON::BI__builtin_neon_vfmaq_v: {
8271 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
8272 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
8273 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
8277 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
8278 {Ops[1], Ops[2], Ops[0]});
8280 case NEON::BI__builtin_neon_vld1_v:
8281 case NEON::BI__builtin_neon_vld1q_v: {
8283 Ops.push_back(getAlignmentValue32(PtrOp0));
8286 case NEON::BI__builtin_neon_vld1_x2_v:
8287 case NEON::BI__builtin_neon_vld1q_x2_v:
8288 case NEON::BI__builtin_neon_vld1_x3_v:
8289 case NEON::BI__builtin_neon_vld1q_x3_v:
8290 case NEON::BI__builtin_neon_vld1_x4_v:
8291 case NEON::BI__builtin_neon_vld1q_x4_v: {
8294 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld1xN");
8297 case NEON::BI__builtin_neon_vld2_v:
8298 case NEON::BI__builtin_neon_vld2q_v:
8299 case NEON::BI__builtin_neon_vld3_v:
8300 case NEON::BI__builtin_neon_vld3q_v:
8301 case NEON::BI__builtin_neon_vld4_v:
8302 case NEON::BI__builtin_neon_vld4q_v:
8303 case NEON::BI__builtin_neon_vld2_dup_v:
8304 case NEON::BI__builtin_neon_vld2q_dup_v:
8305 case NEON::BI__builtin_neon_vld3_dup_v:
8306 case NEON::BI__builtin_neon_vld3q_dup_v:
8307 case NEON::BI__builtin_neon_vld4_dup_v:
8308 case NEON::BI__builtin_neon_vld4q_dup_v: {
8311 Value *Align = getAlignmentValue32(PtrOp1);
8312 Ops[1] =
Builder.CreateCall(F, {Ops[1], Align}, NameHint);
8315 case NEON::BI__builtin_neon_vld1_dup_v:
8316 case NEON::BI__builtin_neon_vld1q_dup_v: {
8317 Value *
V = PoisonValue::get(Ty);
8320 llvm::Constant *CI = ConstantInt::get(
SizeTy, 0);
8321 Ops[0] =
Builder.CreateInsertElement(
V, Ld, CI);
8324 case NEON::BI__builtin_neon_vld2_lane_v:
8325 case NEON::BI__builtin_neon_vld2q_lane_v:
8326 case NEON::BI__builtin_neon_vld3_lane_v:
8327 case NEON::BI__builtin_neon_vld3q_lane_v:
8328 case NEON::BI__builtin_neon_vld4_lane_v:
8329 case NEON::BI__builtin_neon_vld4q_lane_v: {
8332 for (
unsigned I = 2; I < Ops.size() - 1; ++I)
8333 Ops[I] =
Builder.CreateBitCast(Ops[I], Ty);
8334 Ops.push_back(getAlignmentValue32(PtrOp1));
8338 case NEON::BI__builtin_neon_vmovl_v: {
8339 llvm::FixedVectorType *DTy =
8340 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8341 Ops[0] =
Builder.CreateBitCast(Ops[0], DTy);
8343 return Builder.CreateZExt(Ops[0], Ty,
"vmovl");
8344 return Builder.CreateSExt(Ops[0], Ty,
"vmovl");
8346 case NEON::BI__builtin_neon_vmovn_v: {
8347 llvm::FixedVectorType *QTy =
8348 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8349 Ops[0] =
Builder.CreateBitCast(Ops[0], QTy);
8350 return Builder.CreateTrunc(Ops[0], Ty,
"vmovn");
8352 case NEON::BI__builtin_neon_vmull_v:
8358 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8361 case NEON::BI__builtin_neon_vpadal_v:
8362 case NEON::BI__builtin_neon_vpadalq_v: {
8364 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8368 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8369 llvm::Type *Tys[2] = { Ty, NarrowTy };
8372 case NEON::BI__builtin_neon_vpaddl_v:
8373 case NEON::BI__builtin_neon_vpaddlq_v: {
8375 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8376 llvm::Type *EltTy = llvm::IntegerType::get(
getLLVMContext(), EltBits / 2);
8378 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8379 llvm::Type *Tys[2] = { Ty, NarrowTy };
8382 case NEON::BI__builtin_neon_vqdmlal_v:
8383 case NEON::BI__builtin_neon_vqdmlsl_v: {
8390 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8391 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8392 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8393 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8394 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8395 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8396 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8397 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8398 RTy->getNumElements() * 2);
8399 llvm::Type *Tys[2] = {
8404 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8405 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8406 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8407 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8408 llvm::Type *Tys[2] = {
8413 case NEON::BI__builtin_neon_vqshl_n_v:
8414 case NEON::BI__builtin_neon_vqshlq_n_v:
8417 case NEON::BI__builtin_neon_vqshlu_n_v:
8418 case NEON::BI__builtin_neon_vqshluq_n_v:
8421 case NEON::BI__builtin_neon_vrecpe_v:
8422 case NEON::BI__builtin_neon_vrecpeq_v:
8423 case NEON::BI__builtin_neon_vrsqrte_v:
8424 case NEON::BI__builtin_neon_vrsqrteq_v:
8425 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8427 case NEON::BI__builtin_neon_vrndi_v:
8428 case NEON::BI__builtin_neon_vrndiq_v:
8430 ? Intrinsic::experimental_constrained_nearbyint
8431 : Intrinsic::nearbyint;
8433 case NEON::BI__builtin_neon_vrshr_n_v:
8434 case NEON::BI__builtin_neon_vrshrq_n_v:
8437 case NEON::BI__builtin_neon_vsha512hq_u64:
8438 case NEON::BI__builtin_neon_vsha512h2q_u64:
8439 case NEON::BI__builtin_neon_vsha512su0q_u64:
8440 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8444 case NEON::BI__builtin_neon_vshl_n_v:
8445 case NEON::BI__builtin_neon_vshlq_n_v:
8447 return Builder.CreateShl(
Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8449 case NEON::BI__builtin_neon_vshll_n_v: {
8450 llvm::FixedVectorType *SrcTy =
8451 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8452 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
8454 Ops[0] =
Builder.CreateZExt(Ops[0], VTy);
8456 Ops[0] =
Builder.CreateSExt(Ops[0], VTy);
8458 return Builder.CreateShl(Ops[0], Ops[1],
"vshll_n");
8460 case NEON::BI__builtin_neon_vshrn_n_v: {
8461 llvm::FixedVectorType *SrcTy =
8462 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8463 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
8466 Ops[0] =
Builder.CreateLShr(Ops[0], Ops[1]);
8468 Ops[0] =
Builder.CreateAShr(Ops[0], Ops[1]);
8469 return Builder.CreateTrunc(Ops[0], Ty,
"vshrn_n");
8471 case NEON::BI__builtin_neon_vshr_n_v:
8472 case NEON::BI__builtin_neon_vshrq_n_v:
8474 case NEON::BI__builtin_neon_vst1_v:
8475 case NEON::BI__builtin_neon_vst1q_v:
8476 case NEON::BI__builtin_neon_vst2_v:
8477 case NEON::BI__builtin_neon_vst2q_v:
8478 case NEON::BI__builtin_neon_vst3_v:
8479 case NEON::BI__builtin_neon_vst3q_v:
8480 case NEON::BI__builtin_neon_vst4_v:
8481 case NEON::BI__builtin_neon_vst4q_v:
8482 case NEON::BI__builtin_neon_vst2_lane_v:
8483 case NEON::BI__builtin_neon_vst2q_lane_v:
8484 case NEON::BI__builtin_neon_vst3_lane_v:
8485 case NEON::BI__builtin_neon_vst3q_lane_v:
8486 case NEON::BI__builtin_neon_vst4_lane_v:
8487 case NEON::BI__builtin_neon_vst4q_lane_v: {
8489 Ops.push_back(getAlignmentValue32(PtrOp0));
8492 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8493 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8494 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8495 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8496 case NEON::BI__builtin_neon_vsm4eq_u32: {
8500 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8501 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8502 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8503 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8508 case NEON::BI__builtin_neon_vst1_x2_v:
8509 case NEON::BI__builtin_neon_vst1q_x2_v:
8510 case NEON::BI__builtin_neon_vst1_x3_v:
8511 case NEON::BI__builtin_neon_vst1q_x3_v:
8512 case NEON::BI__builtin_neon_vst1_x4_v:
8513 case NEON::BI__builtin_neon_vst1q_x4_v: {
8516 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8517 Arch == llvm::Triple::aarch64_32) {
8519 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8525 case NEON::BI__builtin_neon_vsubhn_v: {
8526 llvm::FixedVectorType *SrcTy =
8527 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8530 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
8531 Ops[1] =
Builder.CreateBitCast(Ops[1], SrcTy);
8532 Ops[0] =
Builder.CreateSub(Ops[0], Ops[1],
"vsubhn");
8535 Constant *ShiftAmt =
8536 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8537 Ops[0] =
Builder.CreateLShr(Ops[0], ShiftAmt,
"vsubhn");
8540 return Builder.CreateTrunc(Ops[0], VTy,
"vsubhn");
8542 case NEON::BI__builtin_neon_vtrn_v:
8543 case NEON::BI__builtin_neon_vtrnq_v: {
8544 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
8545 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
8546 Value *SV =
nullptr;
8548 for (
unsigned vi = 0; vi != 2; ++vi) {
8550 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8551 Indices.push_back(i+vi);
8552 Indices.push_back(i+e+vi);
8554 Value *Addr =
Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8555 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vtrn");
8560 case NEON::BI__builtin_neon_vtst_v:
8561 case NEON::BI__builtin_neon_vtstq_v: {
8562 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
8563 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
8564 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
8565 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8566 ConstantAggregateZero::get(Ty));
8567 return Builder.CreateSExt(Ops[0], Ty,
"vtst");
8569 case NEON::BI__builtin_neon_vuzp_v:
8570 case NEON::BI__builtin_neon_vuzpq_v: {
8571 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
8572 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
8573 Value *SV =
nullptr;
8575 for (
unsigned vi = 0; vi != 2; ++vi) {
8577 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8578 Indices.push_back(2*i+vi);
8580 Value *Addr =
Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8581 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vuzp");
8586 case NEON::BI__builtin_neon_vxarq_u64: {
8591 case NEON::BI__builtin_neon_vzip_v:
8592 case NEON::BI__builtin_neon_vzipq_v: {
8593 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
8594 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
8595 Value *SV =
nullptr;
8597 for (
unsigned vi = 0; vi != 2; ++vi) {
8599 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8600 Indices.push_back((i + vi*e) >> 1);
8601 Indices.push_back(((i + vi*e) >> 1)+e);
8603 Value *Addr =
Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8604 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vzip");
8609 case NEON::BI__builtin_neon_vdot_s32:
8610 case NEON::BI__builtin_neon_vdot_u32:
8611 case NEON::BI__builtin_neon_vdotq_s32:
8612 case NEON::BI__builtin_neon_vdotq_u32: {
8614 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8615 llvm::Type *Tys[2] = { Ty, InputTy };
8618 case NEON::BI__builtin_neon_vfmlal_low_f16:
8619 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8621 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8622 llvm::Type *Tys[2] = { Ty, InputTy };
8625 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8626 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8628 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8629 llvm::Type *Tys[2] = { Ty, InputTy };
8632 case NEON::BI__builtin_neon_vfmlal_high_f16:
8633 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8635 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8636 llvm::Type *Tys[2] = { Ty, InputTy };
8639 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8640 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8642 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8643 llvm::Type *Tys[2] = { Ty, InputTy };
8646 case NEON::BI__builtin_neon_vmmlaq_s32:
8647 case NEON::BI__builtin_neon_vmmlaq_u32: {
8649 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8650 llvm::Type *Tys[2] = { Ty, InputTy };
8653 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8655 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8656 llvm::Type *Tys[2] = { Ty, InputTy };
8659 case NEON::BI__builtin_neon_vusdot_s32:
8660 case NEON::BI__builtin_neon_vusdotq_s32: {
8662 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8663 llvm::Type *Tys[2] = { Ty, InputTy };
8666 case NEON::BI__builtin_neon_vbfdot_f32:
8667 case NEON::BI__builtin_neon_vbfdotq_f32: {
8668 llvm::Type *InputTy =
8669 llvm::FixedVectorType::get(
BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8670 llvm::Type *Tys[2] = { Ty, InputTy };
8673 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8674 llvm::Type *Tys[1] = { Ty };
8681 assert(Int &&
"Expected valid intrinsic number");
8694 Value *Op, llvm::Type *Ty,
const CmpInst::Predicate Fp,
8695 const CmpInst::Predicate Ip,
const Twine &Name) {
8696 llvm::Type *OTy = Op->
getType();
8702 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8703 OTy = BI->getOperand(0)->getType();
8705 Op =
Builder.CreateBitCast(Op, OTy);
8706 if (OTy->getScalarType()->isFloatingPointTy()) {
8707 if (Fp == CmpInst::FCMP_OEQ)
8708 Op =
Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8710 Op =
Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8712 Op =
Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8714 return Builder.CreateSExt(Op, Ty, Name);
8719 llvm::Type *ResTy,
unsigned IntID,
8723 TblOps.push_back(ExtOp);
8727 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8728 for (
unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8729 Indices.push_back(2*i);
8730 Indices.push_back(2*i+1);
8733 int PairPos = 0, End = Ops.size() - 1;
8734 while (PairPos < End) {
8735 TblOps.push_back(CGF.
Builder.CreateShuffleVector(Ops[PairPos],
8736 Ops[PairPos+1], Indices,
8743 if (PairPos == End) {
8744 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8745 TblOps.push_back(CGF.
Builder.CreateShuffleVector(Ops[PairPos],
8746 ZeroTbl, Indices, Name));
8750 TblOps.push_back(IndexOp);
8756Value *CodeGenFunction::GetValueForARMHint(
unsigned BuiltinID) {
8758 switch (BuiltinID) {
8761 case clang::ARM::BI__builtin_arm_nop:
8764 case clang::ARM::BI__builtin_arm_yield:
8765 case clang::ARM::BI__yield:
8768 case clang::ARM::BI__builtin_arm_wfe:
8769 case clang::ARM::BI__wfe:
8772 case clang::ARM::BI__builtin_arm_wfi:
8773 case clang::ARM::BI__wfi:
8776 case clang::ARM::BI__builtin_arm_sev:
8777 case clang::ARM::BI__sev:
8780 case clang::ARM::BI__builtin_arm_sevl:
8781 case clang::ARM::BI__sevl:
8800 llvm::Type *ValueType,
bool isExecHi) {
8805 llvm::Value *
Call = Builder.CreateCall(F, {Builder.getInt1(
true)});
8808 Value *Rt2 = Builder.CreateLShr(
Call, 32);
8809 Rt2 = Builder.CreateTrunc(Rt2, CGF.
Int32Ty);
8822 llvm::Type *ValueType,
8824 StringRef SysReg =
"") {
8828 "Unsupported size for register.");
8834 if (SysReg.empty()) {
8836 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8839 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8840 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8841 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8845 bool MixedTypes =
RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8846 assert(!(
RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8847 &&
"Can't fit 64-bit value in 32-bit register");
8849 if (AccessKind !=
Write) {
8852 AccessKind ==
VolatileRead ? llvm::Intrinsic::read_volatile_register
8853 : llvm::Intrinsic::read_register,
8855 llvm::Value *
Call = Builder.CreateCall(F, Metadata);
8859 return Builder.CreateTrunc(
Call, ValueType);
8861 if (ValueType->isPointerTy())
8863 return Builder.CreateIntToPtr(
Call, ValueType);
8868 llvm::Function *F = CGM.
getIntrinsic(llvm::Intrinsic::write_register, Types);
8873 return Builder.CreateCall(F, { Metadata, ArgValue });
8876 if (ValueType->isPointerTy()) {
8878 ArgValue = Builder.CreatePtrToInt(ArgValue,
RegisterType);
8879 return Builder.CreateCall(F, { Metadata, ArgValue });
8882 return Builder.CreateCall(F, { Metadata, ArgValue });
8888 switch (BuiltinID) {
8890 case NEON::BI__builtin_neon_vget_lane_i8:
8891 case NEON::BI__builtin_neon_vget_lane_i16:
8892 case NEON::BI__builtin_neon_vget_lane_bf16:
8893 case NEON::BI__builtin_neon_vget_lane_i32:
8894 case NEON::BI__builtin_neon_vget_lane_i64:
8895 case NEON::BI__builtin_neon_vget_lane_f32:
8896 case NEON::BI__builtin_neon_vgetq_lane_i8:
8897 case NEON::BI__builtin_neon_vgetq_lane_i16:
8898 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8899 case NEON::BI__builtin_neon_vgetq_lane_i32:
8900 case NEON::BI__builtin_neon_vgetq_lane_i64:
8901 case NEON::BI__builtin_neon_vgetq_lane_f32:
8902 case NEON::BI__builtin_neon_vduph_lane_bf16:
8903 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8904 case NEON::BI__builtin_neon_vset_lane_i8:
8905 case NEON::BI__builtin_neon_vset_lane_i16:
8906 case NEON::BI__builtin_neon_vset_lane_bf16:
8907 case NEON::BI__builtin_neon_vset_lane_i32:
8908 case NEON::BI__builtin_neon_vset_lane_i64:
8909 case NEON::BI__builtin_neon_vset_lane_f32:
8910 case NEON::BI__builtin_neon_vsetq_lane_i8:
8911 case NEON::BI__builtin_neon_vsetq_lane_i16:
8912 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8913 case NEON::BI__builtin_neon_vsetq_lane_i32:
8914 case NEON::BI__builtin_neon_vsetq_lane_i64:
8915 case NEON::BI__builtin_neon_vsetq_lane_f32:
8916 case NEON::BI__builtin_neon_vsha1h_u32:
8917 case NEON::BI__builtin_neon_vsha1cq_u32:
8918 case NEON::BI__builtin_neon_vsha1pq_u32:
8919 case NEON::BI__builtin_neon_vsha1mq_u32:
8920 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8921 case clang::ARM::BI_MoveToCoprocessor:
8922 case clang::ARM::BI_MoveToCoprocessor2:
8931 llvm::Triple::ArchType Arch) {
8932 if (
auto Hint = GetValueForARMHint(BuiltinID))
8935 if (BuiltinID == clang::ARM::BI__emit) {
8937 llvm::FunctionType *FTy =
8938 llvm::FunctionType::get(
VoidTy,
false);
8942 llvm_unreachable(
"Sema will ensure that the parameter is constant");
8945 uint64_t ZExtValue =
Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8947 llvm::InlineAsm *Emit =
8948 IsThumb ? InlineAsm::get(FTy,
".inst.n 0x" + utohexstr(ZExtValue),
"",
8950 : InlineAsm::get(FTy,
".inst 0x" + utohexstr(ZExtValue),
"",
8953 return Builder.CreateCall(Emit);
8956 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8961 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8973 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8976 CGM.
getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
8979 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8980 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8984 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8990 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8994 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
9000 if (BuiltinID == clang::ARM::BI__clear_cache) {
9001 assert(
E->getNumArgs() == 2 &&
"__clear_cache takes 2 arguments");
9004 for (
unsigned i = 0; i < 2; i++)
9007 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
9008 StringRef Name = FD->
getName();
9012 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
9013 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
9016 switch (BuiltinID) {
9017 default: llvm_unreachable(
"unexpected builtin");
9018 case clang::ARM::BI__builtin_arm_mcrr:
9021 case clang::ARM::BI__builtin_arm_mcrr2:
9043 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
9046 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
9047 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
9050 switch (BuiltinID) {
9051 default: llvm_unreachable(
"unexpected builtin");
9052 case clang::ARM::BI__builtin_arm_mrrc:
9055 case clang::ARM::BI__builtin_arm_mrrc2:
9063 Value *RtAndRt2 =
Builder.CreateCall(F, {Coproc, Opc1, CRm});
9073 Value *ShiftCast = llvm::ConstantInt::get(
Int64Ty, 32);
9074 RtAndRt2 =
Builder.CreateShl(Rt, ShiftCast,
"shl",
true);
9075 RtAndRt2 =
Builder.CreateOr(RtAndRt2, Rt1);
9080 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
9081 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9082 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
9084 BuiltinID == clang::ARM::BI__ldrexd) {
9087 switch (BuiltinID) {
9088 default: llvm_unreachable(
"unexpected builtin");
9089 case clang::ARM::BI__builtin_arm_ldaex:
9092 case clang::ARM::BI__builtin_arm_ldrexd:
9093 case clang::ARM::BI__builtin_arm_ldrex:
9094 case clang::ARM::BI__ldrexd:
9108 Val =
Builder.CreateShl(Val0, ShiftCst,
"shl",
true );
9109 Val =
Builder.CreateOr(Val, Val1);
9113 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9114 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
9123 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
9124 : Intrinsic::arm_ldrex,
9126 CallInst *Val =
Builder.CreateCall(F, LoadAddr,
"ldrex");
9130 if (RealResTy->isPointerTy())
9131 return Builder.CreateIntToPtr(Val, RealResTy);
9133 llvm::Type *IntResTy = llvm::IntegerType::get(
9135 return Builder.CreateBitCast(
Builder.CreateTruncOrBitCast(Val, IntResTy),
9140 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
9141 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
9142 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
9145 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
9146 : Intrinsic::arm_strexd);
9159 return Builder.CreateCall(F, {Arg0, Arg1, StPtr},
"strexd");
9162 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
9163 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
9168 llvm::Type *StoreTy =
9171 if (StoreVal->
getType()->isPointerTy())
9174 llvm::Type *
IntTy = llvm::IntegerType::get(
9182 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
9183 : Intrinsic::arm_strex,
9186 CallInst *CI =
Builder.CreateCall(F, {StoreVal, StoreAddr},
"strex");
9188 1, Attribute::get(
getLLVMContext(), Attribute::ElementType, StoreTy));
9192 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
9198 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
9199 switch (BuiltinID) {
9200 case clang::ARM::BI__builtin_arm_crc32b:
9201 CRCIntrinsicID = Intrinsic::arm_crc32b;
break;
9202 case clang::ARM::BI__builtin_arm_crc32cb:
9203 CRCIntrinsicID = Intrinsic::arm_crc32cb;
break;
9204 case clang::ARM::BI__builtin_arm_crc32h:
9205 CRCIntrinsicID = Intrinsic::arm_crc32h;
break;
9206 case clang::ARM::BI__builtin_arm_crc32ch:
9207 CRCIntrinsicID = Intrinsic::arm_crc32ch;
break;
9208 case clang::ARM::BI__builtin_arm_crc32w:
9209 case clang::ARM::BI__builtin_arm_crc32d:
9210 CRCIntrinsicID = Intrinsic::arm_crc32w;
break;
9211 case clang::ARM::BI__builtin_arm_crc32cw:
9212 case clang::ARM::BI__builtin_arm_crc32cd:
9213 CRCIntrinsicID = Intrinsic::arm_crc32cw;
break;
9216 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
9222 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
9223 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
9231 return Builder.CreateCall(F, {Res, Arg1b});
9236 return Builder.CreateCall(F, {Arg0, Arg1});
9240 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9241 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9242 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9243 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
9244 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
9245 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
9248 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9249 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9250 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
9253 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9254 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
9256 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9257 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
9259 llvm::Type *ValueType;
9261 if (IsPointerBuiltin) {
9264 }
else if (Is64Bit) {
9274 if (BuiltinID == ARM::BI__builtin_sponentry) {
9293 return P.first == BuiltinID;
9296 BuiltinID = It->second;
9300 unsigned ICEArguments = 0;
9305 auto getAlignmentValue32 = [&](
Address addr) ->
Value* {
9306 return Builder.getInt32(addr.getAlignment().getQuantity());
9313 unsigned NumArgs =
E->getNumArgs() - (HasExtraArg ? 1 : 0);
9314 for (
unsigned i = 0, e = NumArgs; i != e; i++) {
9316 switch (BuiltinID) {
9317 case NEON::BI__builtin_neon_vld1_v:
9318 case NEON::BI__builtin_neon_vld1q_v:
9319 case NEON::BI__builtin_neon_vld1q_lane_v:
9320 case NEON::BI__builtin_neon_vld1_lane_v:
9321 case NEON::BI__builtin_neon_vld1_dup_v:
9322 case NEON::BI__builtin_neon_vld1q_dup_v:
9323 case NEON::BI__builtin_neon_vst1_v:
9324 case NEON::BI__builtin_neon_vst1q_v:
9325 case NEON::BI__builtin_neon_vst1q_lane_v:
9326 case NEON::BI__builtin_neon_vst1_lane_v:
9327 case NEON::BI__builtin_neon_vst2_v:
9328 case NEON::BI__builtin_neon_vst2q_v:
9329 case NEON::BI__builtin_neon_vst2_lane_v:
9330 case NEON::BI__builtin_neon_vst2q_lane_v:
9331 case NEON::BI__builtin_neon_vst3_v:
9332 case NEON::BI__builtin_neon_vst3q_v:
9333 case NEON::BI__builtin_neon_vst3_lane_v:
9334 case NEON::BI__builtin_neon_vst3q_lane_v:
9335 case NEON::BI__builtin_neon_vst4_v:
9336 case NEON::BI__builtin_neon_vst4q_v:
9337 case NEON::BI__builtin_neon_vst4_lane_v:
9338 case NEON::BI__builtin_neon_vst4q_lane_v:
9347 switch (BuiltinID) {
9348 case NEON::BI__builtin_neon_vld2_v:
9349 case NEON::BI__builtin_neon_vld2q_v:
9350 case NEON::BI__builtin_neon_vld3_v:
9351 case NEON::BI__builtin_neon_vld3q_v:
9352 case NEON::BI__builtin_neon_vld4_v:
9353 case NEON::BI__builtin_neon_vld4q_v:
9354 case NEON::BI__builtin_neon_vld2_lane_v:
9355 case NEON::BI__builtin_neon_vld2q_lane_v:
9356 case NEON::BI__builtin_neon_vld3_lane_v:
9357 case NEON::BI__builtin_neon_vld3q_lane_v:
9358 case NEON::BI__builtin_neon_vld4_lane_v:
9359 case NEON::BI__builtin_neon_vld4q_lane_v:
9360 case NEON::BI__builtin_neon_vld2_dup_v:
9361 case NEON::BI__builtin_neon_vld2q_dup_v:
9362 case NEON::BI__builtin_neon_vld3_dup_v:
9363 case NEON::BI__builtin_neon_vld3q_dup_v:
9364 case NEON::BI__builtin_neon_vld4_dup_v:
9365 case NEON::BI__builtin_neon_vld4q_dup_v:
9377 switch (BuiltinID) {
9380 case NEON::BI__builtin_neon_vget_lane_i8:
9381 case NEON::BI__builtin_neon_vget_lane_i16:
9382 case NEON::BI__builtin_neon_vget_lane_i32:
9383 case NEON::BI__builtin_neon_vget_lane_i64:
9384 case NEON::BI__builtin_neon_vget_lane_bf16:
9385 case NEON::BI__builtin_neon_vget_lane_f32:
9386 case NEON::BI__builtin_neon_vgetq_lane_i8:
9387 case NEON::BI__builtin_neon_vgetq_lane_i16:
9388 case NEON::BI__builtin_neon_vgetq_lane_i32:
9389 case NEON::BI__builtin_neon_vgetq_lane_i64:
9390 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9391 case NEON::BI__builtin_neon_vgetq_lane_f32:
9392 case NEON::BI__builtin_neon_vduph_lane_bf16:
9393 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9394 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
9396 case NEON::BI__builtin_neon_vrndns_f32: {
9398 llvm::Type *Tys[] = {Arg->
getType()};
9400 return Builder.CreateCall(F, {Arg},
"vrndn"); }
9402 case NEON::BI__builtin_neon_vset_lane_i8:
9403 case NEON::BI__builtin_neon_vset_lane_i16:
9404 case NEON::BI__builtin_neon_vset_lane_i32:
9405 case NEON::BI__builtin_neon_vset_lane_i64:
9406 case NEON::BI__builtin_neon_vset_lane_bf16:
9407 case NEON::BI__builtin_neon_vset_lane_f32:
9408 case NEON::BI__builtin_neon_vsetq_lane_i8:
9409 case NEON::BI__builtin_neon_vsetq_lane_i16:
9410 case NEON::BI__builtin_neon_vsetq_lane_i32:
9411 case NEON::BI__builtin_neon_vsetq_lane_i64:
9412 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9413 case NEON::BI__builtin_neon_vsetq_lane_f32:
9414 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
9416 case NEON::BI__builtin_neon_vsha1h_u32:
9419 case NEON::BI__builtin_neon_vsha1cq_u32:
9422 case NEON::BI__builtin_neon_vsha1pq_u32:
9425 case NEON::BI__builtin_neon_vsha1mq_u32:
9429 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9436 case clang::ARM::BI_MoveToCoprocessor:
9437 case clang::ARM::BI_MoveToCoprocessor2: {
9439 ? Intrinsic::arm_mcr
9440 : Intrinsic::arm_mcr2);
9441 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9442 Ops[3], Ops[4], Ops[5]});
9447 assert(HasExtraArg);
9448 const Expr *Arg =
E->getArg(
E->getNumArgs()-1);
9449 std::optional<llvm::APSInt>
Result =
9454 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9455 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9458 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9464 bool usgn =
Result->getZExtValue() == 1;
9465 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9469 return Builder.CreateCall(F, Ops,
"vcvtr");
9474 bool usgn =
Type.isUnsigned();
9475 bool rightShift =
false;
9477 llvm::FixedVectorType *VTy =
9480 llvm::Type *Ty = VTy;
9491 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9492 Builtin->NameHint, Builtin->TypeModifier,
E, Ops, PtrOp0, PtrOp1, Arch);
9495 switch (BuiltinID) {
9496 default:
return nullptr;
9497 case NEON::BI__builtin_neon_vld1q_lane_v:
9500 if (VTy->getElementType()->isIntegerTy(64)) {
9502 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
9503 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9504 Value *SV = llvm::ConstantVector::get(ConstantInt::get(
Int32Ty, 1-Lane));
9505 Ops[1] =
Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9507 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9510 Value *Align = getAlignmentValue32(PtrOp0);
9513 int Indices[] = {1 - Lane, Lane};
9514 return Builder.CreateShuffleVector(Ops[1], Ld, Indices,
"vld1q_lane");
9517 case NEON::BI__builtin_neon_vld1_lane_v: {
9518 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
9521 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2],
"vld1_lane");
9523 case NEON::BI__builtin_neon_vqrshrn_n_v:
9525 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9528 case NEON::BI__builtin_neon_vqrshrun_n_v:
9530 Ops,
"vqrshrun_n", 1,
true);
9531 case NEON::BI__builtin_neon_vqshrn_n_v:
9532 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9535 case NEON::BI__builtin_neon_vqshrun_n_v:
9537 Ops,
"vqshrun_n", 1,
true);
9538 case NEON::BI__builtin_neon_vrecpe_v:
9539 case NEON::BI__builtin_neon_vrecpeq_v:
9542 case NEON::BI__builtin_neon_vrshrn_n_v:
9544 Ops,
"vrshrn_n", 1,
true);
9545 case NEON::BI__builtin_neon_vrsra_n_v:
9546 case NEON::BI__builtin_neon_vrsraq_n_v:
9547 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
9548 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
9550 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9552 return Builder.CreateAdd(Ops[0], Ops[1],
"vrsra_n");
9553 case NEON::BI__builtin_neon_vsri_n_v:
9554 case NEON::BI__builtin_neon_vsriq_n_v:
9557 case NEON::BI__builtin_neon_vsli_n_v:
9558 case NEON::BI__builtin_neon_vsliq_n_v:
9562 case NEON::BI__builtin_neon_vsra_n_v:
9563 case NEON::BI__builtin_neon_vsraq_n_v:
9564 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
9566 return Builder.CreateAdd(Ops[0], Ops[1]);
9567 case NEON::BI__builtin_neon_vst1q_lane_v:
9570 if (VTy->getElementType()->isIntegerTy(64)) {
9571 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
9572 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9573 Ops[1] =
Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9574 Ops[2] = getAlignmentValue32(PtrOp0);
9575 llvm::Type *Tys[] = {
Int8PtrTy, Ops[1]->getType()};
9580 case NEON::BI__builtin_neon_vst1_lane_v: {
9581 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
9582 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
9586 case NEON::BI__builtin_neon_vtbl1_v:
9589 case NEON::BI__builtin_neon_vtbl2_v:
9592 case NEON::BI__builtin_neon_vtbl3_v:
9595 case NEON::BI__builtin_neon_vtbl4_v:
9598 case NEON::BI__builtin_neon_vtbx1_v:
9601 case NEON::BI__builtin_neon_vtbx2_v:
9604 case NEON::BI__builtin_neon_vtbx3_v:
9607 case NEON::BI__builtin_neon_vtbx4_v:
9613template<
typename Integer>
// Widen V to type T: zero-extend when Unsigned is true, sign-extend
// otherwise.
9622 return Unsigned ? Builder.CreateZExt(
V,
T) : Builder.CreateSExt(
V,
T);
9632 unsigned LaneBits = cast<llvm::VectorType>(
V->getType())
9634 ->getPrimitiveSizeInBits();
9635 if (Shift == LaneBits) {
9640 return llvm::Constant::getNullValue(
V->getType());
9644 return Unsigned ? Builder.CreateLShr(
V, Shift) : Builder.CreateAShr(
V, Shift);
// Broadcast V into a vector whose lanes together span 128 bits: the lane
// count is 128 divided by the bit width of V's own type.
9651 unsigned Elements = 128 /
V->getType()->getPrimitiveSizeInBits();
9652 return Builder.CreateVectorSplat(Elements,
V);
9658 llvm::Type *DestType) {
9671 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9672 return Builder.CreateCall(
9674 {DestType, V->getType()}),
9677 return Builder.CreateBitCast(
V, DestType);
// Keep every second lane of V. The shuffle mask is Odd, Odd + 2, Odd + 4, ...
// so one index is produced per pair of input lanes.
// NOTE(review): presumably Odd is 0 (even lanes) or 1 (odd lanes); its
// declaration is not visible here -- confirm.
9685 unsigned InputElements =
9686 cast<llvm::FixedVectorType>(
V->getType())->getNumElements();
9687 for (
unsigned i = 0; i < InputElements; i += 2)
9688 Indices.push_back(i + Odd);
9689 return Builder.CreateShuffleVector(
V, Indices);
// Interleave the lanes of V0 and V1, which must have the same vector type.
9695 assert(V0->getType() == V1->getType() &&
"Can't zip different vector types");
9697 unsigned InputElements =
9698 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
// In a two-input shuffle, mask values >= InputElements address the second
// input, so each iteration emits lane i of V0 followed by lane i of V1.
9699 for (
unsigned i = 0; i < InputElements; i++) {
9700 Indices.push_back(i);
9701 Indices.push_back(i + InputElements);
9703 return Builder.CreateShuffleVector(V0, V1, Indices);
// Builds an integer constant for one lane of the vector type VT, controlled
// by the two template parameters.
9706template<
unsigned HighBit,
unsigned OtherBits>
9710 llvm::Type *
T = cast<llvm::VectorType>(VT)->getElementType();
9711 unsigned LaneBits =
T->getPrimitiveSizeInBits();
// HighBit is placed in the most significant bit of the lane.
// NOTE(review): Value is a uint32_t, so this assumes LaneBits <= 32 --
// confirm against the callers.
9712 uint32_t
Value = HighBit << (LaneBits - 1);
// Sets every bit below the top bit of the lane.
// NOTE(review): the line just before this one in the original file is not
// visible in this view; the OR is presumably guarded by OtherBits -- confirm.
9714 Value |= (1UL << (LaneBits - 1)) - 1;
9715 llvm::Value *Lane = llvm::ConstantInt::get(
T,
Value);
9721 unsigned ReverseWidth) {
// Reverse the order of the lanes inside each ReverseWidth-bit group of a
// 128-bit vector. With k = ReverseWidth / LaneSize lanes per group, XOR-ing a
// lane index with k - 1 mirrors it within its group.
// NOTE(review): this relies on k being a power of two -- presumably
// guaranteed by the callers; confirm.
9725 unsigned LaneSize =
V->getType()->getScalarSizeInBits();
9726 unsigned Elements = 128 / LaneSize;
9727 unsigned Mask = ReverseWidth / LaneSize - 1;
9728 for (
unsigned i = 0; i < Elements; i++)
9729 Indices.push_back(i ^ Mask);
9730 return Builder.CreateShuffleVector(
V, Indices);
9736 llvm::Triple::ArchType Arch) {
9737 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9738 Intrinsic::ID IRIntr;
9739 unsigned NumVectors;
9742 switch (BuiltinID) {
9743 #include "clang/Basic/arm_mve_builtin_cg.inc"
9754 switch (CustomCodeGenType) {
// Custom codegen for the multi-vector load case.
9756 case CustomCodeGen::VLD24: {
// The asserts pin down the expected shape of the result type: a struct with
// exactly one member, that member being an array of NumVectors elements.
// VecLType is the array's element type and is recorded in Tys.
9762 assert(MvecLType->isStructTy() &&
9763 "Return type for vld[24]q should be a struct");
9764 assert(MvecLType->getStructNumElements() == 1 &&
9765 "Return-type struct for vld[24]q should have one element");
9766 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9767 assert(MvecLTypeInner->isArrayTy() &&
9768 "Return-type struct for vld[24]q should contain an array");
9769 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9770 "Array member of return-type struct vld[24]q has wrong length");
9771 auto VecLType = MvecLTypeInner->getArrayElementType();
9773 Tys.push_back(VecLType);
// The first call argument expression.
9775 auto Addr =
E->getArg(0);
// Repack the result: starting from poison, value i of LoadResult is stored
// at position {0, i} (struct member 0, array slot i) of MvecOut.
9781 Value *MvecOut = PoisonValue::get(MvecLType);
9782 for (
unsigned i = 0; i < NumVectors; ++i) {
9783 Value *Vec =
Builder.CreateExtractValue(LoadResult, i);
9784 MvecOut =
Builder.CreateInsertValue(MvecOut, Vec, {0, i});
// Custom codegen for the multi-vector store case.
9793 case CustomCodeGen::VST24: {
// Argument 0 is taken as Addr; the type of argument 1 is kept as MvecCType.
9797 auto Addr =
E->getArg(0);
9801 auto MvecCType =
E->getArg(1)->
getType();
// Same shape checks as the load case: a struct with one member that is an
// array of NumVectors elements.
// NOTE(review): the messages name only "vst2q", and the last one still talks
// about the "return-type struct vld[24]q" although this is the store path;
// looks like copy-paste from the load case.
9803 assert(MvecLType->isStructTy() &&
"Data type for vst2q should be a struct");
9804 assert(MvecLType->getStructNumElements() == 1 &&
9805 "Data-type struct for vst2q should have one element");
9806 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9807 assert(MvecLTypeInner->isArrayTy() &&
9808 "Data-type struct for vst2q should contain an array");
9809 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9810 "Array member of return-type struct vld[24]q has wrong length");
9811 auto VecLType = MvecLTypeInner->getArrayElementType();
9813 Tys.push_back(VecLType);
// Unpack the struct-of-array value: element {0, i} of Mvec becomes one
// operand per vector.
9818 for (
unsigned i = 0; i < NumVectors; i++)
9819 Ops.push_back(
Builder.CreateExtractValue(Mvec, {0, i}));
// One call to F per vector, each time with the i32 constant i appended to
// the operand list. ToReturn ends up holding the most recent call.
9822 Value *ToReturn =
nullptr;
9823 for (
unsigned i = 0; i < NumVectors; i++) {
9824 Ops.push_back(llvm::ConstantInt::get(
Int32Ty, i));
9825 ToReturn =
Builder.CreateCall(F, Ops);
9831 llvm_unreachable(
"unknown custom codegen type.");
9837 llvm::Triple::ArchType Arch) {
9838 switch (BuiltinID) {
9841#include "clang/Basic/arm_cde_builtin_cg.inc"
9848 llvm::Triple::ArchType Arch) {
9849 unsigned int Int = 0;
9850 const char *
s =
nullptr;
9852 switch (BuiltinID) {
9855 case NEON::BI__builtin_neon_vtbl1_v:
9856 case NEON::BI__builtin_neon_vqtbl1_v:
9857 case NEON::BI__builtin_neon_vqtbl1q_v:
9858 case NEON::BI__builtin_neon_vtbl2_v:
9859 case NEON::BI__builtin_neon_vqtbl2_v:
9860 case NEON::BI__builtin_neon_vqtbl2q_v:
9861 case NEON::BI__builtin_neon_vtbl3_v:
9862 case NEON::BI__builtin_neon_vqtbl3_v:
9863 case NEON::BI__builtin_neon_vqtbl3q_v:
9864 case NEON::BI__builtin_neon_vtbl4_v:
9865 case NEON::BI__builtin_neon_vqtbl4_v:
9866 case NEON::BI__builtin_neon_vqtbl4q_v:
9868 case NEON::BI__builtin_neon_vtbx1_v:
9869 case NEON::BI__builtin_neon_vqtbx1_v:
9870 case NEON::BI__builtin_neon_vqtbx1q_v:
9871 case NEON::BI__builtin_neon_vtbx2_v:
9872 case NEON::BI__builtin_neon_vqtbx2_v:
9873 case NEON::BI__builtin_neon_vqtbx2q_v:
9874 case NEON::BI__builtin_neon_vtbx3_v:
9875 case NEON::BI__builtin_neon_vqtbx3_v:
9876 case NEON::BI__builtin_neon_vqtbx3q_v:
9877 case NEON::BI__builtin_neon_vtbx4_v:
9878 case NEON::BI__builtin_neon_vqtbx4_v:
9879 case NEON::BI__builtin_neon_vqtbx4q_v:
9883 assert(
E->getNumArgs() >= 3);
9886 const Expr *Arg =
E->getArg(
E->getNumArgs() - 1);
9887 std::optional<llvm::APSInt>
Result =
9902 switch (BuiltinID) {
9903 case NEON::BI__builtin_neon_vtbl1_v: {
9905 Ty, Intrinsic::aarch64_neon_tbl1,
"vtbl1");
9907 case NEON::BI__builtin_neon_vtbl2_v: {
9909 Ty, Intrinsic::aarch64_neon_tbl1,
"vtbl1");
9911 case NEON::BI__builtin_neon_vtbl3_v: {
9913 Ty, Intrinsic::aarch64_neon_tbl2,
"vtbl2");
9915 case NEON::BI__builtin_neon_vtbl4_v: {
9917 Ty, Intrinsic::aarch64_neon_tbl2,
"vtbl2");
// vtbx1 is assembled from a tbl1 lookup plus a per-lane select rather than
// from a tbx intrinsic.
9919 case NEON::BI__builtin_neon_vtbx1_v: {
9922 Intrinsic::aarch64_neon_tbl1,
"vtbl1");
// Build a lane mask: the unsigned compare marks lanes of Ops[2] that are
// >= 8, and sign-extending that i1 result to Ty turns "true" into all-ones.
9924 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9925 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9926 CmpRes = Builder.CreateSExt(CmpRes, Ty);
// Masked lanes keep the value from Ops[0]; all other lanes take TblRes
// (defined on a line not visible in this view).
9928 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9929 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9930 return Builder.CreateOr(EltsFromInput, EltsFromTbl,
"vtbx");
9932 case NEON::BI__builtin_neon_vtbx2_v: {
9934 Ty, Intrinsic::aarch64_neon_tbx1,
"vtbx1");
// vtbx3 follows the same lookup-plus-select scheme as vtbx1, using a tbl2
// lookup and a threshold constant of 24.
// NOTE(review): the second operand of the compare below is on a line that is
// not visible in this view; presumably it is TwentyFourV -- confirm.
9936 case NEON::BI__builtin_neon_vtbx3_v: {
9939 Intrinsic::aarch64_neon_tbl2,
"vtbl2");
9941 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9942 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9944 CmpRes = Builder.CreateSExt(CmpRes, Ty);
// Lanes selected by the sign-extended mask keep the value from Ops[0]; the
// remaining lanes take TblRes.
9946 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9947 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9948 return Builder.CreateOr(EltsFromInput, EltsFromTbl,
"vtbx");
9950 case NEON::BI__builtin_neon_vtbx4_v: {
9952 Ty, Intrinsic::aarch64_neon_tbx2,
"vtbx2");
// The vqtbl1..4 and vqtbx1..4 builtins map one-to-one onto the
// aarch64_neon_tbl1..4 / aarch64_neon_tbx1..4 intrinsics. Each case only
// records the intrinsic ID in Int and a name hint in s, then leaves the
// switch. The plain and 'q'-suffixed forms of a builtin share a case.
9954 case NEON::BI__builtin_neon_vqtbl1_v:
9955 case NEON::BI__builtin_neon_vqtbl1q_v:
9956 Int = Intrinsic::aarch64_neon_tbl1;
s =
"vtbl1";
break;
9957 case NEON::BI__builtin_neon_vqtbl2_v:
9958 case NEON::BI__builtin_neon_vqtbl2q_v: {
9959 Int = Intrinsic::aarch64_neon_tbl2;
s =
"vtbl2";
break;
9960 case NEON::BI__builtin_neon_vqtbl3_v:
9961 case NEON::BI__builtin_neon_vqtbl3q_v:
9962 Int = Intrinsic::aarch64_neon_tbl3;
s =
"vtbl3";
break;
9963 case NEON::BI__builtin_neon_vqtbl4_v:
9964 case NEON::BI__builtin_neon_vqtbl4q_v:
9965 Int = Intrinsic::aarch64_neon_tbl4;
s =
"vtbl4";
break;
// Table-extension (tbx) variants.
9966 case NEON::BI__builtin_neon_vqtbx1_v:
9967 case NEON::BI__builtin_neon_vqtbx1q_v:
9968 Int = Intrinsic::aarch64_neon_tbx1;
s =
"vtbx1";
break;
9969 case NEON::BI__builtin_neon_vqtbx2_v:
9970 case NEON::BI__builtin_neon_vqtbx2q_v:
9971 Int = Intrinsic::aarch64_neon_tbx2;
s =
"vtbx2";
break;
9972 case NEON::BI__builtin_neon_vqtbx3_v:
9973 case NEON::BI__builtin_neon_vqtbx3q_v:
9974 Int = Intrinsic::aarch64_neon_tbx3;
s =
"vtbx3";
break;
9975 case NEON::BI__builtin_neon_vqtbx4_v:
9976 case NEON::BI__builtin_neon_vqtbx4q_v:
9977 Int = Intrinsic::aarch64_neon_tbx4;
s =
"vtbx4";
break;
9989 auto *VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
9991 Value *
V = PoisonValue::get(VTy);
9992 llvm::Constant *CI = ConstantInt::get(
SizeTy, 0);
9993 Op =
Builder.CreateInsertElement(
V, Op, CI);
10002 case SVETypeFlags::MemEltTyDefault:
10004 case SVETypeFlags::MemEltTyInt8:
10006 case SVETypeFlags::MemEltTyInt16:
10008 case SVETypeFlags::MemEltTyInt32:
10010 case SVETypeFlags::MemEltTyInt64:
10013 llvm_unreachable(
"Unknown MemEltType");
10019 llvm_unreachable(
"Invalid SVETypeFlag!");
10021 case SVETypeFlags::EltTyInt8:
10023 case SVETypeFlags::EltTyInt16:
10025 case SVETypeFlags::EltTyInt32:
10027 case SVETypeFlags::EltTyInt64:
10029 case SVETypeFlags::EltTyInt128:
10030 return Builder.getInt128Ty();
10032 case SVETypeFlags::EltTyFloat16:
10034 case SVETypeFlags::EltTyFloat32:
10036 case SVETypeFlags::EltTyFloat64:
10037 return Builder.getDoubleTy();
10039 case SVETypeFlags::EltTyBFloat16:
10040 return Builder.getBFloatTy();
10042 case SVETypeFlags::EltTyBool8:
10043 case SVETypeFlags::EltTyBool16:
10044 case SVETypeFlags::EltTyBool32:
10045 case SVETypeFlags::EltTyBool64:
// Maps an SVE element-type flag to a scalable vector of i1. The minimum lane
// count follows the element width: 16 lanes for 8-bit elements, 8 for 16-bit,
// 4 for 32-bit and 2 for 64-bit. Flags not listed below reach
// llvm_unreachable.
10052llvm::ScalableVectorType *
10055 default: llvm_unreachable(
"Unhandled SVETypeFlag!");
// Integer element types.
10057 case SVETypeFlags::EltTyInt8:
10058 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 16);
10059 case SVETypeFlags::EltTyInt16:
10060 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 8);
10061 case SVETypeFlags::EltTyInt32:
10062 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 4);
10063 case SVETypeFlags::EltTyInt64:
10064 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 2);
// Floating-point element types.
10066 case SVETypeFlags::EltTyBFloat16:
10067 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 8);
10068 case SVETypeFlags::EltTyFloat16:
10069 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 8);
10070 case SVETypeFlags::EltTyFloat32:
10071 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 4);
10072 case SVETypeFlags::EltTyFloat64:
10073 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 2);
// Boolean element types.
10075 case SVETypeFlags::EltTyBool8:
10076 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 16);
10077 case SVETypeFlags::EltTyBool16:
10078 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 8);
10079 case SVETypeFlags::EltTyBool32:
10080 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 4);
10081 case SVETypeFlags::EltTyBool64:
10082 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 2);
// Maps an SVE element-type flag to the scalable vector type that holds that
// element. Every data type listed spans a minimum of 128 bits: i8 x 16,
// i16 x 8, i32 x 4, i64 x 2, half x 8, bfloat x 8, float x 4, double x 2.
10087llvm::ScalableVectorType *
10091 llvm_unreachable(
"Invalid SVETypeFlag!");
// Integer element types.
10093 case SVETypeFlags::EltTyInt8:
10094 return llvm::ScalableVectorType::get(
Builder.getInt8Ty(), 16);
10095 case SVETypeFlags::EltTyInt16:
10096 return llvm::ScalableVectorType::get(
Builder.getInt16Ty(), 8);
10097 case SVETypeFlags::EltTyInt32:
10098 return llvm::ScalableVectorType::get(
Builder.getInt32Ty(), 4);
10099 case SVETypeFlags::EltTyInt64:
10100 return llvm::ScalableVectorType::get(
Builder.getInt64Ty(), 2);
// Floating-point element types. MFloat8 is represented with i8 lanes.
10102 case SVETypeFlags::EltTyMFloat8:
10103 return llvm::ScalableVectorType::get(
Builder.getInt8Ty(), 16);
10104 case SVETypeFlags::EltTyFloat16:
10105 return llvm::ScalableVectorType::get(
Builder.getHalfTy(), 8);
10106 case SVETypeFlags::EltTyBFloat16:
10107 return llvm::ScalableVectorType::get(
Builder.getBFloatTy(), 8);
10108 case SVETypeFlags::EltTyFloat32:
10109 return llvm::ScalableVectorType::get(
Builder.getFloatTy(), 4);
10110 case SVETypeFlags::EltTyFloat64:
10111 return llvm::ScalableVectorType::get(
Builder.getDoubleTy(), 2);
// Boolean element types map to i1 vectors with 16, 8, 4 or 2 lanes.
10113 case SVETypeFlags::EltTyBool8:
10114 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 16);
10115 case SVETypeFlags::EltTyBool16:
10116 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 8);
10117 case SVETypeFlags::EltTyBool32:
10118 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 4);
10119 case SVETypeFlags::EltTyBool64:
10120 return llvm::ScalableVectorType::get(
Builder.getInt1Ty(), 2);
10135 return llvm::ScalableVectorType::get(EltTy, NumElts);
10141 llvm::ScalableVectorType *VTy) {
10143 if (isa<TargetExtType>(Pred->
getType()) &&
10144 cast<TargetExtType>(Pred->
getType())->getName() ==
"aarch64.svcount")
10147 auto *RTy = llvm::VectorType::get(IntegerType::get(
getLLVMContext(), 1), VTy);
10152 llvm::Type *IntrinsicTy;
10153 switch (VTy->getMinNumElements()) {
10155 llvm_unreachable(
"unsupported element count!");
10160 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
10164 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
10165 IntrinsicTy = Pred->
getType();
10171 assert(
C->getType() == RTy &&
"Unexpected return type!");
10176 llvm::StructType *Ty) {
10177 if (PredTuple->
getType() == Ty)
10180 Value *
Ret = llvm::PoisonValue::get(Ty);
10181 for (
unsigned I = 0; I < Ty->getNumElements(); ++I) {
10182 Value *Pred =
Builder.CreateExtractValue(PredTuple, I);
10184 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
10185 Ret =
Builder.CreateInsertValue(Ret, Pred, I);
10195 auto *OverloadedTy =
10199 if (Ops[1]->getType()->isVectorTy())
10219 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
// With only two operands, Ops[1] is required to be a vector (asserted), and
// an i64 zero is appended as the third operand.
10224 if (Ops.size() == 2) {
10225 assert(Ops[1]->getType()->isVectorTy() &&
"Scalar base requires an offset");
10226 Ops.push_back(ConstantInt::get(
Int64Ty, 0));
// Unless the builtin is byte-indexed, and only when Ops[1] is a vector,
// Ops[2] is scaled to bytes: it is shifted left by log2 of the element size
// in bytes of OverloadedTy.
10231 if (!TypeFlags.
isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
10232 unsigned BytesPerElt =
10233 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10234 Ops[2] =
Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10249 auto *OverloadedTy =
10254 Ops.insert(Ops.begin(), Ops.pop_back_val());
10257 if (Ops[2]->getType()->isVectorTy())
10272 if (Ops.size() == 3) {
10273 assert(Ops[1]->getType()->isVectorTy() &&
"Scalar base requires an offset");
10274 Ops.push_back(ConstantInt::get(
Int64Ty, 0));
10279 Ops[0] =
Builder.CreateTrunc(Ops[0], OverloadedTy);
10289 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
10293 if (!TypeFlags.
isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
10294 unsigned BytesPerElt =
10295 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10296 Ops[3] =
Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
10299 return Builder.CreateCall(F, Ops);
10307 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
10309 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
10315 if (Ops[1]->getType()->isVectorTy()) {
10316 if (Ops.size() == 3) {
10318 Ops.push_back(ConstantInt::get(
Int64Ty, 0));
10321 std::swap(Ops[2], Ops[3]);
10325 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
10326 if (BytesPerElt > 1)
10327 Ops[2] =
Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10332 return Builder.CreateCall(F, Ops);
10338 llvm::ScalableVectorType *VTy =
getSVEType(TypeFlags);
10340 Value *BasePtr = Ops[1];
10343 if (Ops.size() > 2)
10347 return Builder.CreateCall(F, {Predicate, BasePtr});
10353 llvm::ScalableVectorType *VTy =
getSVEType(TypeFlags);
10357 case Intrinsic::aarch64_sve_st2:
10358 case Intrinsic::aarch64_sve_st1_pn_x2:
10359 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10360 case Intrinsic::aarch64_sve_st2q:
10363 case Intrinsic::aarch64_sve_st3:
10364 case Intrinsic::aarch64_sve_st3q:
10367 case Intrinsic::aarch64_sve_st4:
10368 case Intrinsic::aarch64_sve_st1_pn_x4:
10369 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10370 case Intrinsic::aarch64_sve_st4q:
10374 llvm_unreachable(
"unknown intrinsic!");
10378 Value *BasePtr = Ops[1];
10381 if (Ops.size() > (2 + N))
10387 for (
unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10388 Operands.push_back(Ops[I]);
10389 Operands.append({Predicate, BasePtr});
10392 return Builder.CreateCall(F, Operands);
10400 unsigned BuiltinID) {
10412 llvm::ScalableVectorType *Ty =
getSVEType(TypeFlags);
10418 llvm::Type *OverloadedTy =
getSVEType(TypeFlags);
10425 unsigned BuiltinID) {
10428 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10431 Value *BasePtr = Ops[1];
10434 if (Ops.size() > 3)
10437 Value *PrfOp = Ops.back();
10440 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10444 llvm::Type *ReturnTy,
10446 unsigned IntrinsicID,
10447 bool IsZExtReturn) {
10454 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10455 llvm::ScalableVectorType *MemoryTy =
nullptr;
10456 llvm::ScalableVectorType *PredTy =
nullptr;
10457 bool IsQuadLoad =
false;
10458 switch (IntrinsicID) {
10459 case Intrinsic::aarch64_sve_ld1uwq:
10460 case Intrinsic::aarch64_sve_ld1udq:
10461 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10462 PredTy = llvm::ScalableVectorType::get(
10467 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10473 Value *BasePtr = Ops[1];
10476 if (Ops.size() > 2)
10481 cast<llvm::Instruction>(
Builder.CreateCall(F, {Predicate, BasePtr}));
10488 return IsZExtReturn ?
Builder.CreateZExt(Load, VectorTy)
10489 :
Builder.CreateSExt(Load, VectorTy);
10494 unsigned IntrinsicID) {
10501 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10502 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10504 auto PredTy = MemoryTy;
10505 auto AddrMemoryTy = MemoryTy;
10506 bool IsQuadStore =
false;
10508 switch (IntrinsicID) {
10509 case Intrinsic::aarch64_sve_st1wq:
10510 case Intrinsic::aarch64_sve_st1dq:
10511 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10513 llvm::ScalableVectorType::get(IntegerType::get(
getLLVMContext(), 1), 1);
10514 IsQuadStore =
true;
10520 Value *BasePtr = Ops[1];
10523 if (Ops.size() == 4)
10528 IsQuadStore ? Ops.back() :
Builder.CreateTrunc(Ops.back(), MemoryTy);
10533 cast<llvm::Instruction>(
Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10546 NewOps.push_back(Ops[2]);
10548 llvm::Value *BasePtr = Ops[3];
10549 llvm::Value *RealSlice = Ops[1];
10552 if (Ops.size() == 5) {
10555 llvm::Value *StreamingVectorLengthCall =
10556 Builder.CreateCall(StreamingVectorLength);
10557 llvm::Value *Mulvl =
10558 Builder.CreateMul(StreamingVectorLengthCall, Ops[4],
"mulvl");
10562 RealSlice =
Builder.CreateAdd(RealSlice, Ops[4]);
10565 NewOps.push_back(BasePtr);
10566 NewOps.push_back(Ops[0]);
10567 NewOps.push_back(RealSlice);
10569 return Builder.CreateCall(F, NewOps);
10581 return Builder.CreateCall(F, Ops);
// When Ops is empty, the i32 constant 255 is supplied as the only operand
// before the call to F is emitted.
10588 if (Ops.size() == 0)
10589 Ops.push_back(llvm::ConstantInt::get(
Int32Ty, 255));
10591 return Builder.CreateCall(F, Ops);
// When exactly two operands were collected, an i32 zero is appended as the
// third before the call to F is emitted.
10597 if (Ops.size() == 2)
10598 Ops.push_back(
Builder.getInt32(0));
10602 return Builder.CreateCall(F, Ops);
10608 return Builder.CreateVectorSplat(
10609 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10613 if (
auto *Ty =
Scalar->getType(); Ty->isVectorTy()) {
10615 auto *VecTy = cast<llvm::VectorType>(Ty);
10616 ElementCount EC = VecTy->getElementCount();
10617 assert(EC.isScalar() && VecTy->getElementType() ==
Int8Ty &&
10618 "Only <1 x i8> expected");
10633 if (
auto *StructTy = dyn_cast<StructType>(Ty)) {
10634 Value *Tuple = llvm::PoisonValue::get(Ty);
10636 for (
unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10638 Value *Out =
Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10639 Tuple =
Builder.CreateInsertValue(Tuple, Out, I);
10645 return Builder.CreateBitCast(Val, Ty);
10650 auto *SplatZero = Constant::getNullValue(Ty);
10651 Ops.insert(Ops.begin(), SplatZero);
10656 auto *SplatUndef = UndefValue::get(Ty);
10657 Ops.insert(Ops.begin(), SplatUndef);
10662 llvm::Type *ResultType,
10667 llvm::Type *DefaultType =
getSVEType(TypeFlags);
10670 return {DefaultType, Ops[1]->getType()};
10676 return {Ops[0]->getType(), Ops.back()->getType()};
10678 if (TypeFlags.
isReductionQV() && !ResultType->isScalableTy() &&
10679 ResultType->isVectorTy())
10680 return {ResultType, Ops[1]->getType()};
10683 return {DefaultType};
10689 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
// Ops[1] must be a constant integer (the cast asserts otherwise); its value
// is the index into the aggregate Ops[0].
10690 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
// One return inserts Ops[2] at Idx, the other extracts the element at Idx.
// NOTE(review): the condition that chooses between the two returns is on
// lines not visible in this view; presumably tuple-set vs. tuple-get --
// confirm.
10693 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
10694 return Builder.CreateExtractValue(Ops[0], Idx);
// Only valid for builtins flagged as tuple-create.
10700 assert(TypeFlags.
isTupleCreate() &&
"Expects TypleFlag isTupleCreate");
// Start from a poison aggregate of type Ty and insert operand Idx at
// position Idx, so the operands fill the tuple in order.
10702 Value *Tuple = llvm::PoisonValue::get(Ty);
10703 for (
unsigned Idx = 0; Idx < Ops.size(); Idx++)
10704 Tuple =
Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
10713 unsigned ICEArguments = 0;
10722 for (
unsigned i = 0, e =
E->getNumArgs(); i != e; i++) {
10723 bool IsICE = ICEArguments & (1 << i);
10729 std::optional<llvm::APSInt>
Result =
10731 assert(
Result &&
"Expected argument to be a constant");
10741 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10742 for (
unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10743 Ops.push_back(
Builder.CreateExtractValue(Arg, I));
10748 Ops.push_back(Arg);
10755 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10756 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10771 else if (TypeFlags.
isStore())
10789 else if (TypeFlags.
isUndef())
10790 return UndefValue::get(Ty);
10791 else if (Builtin->LLVMIntrinsic != 0) {
10795 Ops.pop_back_val());
10796 if (TypeFlags.
getMergeType() == SVETypeFlags::MergeZeroExp)
10799 if (TypeFlags.
getMergeType() == SVETypeFlags::MergeAnyExp)
10805 Ops.push_back(
Builder.getInt32( 31));
10807 Ops.insert(&Ops[1],
Builder.getInt32( 31));
10810 for (
unsigned i = 0, e = Ops.size(); i != e; ++i)
10811 if (
auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10812 if (PredTy->getElementType()->isIntegerTy(1))
10822 std::swap(Ops[1], Ops[2]);
10824 std::swap(Ops[1], Ops[2]);
10827 std::swap(Ops[1], Ops[2]);
10830 std::swap(Ops[1], Ops[3]);
10833 if (TypeFlags.
getMergeType() == SVETypeFlags::MergeZero) {
10834 llvm::Type *OpndTy = Ops[1]->getType();
10835 auto *SplatZero = Constant::getNullValue(OpndTy);
10836 Ops[1] =
Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10843 if (
Call->getType() == Ty)
10847 if (
auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
10849 if (
auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
10852 llvm_unreachable(
"unsupported element count!");
10855 switch (BuiltinID) {
10859 case SVE::BI__builtin_sve_svreinterpret_b: {
10863 CGM.
getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10864 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10866 case SVE::BI__builtin_sve_svreinterpret_c: {
10870 CGM.
getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10871 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10874 case SVE::BI__builtin_sve_svpsel_lane_b8:
10875 case SVE::BI__builtin_sve_svpsel_lane_b16:
10876 case SVE::BI__builtin_sve_svpsel_lane_b32:
10877 case SVE::BI__builtin_sve_svpsel_lane_b64:
10878 case SVE::BI__builtin_sve_svpsel_lane_c8:
10879 case SVE::BI__builtin_sve_svpsel_lane_c16:
10880 case SVE::BI__builtin_sve_svpsel_lane_c32:
10881 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10882 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10883 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->
getName() ==
10884 "aarch64.svcount")) &&
10885 "Unexpected TargetExtType");
10889 CGM.
getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10891 CGM.
getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10895 llvm::Value *Ops0 =
10896 IsSVCount ?
Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10898 llvm::Value *PSel =
Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10899 return IsSVCount ?
Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10901 case SVE::BI__builtin_sve_svmov_b_z: {
10904 llvm::Type* OverloadedTy =
getSVEType(TypeFlags);
10906 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10909 case SVE::BI__builtin_sve_svnot_b_z: {
10912 llvm::Type* OverloadedTy =
getSVEType(TypeFlags);
10914 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10917 case SVE::BI__builtin_sve_svmovlb_u16:
10918 case SVE::BI__builtin_sve_svmovlb_u32:
10919 case SVE::BI__builtin_sve_svmovlb_u64:
10920 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10922 case SVE::BI__builtin_sve_svmovlb_s16:
10923 case SVE::BI__builtin_sve_svmovlb_s32:
10924 case SVE::BI__builtin_sve_svmovlb_s64:
10925 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10927 case SVE::BI__builtin_sve_svmovlt_u16:
10928 case SVE::BI__builtin_sve_svmovlt_u32:
10929 case SVE::BI__builtin_sve_svmovlt_u64:
10930 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10932 case SVE::BI__builtin_sve_svmovlt_s16:
10933 case SVE::BI__builtin_sve_svmovlt_s32:
10934 case SVE::BI__builtin_sve_svmovlt_s64:
10935 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10937 case SVE::BI__builtin_sve_svpmullt_u16:
10938 case SVE::BI__builtin_sve_svpmullt_u64:
10939 case SVE::BI__builtin_sve_svpmullt_n_u16:
10940 case SVE::BI__builtin_sve_svpmullt_n_u64:
10941 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10943 case SVE::BI__builtin_sve_svpmullb_u16:
10944 case SVE::BI__builtin_sve_svpmullb_u64:
10945 case SVE::BI__builtin_sve_svpmullb_n_u16:
10946 case SVE::BI__builtin_sve_svpmullb_n_u64:
10947 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10949 case SVE::BI__builtin_sve_svdup_n_b8:
10950 case SVE::BI__builtin_sve_svdup_n_b16:
10951 case SVE::BI__builtin_sve_svdup_n_b32:
10952 case SVE::BI__builtin_sve_svdup_n_b64: {
10954 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10955 llvm::ScalableVectorType *OverloadedTy =
getSVEType(TypeFlags);
10960 case SVE::BI__builtin_sve_svdupq_n_b8:
10961 case SVE::BI__builtin_sve_svdupq_n_b16:
10962 case SVE::BI__builtin_sve_svdupq_n_b32:
10963 case SVE::BI__builtin_sve_svdupq_n_b64:
10964 case SVE::BI__builtin_sve_svdupq_n_u8:
10965 case SVE::BI__builtin_sve_svdupq_n_s8:
10966 case SVE::BI__builtin_sve_svdupq_n_u64:
10967 case SVE::BI__builtin_sve_svdupq_n_f64:
10968 case SVE::BI__builtin_sve_svdupq_n_s64:
10969 case SVE::BI__builtin_sve_svdupq_n_u16:
10970 case SVE::BI__builtin_sve_svdupq_n_f16:
10971 case SVE::BI__builtin_sve_svdupq_n_bf16:
10972 case SVE::BI__builtin_sve_svdupq_n_s16:
10973 case SVE::BI__builtin_sve_svdupq_n_u32:
10974 case SVE::BI__builtin_sve_svdupq_n_f32:
10975 case SVE::BI__builtin_sve_svdupq_n_s32: {
10978 unsigned NumOpnds = Ops.size();
10981 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10986 llvm::Type *EltTy = Ops[0]->getType();
10991 for (
unsigned I = 0; I < NumOpnds; ++I)
10992 VecOps.push_back(
Builder.CreateZExt(Ops[I], EltTy));
10997 OverloadedTy, PoisonValue::get(OverloadedTy), Vec,
Builder.getInt64(0));
11012 : Intrinsic::aarch64_sve_cmpne_wide,
11019 case SVE::BI__builtin_sve_svpfalse_b:
11020 return ConstantInt::getFalse(Ty);
11022 case SVE::BI__builtin_sve_svpfalse_c: {
11023 auto SVBoolTy = ScalableVectorType::get(
Builder.getInt1Ty(), 16);
11026 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
11029 case SVE::BI__builtin_sve_svlen_bf16:
11030 case SVE::BI__builtin_sve_svlen_f16:
11031 case SVE::BI__builtin_sve_svlen_f32:
11032 case SVE::BI__builtin_sve_svlen_f64:
11033 case SVE::BI__builtin_sve_svlen_s8:
11034 case SVE::BI__builtin_sve_svlen_s16:
11035 case SVE::BI__builtin_sve_svlen_s32:
11036 case SVE::BI__builtin_sve_svlen_s64:
11037 case SVE::BI__builtin_sve_svlen_u8:
11038 case SVE::BI__builtin_sve_svlen_u16:
11039 case SVE::BI__builtin_sve_svlen_u32:
11040 case SVE::BI__builtin_sve_svlen_u64: {
11042 auto VTy = cast<llvm::VectorType>(
getSVEType(TF));
11044 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
11050 case SVE::BI__builtin_sve_svtbl2_u8:
11051 case SVE::BI__builtin_sve_svtbl2_s8:
11052 case SVE::BI__builtin_sve_svtbl2_u16:
11053 case SVE::BI__builtin_sve_svtbl2_s16:
11054 case SVE::BI__builtin_sve_svtbl2_u32:
11055 case SVE::BI__builtin_sve_svtbl2_s32:
11056 case SVE::BI__builtin_sve_svtbl2_u64:
11057 case SVE::BI__builtin_sve_svtbl2_s64:
11058 case SVE::BI__builtin_sve_svtbl2_f16:
11059 case SVE::BI__builtin_sve_svtbl2_bf16:
11060 case SVE::BI__builtin_sve_svtbl2_f32:
11061 case SVE::BI__builtin_sve_svtbl2_f64: {
11063 auto VTy = cast<llvm::ScalableVectorType>(
getSVEType(TF));
11065 return Builder.CreateCall(F, Ops);
11068 case SVE::BI__builtin_sve_svset_neonq_s8:
11069 case SVE::BI__builtin_sve_svset_neonq_s16:
11070 case SVE::BI__builtin_sve_svset_neonq_s32:
11071 case SVE::BI__builtin_sve_svset_neonq_s64:
11072 case SVE::BI__builtin_sve_svset_neonq_u8:
11073 case SVE::BI__builtin_sve_svset_neonq_u16:
11074 case SVE::BI__builtin_sve_svset_neonq_u32:
11075 case SVE::BI__builtin_sve_svset_neonq_u64:
11076 case SVE::BI__builtin_sve_svset_neonq_f16:
11077 case SVE::BI__builtin_sve_svset_neonq_f32:
11078 case SVE::BI__builtin_sve_svset_neonq_f64:
11079 case SVE::BI__builtin_sve_svset_neonq_bf16: {
11080 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1],
Builder.getInt64(0));
11083 case SVE::BI__builtin_sve_svget_neonq_s8:
11084 case SVE::BI__builtin_sve_svget_neonq_s16:
11085 case SVE::BI__builtin_sve_svget_neonq_s32:
11086 case SVE::BI__builtin_sve_svget_neonq_s64:
11087 case SVE::BI__builtin_sve_svget_neonq_u8:
11088 case SVE::BI__builtin_sve_svget_neonq_u16:
11089 case SVE::BI__builtin_sve_svget_neonq_u32:
11090 case SVE::BI__builtin_sve_svget_neonq_u64:
11091 case SVE::BI__builtin_sve_svget_neonq_f16:
11092 case SVE::BI__builtin_sve_svget_neonq_f32:
11093 case SVE::BI__builtin_sve_svget_neonq_f64:
11094 case SVE::BI__builtin_sve_svget_neonq_bf16: {
11095 return Builder.CreateExtractVector(Ty, Ops[0],
Builder.getInt64(0));
11098 case SVE::BI__builtin_sve_svdup_neonq_s8:
11099 case SVE::BI__builtin_sve_svdup_neonq_s16:
11100 case SVE::BI__builtin_sve_svdup_neonq_s32:
11101 case SVE::BI__builtin_sve_svdup_neonq_s64:
11102 case SVE::BI__builtin_sve_svdup_neonq_u8:
11103 case SVE::BI__builtin_sve_svdup_neonq_u16:
11104 case SVE::BI__builtin_sve_svdup_neonq_u32:
11105 case SVE::BI__builtin_sve_svdup_neonq_u64:
11106 case SVE::BI__builtin_sve_svdup_neonq_f16:
11107 case SVE::BI__builtin_sve_svdup_neonq_f32:
11108 case SVE::BI__builtin_sve_svdup_neonq_f64:
11109 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
11112 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
11124 switch (BuiltinID) {
11127 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
11130 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
11131 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
11134 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
11135 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
11141 for (
unsigned I = 0; I < MultiVec; ++I)
11142 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
11155 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11158 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
11159 BuiltinID == SME::BI__builtin_sme_svzero_za)
11160 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11161 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
11162 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
11163 BuiltinID == SME::BI__builtin_sme_svldr_za ||
11164 BuiltinID == SME::BI__builtin_sme_svstr_za)
11165 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11170 Ops.pop_back_val());
11175 if (Builtin->LLVMIntrinsic == 0)
11178 if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
11181 const auto *FD = cast<FunctionDecl>(
CurFuncDecl);
11183 unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
11186 return ConstantInt::getBool(
Builder.getContext(), IsStreaming);
11192 for (
unsigned i = 0, e = Ops.size(); i != e; ++i)
11193 if (
auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
11194 if (PredTy->getElementType()->isIntegerTy(1))
11202 return Builder.CreateCall(F, Ops);
11207 llvm::Triple::ArchType Arch) {
11216 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
11217 return EmitAArch64CpuSupports(
E);
11219 unsigned HintID =
static_cast<unsigned>(-1);
11220 switch (BuiltinID) {
11222 case clang::AArch64::BI__builtin_arm_nop:
11225 case clang::AArch64::BI__builtin_arm_yield:
11226 case clang::AArch64::BI__yield:
11229 case clang::AArch64::BI__builtin_arm_wfe:
11230 case clang::AArch64::BI__wfe:
11233 case clang::AArch64::BI__builtin_arm_wfi:
11234 case clang::AArch64::BI__wfi:
11237 case clang::AArch64::BI__builtin_arm_sev:
11238 case clang::AArch64::BI__sev:
11241 case clang::AArch64::BI__builtin_arm_sevl:
11242 case clang::AArch64::BI__sevl:
11247 if (HintID !=
static_cast<unsigned>(-1)) {
11249 return Builder.CreateCall(F, llvm::ConstantInt::get(
Int32Ty, HintID));
11252 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
11258 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
11263 "__arm_sme_state"));
11265 "aarch64_pstate_sm_compatible");
11266 CI->setAttributes(Attrs);
11267 CI->setCallingConv(
11268 llvm::CallingConv::
11269 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
11276 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
11278 "rbit of unusual size!");
11281 CGM.
getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
11283 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
11285 "rbit of unusual size!");
11288 CGM.
getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
11291 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
11292 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
11296 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
11301 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11306 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11312 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11313 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11315 llvm::Type *Ty = Arg->getType();
11320 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11321 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11323 llvm::Type *Ty = Arg->getType();
11328 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11329 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11331 llvm::Type *Ty = Arg->getType();
11336 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11337 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11339 llvm::Type *Ty = Arg->getType();
11344 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11346 "__jcvt of unusual size!");
11352 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11353 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11354 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11355 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11359 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11363 llvm::Value *Val =
Builder.CreateCall(F, MemAddr);
11364 llvm::Value *ToRet;
11365 for (
size_t i = 0; i < 8; i++) {
11366 llvm::Value *ValOffsetPtr =
11377 Args.push_back(MemAddr);
11378 for (
size_t i = 0; i < 8; i++) {
11379 llvm::Value *ValOffsetPtr =
11386 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11387 ? Intrinsic::aarch64_st64b
11388 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11389 ? Intrinsic::aarch64_st64bv
11390 : Intrinsic::aarch64_st64bv0);
11392 return Builder.CreateCall(F, Args);
11396 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11397 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11399 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11400 ? Intrinsic::aarch64_rndr
11401 : Intrinsic::aarch64_rndrrs);
11403 llvm::Value *Val =
Builder.CreateCall(F);
11404 Value *RandomValue =
Builder.CreateExtractValue(Val, 0);
11413 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11414 assert(
E->getNumArgs() == 2 &&
"__clear_cache takes 2 arguments");
11417 for (
unsigned i = 0; i < 2; i++)
11420 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11421 StringRef Name = FD->
getName();
11425 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11426 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11430 ? Intrinsic::aarch64_ldaxp
11431 : Intrinsic::aarch64_ldxp);
11438 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
11439 Val0 =
Builder.CreateZExt(Val0, Int128Ty);
11440 Val1 =
Builder.CreateZExt(Val1, Int128Ty);
11442 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11443 Val =
Builder.CreateShl(Val0, ShiftCst,
"shl",
true );
11444 Val =
Builder.CreateOr(Val, Val1);
11446 }
else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11447 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11452 llvm::Type *
IntTy =
11457 ? Intrinsic::aarch64_ldaxr
11458 : Intrinsic::aarch64_ldxr,
11460 CallInst *Val =
Builder.CreateCall(F, LoadAddr,
"ldxr");
11464 if (RealResTy->isPointerTy())
11465 return Builder.CreateIntToPtr(Val, RealResTy);
11467 llvm::Type *IntResTy = llvm::IntegerType::get(
11469 return Builder.CreateBitCast(
Builder.CreateTruncOrBitCast(Val, IntResTy),
11473 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11474 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11478 ? Intrinsic::aarch64_stlxp
11479 : Intrinsic::aarch64_stxp);
11491 return Builder.CreateCall(F, {Arg0, Arg1, StPtr},
"stxp");
11494 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11495 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11500 llvm::Type *StoreTy =
11503 if (StoreVal->
getType()->isPointerTy())
11506 llvm::Type *
IntTy = llvm::IntegerType::get(
11515 ? Intrinsic::aarch64_stlxr
11516 : Intrinsic::aarch64_stxr,
11518 CallInst *CI =
Builder.CreateCall(F, {StoreVal, StoreAddr},
"stxr");
11520 1, Attribute::get(
getLLVMContext(), Attribute::ElementType, StoreTy));
11524 if (BuiltinID == clang::AArch64::BI__getReg) {
11527 llvm_unreachable(
"Sema will ensure that the parameter is constant");
11533 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11534 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11535 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11537 llvm::Function *F =
11539 return Builder.CreateCall(F, Metadata);
11542 if (BuiltinID == clang::AArch64::BI__break) {
11545 llvm_unreachable(
"Sema will ensure that the parameter is constant");
11547 llvm::Function *F =
CGM.
getIntrinsic(llvm::Intrinsic::aarch64_break);
11551 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11553 return Builder.CreateCall(F);
11556 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11557 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11558 llvm::SyncScope::SingleThread);
11561 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11562 switch (BuiltinID) {
11563 case clang::AArch64::BI__builtin_arm_crc32b:
11564 CRCIntrinsicID = Intrinsic::aarch64_crc32b;
break;
11565 case clang::AArch64::BI__builtin_arm_crc32cb:
11566 CRCIntrinsicID = Intrinsic::aarch64_crc32cb;
break;
11567 case clang::AArch64::BI__builtin_arm_crc32h:
11568 CRCIntrinsicID = Intrinsic::aarch64_crc32h;
break;
11569 case clang::AArch64::BI__builtin_arm_crc32ch:
11570 CRCIntrinsicID = Intrinsic::aarch64_crc32ch;
break;
11571 case clang::AArch64::BI__builtin_arm_crc32w:
11572 CRCIntrinsicID = Intrinsic::aarch64_crc32w;
break;
11573 case clang::AArch64::BI__builtin_arm_crc32cw:
11574 CRCIntrinsicID = Intrinsic::aarch64_crc32cw;
break;
11575 case clang::AArch64::BI__builtin_arm_crc32d:
11576 CRCIntrinsicID = Intrinsic::aarch64_crc32x;
break;
11577 case clang::AArch64::BI__builtin_arm_crc32cd:
11578 CRCIntrinsicID = Intrinsic::aarch64_crc32cx;
break;
11581 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11586 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11587 Arg1 =
Builder.CreateZExtOrBitCast(Arg1, DataTy);
11589 return Builder.CreateCall(F, {Arg0, Arg1});
11593 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11600 CGM.
getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11604 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11605 switch (BuiltinID) {
11606 case clang::AArch64::BI__builtin_arm_irg:
11607 MTEIntrinsicID = Intrinsic::aarch64_irg;
break;
11608 case clang::AArch64::BI__builtin_arm_addg:
11609 MTEIntrinsicID = Intrinsic::aarch64_addg;
break;
11610 case clang::AArch64::BI__builtin_arm_gmi:
11611 MTEIntrinsicID = Intrinsic::aarch64_gmi;
break;
11612 case clang::AArch64::BI__builtin_arm_ldg:
11613 MTEIntrinsicID = Intrinsic::aarch64_ldg;
break;
11614 case clang::AArch64::BI__builtin_arm_stg:
11615 MTEIntrinsicID = Intrinsic::aarch64_stg;
break;
11616 case clang::AArch64::BI__builtin_arm_subp:
11617 MTEIntrinsicID = Intrinsic::aarch64_subp;
break;
11620 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11621 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11629 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11635 {Pointer, TagOffset});
11637 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11648 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11651 {TagAddress, TagAddress});
11656 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11659 {TagAddress, TagAddress});
11661 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11669 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11670 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11671 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11672 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11673 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11674 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11675 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11676 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11679 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11680 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11681 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11682 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11685 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11686 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11688 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11689 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11691 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11692 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11694 llvm::Type *ValueType;
11698 }
else if (Is128Bit) {
11699 llvm::Type *Int128Ty =
11701 ValueType = Int128Ty;
11703 }
else if (IsPointerBuiltin) {
11713 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11714 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11720 std::string SysRegStr;
11721 llvm::raw_string_ostream(SysRegStr) <<
11722 ((1 << 1) | ((SysReg >> 14) & 1)) <<
":" <<
11723 ((SysReg >> 11) & 7) <<
":" <<
11724 ((SysReg >> 7) & 15) <<
":" <<
11725 ((SysReg >> 3) & 15) <<
":" <<
11728 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11729 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11730 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11735 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11736 llvm::Function *F =
CGM.
getIntrinsic(llvm::Intrinsic::read_register, Types);
11738 return Builder.CreateCall(F, Metadata);
11741 llvm::Function *F =
CGM.
getIntrinsic(llvm::Intrinsic::write_register, Types);
11744 return Builder.CreateCall(F, { Metadata, ArgValue });
11747 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11748 llvm::Function *F =
11750 return Builder.CreateCall(F);
11753 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11755 return Builder.CreateCall(F);
11758 if (BuiltinID == clang::AArch64::BI__mulh ||
11759 BuiltinID == clang::AArch64::BI__umulh) {
11761 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
11763 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11769 Value *MulResult, *HigherBits;
11771 MulResult =
Builder.CreateNSWMul(LHS, RHS);
11772 HigherBits =
Builder.CreateAShr(MulResult, 64);
11774 MulResult =
Builder.CreateNUWMul(LHS, RHS);
11775 HigherBits =
Builder.CreateLShr(MulResult, 64);
11777 HigherBits =
Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11782 if (BuiltinID == AArch64::BI__writex18byte ||
11783 BuiltinID == AArch64::BI__writex18word ||
11784 BuiltinID == AArch64::BI__writex18dword ||
11785 BuiltinID == AArch64::BI__writex18qword) {
11801 if (BuiltinID == AArch64::BI__readx18byte ||
11802 BuiltinID == AArch64::BI__readx18word ||
11803 BuiltinID == AArch64::BI__readx18dword ||
11804 BuiltinID == AArch64::BI__readx18qword) {
11819 if (BuiltinID == AArch64::BI__addx18byte ||
11820 BuiltinID == AArch64::BI__addx18word ||
11821 BuiltinID == AArch64::BI__addx18dword ||
11822 BuiltinID == AArch64::BI__addx18qword ||
11823 BuiltinID == AArch64::BI__incx18byte ||
11824 BuiltinID == AArch64::BI__incx18word ||
11825 BuiltinID == AArch64::BI__incx18dword ||
11826 BuiltinID == AArch64::BI__incx18qword) {
11829 switch (BuiltinID) {
11830 case AArch64::BI__incx18byte:
11832 isIncrement =
true;
11834 case AArch64::BI__incx18word:
11836 isIncrement =
true;
11838 case AArch64::BI__incx18dword:
11840 isIncrement =
true;
11842 case AArch64::BI__incx18qword:
11844 isIncrement =
true;
11848 isIncrement =
false;
11873 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11874 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11875 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11876 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11879 return Builder.CreateBitCast(Arg, RetTy);
11882 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11883 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11884 BuiltinID == AArch64::BI_CountLeadingZeros ||
11885 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11887 llvm::Type *ArgType = Arg->
getType();
11889 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11890 BuiltinID == AArch64::BI_CountLeadingOnes64)
11891 Arg =
Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11896 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11897 BuiltinID == AArch64::BI_CountLeadingZeros64)
11902 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11903 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11906 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11911 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11916 if (BuiltinID == AArch64::BI_CountOneBits ||
11917 BuiltinID == AArch64::BI_CountOneBits64) {
11919 llvm::Type *ArgType = ArgValue->
getType();
11923 if (BuiltinID == AArch64::BI_CountOneBits64)
11928 if (BuiltinID == AArch64::BI__prefetch) {
11937 if (BuiltinID == AArch64::BI__hlt) {
11943 return ConstantInt::get(
Builder.getInt32Ty(), 0);
11946 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
11947 return Builder.CreateFPTrunc(
11954 if (std::optional<MSVCIntrin> MsvcIntId =
11960 return P.first == BuiltinID;
11963 BuiltinID = It->second;
11967 unsigned ICEArguments = 0;
11974 for (
unsigned i = 0, e =
E->getNumArgs() - 1; i != e; i++) {
11976 switch (BuiltinID) {
11977 case NEON::BI__builtin_neon_vld1_v:
11978 case NEON::BI__builtin_neon_vld1q_v:
11979 case NEON::BI__builtin_neon_vld1_dup_v:
11980 case NEON::BI__builtin_neon_vld1q_dup_v:
11981 case NEON::BI__builtin_neon_vld1_lane_v:
11982 case NEON::BI__builtin_neon_vld1q_lane_v:
11983 case NEON::BI__builtin_neon_vst1_v:
11984 case NEON::BI__builtin_neon_vst1q_v:
11985 case NEON::BI__builtin_neon_vst1_lane_v:
11986 case NEON::BI__builtin_neon_vst1q_lane_v:
11987 case NEON::BI__builtin_neon_vldap1_lane_s64:
11988 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11989 case NEON::BI__builtin_neon_vstl1_lane_s64:
11990 case NEON::BI__builtin_neon_vstl1q_lane_s64:
12008 assert(
Result &&
"SISD intrinsic should have been handled");
12012 const Expr *Arg =
E->getArg(
E->getNumArgs()-1);
12014 if (std::optional<llvm::APSInt>
Result =
12019 bool usgn =
Type.isUnsigned();
12020 bool quad =
Type.isQuad();
12023 switch (BuiltinID) {
12025 case NEON::BI__builtin_neon_vabsh_f16:
12028 case NEON::BI__builtin_neon_vaddq_p128: {
12031 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
12032 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
12033 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
12034 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
12035 return Builder.CreateBitCast(Ops[0], Int128Ty);
12037 case NEON::BI__builtin_neon_vldrq_p128: {
12038 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
12043 case NEON::BI__builtin_neon_vstrq_p128: {
12044 Value *Ptr = Ops[0];
12047 case NEON::BI__builtin_neon_vcvts_f32_u32:
12048 case NEON::BI__builtin_neon_vcvtd_f64_u64:
12051 case NEON::BI__builtin_neon_vcvts_f32_s32:
12052 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
12054 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
12057 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
12059 return Builder.CreateUIToFP(Ops[0], FTy);
12060 return Builder.CreateSIToFP(Ops[0], FTy);
12062 case NEON::BI__builtin_neon_vcvth_f16_u16:
12063 case NEON::BI__builtin_neon_vcvth_f16_u32:
12064 case NEON::BI__builtin_neon_vcvth_f16_u64:
12067 case NEON::BI__builtin_neon_vcvth_f16_s16:
12068 case NEON::BI__builtin_neon_vcvth_f16_s32:
12069 case NEON::BI__builtin_neon_vcvth_f16_s64: {
12071 llvm::Type *FTy =
HalfTy;
12073 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
12075 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
12079 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
12081 return Builder.CreateUIToFP(Ops[0], FTy);
12082 return Builder.CreateSIToFP(Ops[0], FTy);
12084 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12085 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12086 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12087 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12088 case NEON::BI__builtin_neon_vcvth_u16_f16:
12089 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12090 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12091 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12092 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12093 case NEON::BI__builtin_neon_vcvth_s16_f16: {
12096 llvm::Type* FTy =
HalfTy;
12097 llvm::Type *Tys[2] = {InTy, FTy};
12099 switch (BuiltinID) {
12100 default: llvm_unreachable(
"missing builtin ID in switch!");
12101 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12102 Int = Intrinsic::aarch64_neon_fcvtau;
break;
12103 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12104 Int = Intrinsic::aarch64_neon_fcvtmu;
break;
12105 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12106 Int = Intrinsic::aarch64_neon_fcvtnu;
break;
12107 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12108 Int = Intrinsic::aarch64_neon_fcvtpu;
break;
12109 case NEON::BI__builtin_neon_vcvth_u16_f16:
12110 Int = Intrinsic::aarch64_neon_fcvtzu;
break;
12111 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12112 Int = Intrinsic::aarch64_neon_fcvtas;
break;
12113 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12114 Int = Intrinsic::aarch64_neon_fcvtms;
break;
12115 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12116 Int = Intrinsic::aarch64_neon_fcvtns;
break;
12117 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12118 Int = Intrinsic::aarch64_neon_fcvtps;
break;
12119 case NEON::BI__builtin_neon_vcvth_s16_f16:
12120 Int = Intrinsic::aarch64_neon_fcvtzs;
break;
12125 case NEON::BI__builtin_neon_vcaleh_f16:
12126 case NEON::BI__builtin_neon_vcalth_f16:
12127 case NEON::BI__builtin_neon_vcageh_f16:
12128 case NEON::BI__builtin_neon_vcagth_f16: {
12131 llvm::Type* FTy =
HalfTy;
12132 llvm::Type *Tys[2] = {InTy, FTy};
12134 switch (BuiltinID) {
12135 default: llvm_unreachable(
"missing builtin ID in switch!");
12136 case NEON::BI__builtin_neon_vcageh_f16:
12137 Int = Intrinsic::aarch64_neon_facge;
break;
12138 case NEON::BI__builtin_neon_vcagth_f16:
12139 Int = Intrinsic::aarch64_neon_facgt;
break;
12140 case NEON::BI__builtin_neon_vcaleh_f16:
12141 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]);
break;
12142 case NEON::BI__builtin_neon_vcalth_f16:
12143 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]);
break;
12148 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12149 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
12152 llvm::Type* FTy =
HalfTy;
12153 llvm::Type *Tys[2] = {InTy, FTy};
12155 switch (BuiltinID) {
12156 default: llvm_unreachable(
"missing builtin ID in switch!");
12157 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12158 Int = Intrinsic::aarch64_neon_vcvtfp2fxs;
break;
12159 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
12160 Int = Intrinsic::aarch64_neon_vcvtfp2fxu;
break;
12165 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12166 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
12168 llvm::Type* FTy =
HalfTy;
12170 llvm::Type *Tys[2] = {FTy, InTy};
12172 switch (BuiltinID) {
12173 default: llvm_unreachable(
"missing builtin ID in switch!");
12174 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12175 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
12176 Ops[0] =
Builder.CreateSExt(Ops[0], InTy,
"sext");
12178 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
12179 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
12180 Ops[0] =
Builder.CreateZExt(Ops[0], InTy);
12185 case NEON::BI__builtin_neon_vpaddd_s64: {
12186 auto *Ty = llvm::FixedVectorType::get(
Int64Ty, 2);
12189 Vec =
Builder.CreateBitCast(Vec, Ty,
"v2i64");
12190 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
12191 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
12192 Value *Op0 =
Builder.CreateExtractElement(Vec, Idx0,
"lane0");
12193 Value *Op1 =
Builder.CreateExtractElement(Vec, Idx1,
"lane1");
12195 return Builder.CreateAdd(Op0, Op1,
"vpaddd");
12197 case NEON::BI__builtin_neon_vpaddd_f64: {
12198 auto *Ty = llvm::FixedVectorType::get(
DoubleTy, 2);
12201 Vec =
Builder.CreateBitCast(Vec, Ty,
"v2f64");
12202 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
12203 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
12204 Value *Op0 =
Builder.CreateExtractElement(Vec, Idx0,
"lane0");
12205 Value *Op1 =
Builder.CreateExtractElement(Vec, Idx1,
"lane1");
12207 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
12209 case NEON::BI__builtin_neon_vpadds_f32: {
12210 auto *Ty = llvm::FixedVectorType::get(
FloatTy, 2);
12213 Vec =
Builder.CreateBitCast(Vec, Ty,
"v2f32");
12214 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
12215 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
12216 Value *Op0 =
Builder.CreateExtractElement(Vec, Idx0,
"lane0");
12217 Value *Op1 =
Builder.CreateExtractElement(Vec, Idx1,
"lane1");
12219 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
12221 case NEON::BI__builtin_neon_vceqzd_s64:
12222 case NEON::BI__builtin_neon_vceqzd_f64:
12223 case NEON::BI__builtin_neon_vceqzs_f32:
12224 case NEON::BI__builtin_neon_vceqzh_f16:
12228 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ,
"vceqz");
12229 case NEON::BI__builtin_neon_vcgezd_s64:
12230 case NEON::BI__builtin_neon_vcgezd_f64:
12231 case NEON::BI__builtin_neon_vcgezs_f32:
12232 case NEON::BI__builtin_neon_vcgezh_f16:
12236 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE,
"vcgez");
12237 case NEON::BI__builtin_neon_vclezd_s64:
12238 case NEON::BI__builtin_neon_vclezd_f64:
12239 case NEON::BI__builtin_neon_vclezs_f32:
12240 case NEON::BI__builtin_neon_vclezh_f16:
12244 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE,
"vclez");
12245 case NEON::BI__builtin_neon_vcgtzd_s64:
12246 case NEON::BI__builtin_neon_vcgtzd_f64:
12247 case NEON::BI__builtin_neon_vcgtzs_f32:
12248 case NEON::BI__builtin_neon_vcgtzh_f16:
12252 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT,
"vcgtz");
12253 case NEON::BI__builtin_neon_vcltzd_s64:
12254 case NEON::BI__builtin_neon_vcltzd_f64:
12255 case NEON::BI__builtin_neon_vcltzs_f32:
12256 case NEON::BI__builtin_neon_vcltzh_f16:
12260 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT,
"vcltz");
12262 case NEON::BI__builtin_neon_vceqzd_u64: {
12266 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(
Int64Ty));
12269 case NEON::BI__builtin_neon_vceqd_f64:
12270 case NEON::BI__builtin_neon_vcled_f64:
12271 case NEON::BI__builtin_neon_vcltd_f64:
12272 case NEON::BI__builtin_neon_vcged_f64:
12273 case NEON::BI__builtin_neon_vcgtd_f64: {
12274 llvm::CmpInst::Predicate
P;
12275 switch (BuiltinID) {
12276 default: llvm_unreachable(
"missing builtin ID in switch!");
12277 case NEON::BI__builtin_neon_vceqd_f64:
P = llvm::FCmpInst::FCMP_OEQ;
break;
12278 case NEON::BI__builtin_neon_vcled_f64:
P = llvm::FCmpInst::FCMP_OLE;
break;
12279 case NEON::BI__builtin_neon_vcltd_f64:
P = llvm::FCmpInst::FCMP_OLT;
break;
12280 case NEON::BI__builtin_neon_vcged_f64:
P = llvm::FCmpInst::FCMP_OGE;
break;
12281 case NEON::BI__builtin_neon_vcgtd_f64:
P = llvm::FCmpInst::FCMP_OGT;
break;
12286 if (
P == llvm::FCmpInst::FCMP_OEQ)
12287 Ops[0] =
Builder.CreateFCmp(
P, Ops[0], Ops[1]);
12289 Ops[0] =
Builder.CreateFCmpS(
P, Ops[0], Ops[1]);
12292 case NEON::BI__builtin_neon_vceqs_f32:
12293 case NEON::BI__builtin_neon_vcles_f32:
12294 case NEON::BI__builtin_neon_vclts_f32:
12295 case NEON::BI__builtin_neon_vcges_f32:
12296 case NEON::BI__builtin_neon_vcgts_f32: {
12297 llvm::CmpInst::Predicate
P;
12298 switch (BuiltinID) {
12299 default: llvm_unreachable(
"missing builtin ID in switch!");
12300 case NEON::BI__builtin_neon_vceqs_f32:
P = llvm::FCmpInst::FCMP_OEQ;
break;
12301 case NEON::BI__builtin_neon_vcles_f32:
P = llvm::FCmpInst::FCMP_OLE;
break;
12302 case NEON::BI__builtin_neon_vclts_f32:
P = llvm::FCmpInst::FCMP_OLT;
break;
12303 case NEON::BI__builtin_neon_vcges_f32:
P = llvm::FCmpInst::FCMP_OGE;
break;
12304 case NEON::BI__builtin_neon_vcgts_f32:
P = llvm::FCmpInst::FCMP_OGT;
break;
12309 if (
P == llvm::FCmpInst::FCMP_OEQ)
12310 Ops[0] =
Builder.CreateFCmp(
P, Ops[0], Ops[1]);
12312 Ops[0] =
Builder.CreateFCmpS(
P, Ops[0], Ops[1]);
12315 case NEON::BI__builtin_neon_vceqh_f16:
12316 case NEON::BI__builtin_neon_vcleh_f16:
12317 case NEON::BI__builtin_neon_vclth_f16:
12318 case NEON::BI__builtin_neon_vcgeh_f16:
12319 case NEON::BI__builtin_neon_vcgth_f16: {
12320 llvm::CmpInst::Predicate
P;
12321 switch (BuiltinID) {
12322 default: llvm_unreachable(
"missing builtin ID in switch!");
12323 case NEON::BI__builtin_neon_vceqh_f16:
P = llvm::FCmpInst::FCMP_OEQ;
break;
12324 case NEON::BI__builtin_neon_vcleh_f16:
P = llvm::FCmpInst::FCMP_OLE;
break;
12325 case NEON::BI__builtin_neon_vclth_f16:
P = llvm::FCmpInst::FCMP_OLT;
break;
12326 case NEON::BI__builtin_neon_vcgeh_f16:
P = llvm::FCmpInst::FCMP_OGE;
break;
12327 case NEON::BI__builtin_neon_vcgth_f16:
P = llvm::FCmpInst::FCMP_OGT;
break;
12332 if (
P == llvm::FCmpInst::FCMP_OEQ)
12333 Ops[0] =
Builder.CreateFCmp(
P, Ops[0], Ops[1]);
12335 Ops[0] =
Builder.CreateFCmpS(
P, Ops[0], Ops[1]);
12338 case NEON::BI__builtin_neon_vceqd_s64:
12339 case NEON::BI__builtin_neon_vceqd_u64:
12340 case NEON::BI__builtin_neon_vcgtd_s64:
12341 case NEON::BI__builtin_neon_vcgtd_u64:
12342 case NEON::BI__builtin_neon_vcltd_s64:
12343 case NEON::BI__builtin_neon_vcltd_u64:
12344 case NEON::BI__builtin_neon_vcged_u64:
12345 case NEON::BI__builtin_neon_vcged_s64:
12346 case NEON::BI__builtin_neon_vcled_u64:
12347 case NEON::BI__builtin_neon_vcled_s64: {
12348 llvm::CmpInst::Predicate
P;
12349 switch (BuiltinID) {
12350 default: llvm_unreachable(
"missing builtin ID in switch!");
12351 case NEON::BI__builtin_neon_vceqd_s64:
12352 case NEON::BI__builtin_neon_vceqd_u64:
P = llvm::ICmpInst::ICMP_EQ;
break;
12353 case NEON::BI__builtin_neon_vcgtd_s64:
P = llvm::ICmpInst::ICMP_SGT;
break;
12354 case NEON::BI__builtin_neon_vcgtd_u64:
P = llvm::ICmpInst::ICMP_UGT;
break;
12355 case NEON::BI__builtin_neon_vcltd_s64:
P = llvm::ICmpInst::ICMP_SLT;
break;
12356 case NEON::BI__builtin_neon_vcltd_u64:
P = llvm::ICmpInst::ICMP_ULT;
break;
12357 case NEON::BI__builtin_neon_vcged_u64:
P = llvm::ICmpInst::ICMP_UGE;
break;
12358 case NEON::BI__builtin_neon_vcged_s64:
P = llvm::ICmpInst::ICMP_SGE;
break;
12359 case NEON::BI__builtin_neon_vcled_u64:
P = llvm::ICmpInst::ICMP_ULE;
break;
12360 case NEON::BI__builtin_neon_vcled_s64:
P = llvm::ICmpInst::ICMP_SLE;
break;
12365 Ops[0] =
Builder.CreateICmp(
P, Ops[0], Ops[1]);
12368 case NEON::BI__builtin_neon_vtstd_s64:
12369 case NEON::BI__builtin_neon_vtstd_u64: {
12373 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
12374 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12375 llvm::Constant::getNullValue(
Int64Ty));
12378 case NEON::BI__builtin_neon_vset_lane_i8:
12379 case NEON::BI__builtin_neon_vset_lane_i16:
12380 case NEON::BI__builtin_neon_vset_lane_i32:
12381 case NEON::BI__builtin_neon_vset_lane_i64:
12382 case NEON::BI__builtin_neon_vset_lane_bf16:
12383 case NEON::BI__builtin_neon_vset_lane_f32:
12384 case NEON::BI__builtin_neon_vsetq_lane_i8:
12385 case NEON::BI__builtin_neon_vsetq_lane_i16:
12386 case NEON::BI__builtin_neon_vsetq_lane_i32:
12387 case NEON::BI__builtin_neon_vsetq_lane_i64:
12388 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12389 case NEON::BI__builtin_neon_vsetq_lane_f32:
12391 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
12392 case NEON::BI__builtin_neon_vset_lane_f64:
12395 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 1));
12397 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
12398 case NEON::BI__builtin_neon_vsetq_lane_f64:
12401 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 2));
12403 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
12405 case NEON::BI__builtin_neon_vget_lane_i8:
12406 case NEON::BI__builtin_neon_vdupb_lane_i8:
12408 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int8Ty, 8));
12411 case NEON::BI__builtin_neon_vgetq_lane_i8:
12412 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12414 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int8Ty, 16));
12417 case NEON::BI__builtin_neon_vget_lane_i16:
12418 case NEON::BI__builtin_neon_vduph_lane_i16:
12420 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int16Ty, 4));
12423 case NEON::BI__builtin_neon_vgetq_lane_i16:
12424 case NEON::BI__builtin_neon_vduph_laneq_i16:
12426 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int16Ty, 8));
12429 case NEON::BI__builtin_neon_vget_lane_i32:
12430 case NEON::BI__builtin_neon_vdups_lane_i32:
12432 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int32Ty, 2));
12435 case NEON::BI__builtin_neon_vdups_lane_f32:
12437 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 2));
12440 case NEON::BI__builtin_neon_vgetq_lane_i32:
12441 case NEON::BI__builtin_neon_vdups_laneq_i32:
12443 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int32Ty, 4));
12446 case NEON::BI__builtin_neon_vget_lane_i64:
12447 case NEON::BI__builtin_neon_vdupd_lane_i64:
12449 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int64Ty, 1));
12452 case NEON::BI__builtin_neon_vdupd_lane_f64:
12454 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 1));
12457 case NEON::BI__builtin_neon_vgetq_lane_i64:
12458 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12460 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int64Ty, 2));
12463 case NEON::BI__builtin_neon_vget_lane_f32:
12465 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 2));
12468 case NEON::BI__builtin_neon_vget_lane_f64:
12470 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 1));
12473 case NEON::BI__builtin_neon_vgetq_lane_f32:
12474 case NEON::BI__builtin_neon_vdups_laneq_f32:
12476 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 4));
12479 case NEON::BI__builtin_neon_vgetq_lane_f64:
12480 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12482 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 2));
12485 case NEON::BI__builtin_neon_vaddh_f16:
12487 return Builder.CreateFAdd(Ops[0], Ops[1],
"vaddh");
12488 case NEON::BI__builtin_neon_vsubh_f16:
12490 return Builder.CreateFSub(Ops[0], Ops[1],
"vsubh");
12491 case NEON::BI__builtin_neon_vmulh_f16:
12493 return Builder.CreateFMul(Ops[0], Ops[1],
"vmulh");
12494 case NEON::BI__builtin_neon_vdivh_f16:
12496 return Builder.CreateFDiv(Ops[0], Ops[1],
"vdivh");
12497 case NEON::BI__builtin_neon_vfmah_f16:
12500 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
12502 case NEON::BI__builtin_neon_vfmsh_f16: {
12507 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
12510 case NEON::BI__builtin_neon_vaddd_s64:
12511 case NEON::BI__builtin_neon_vaddd_u64:
12513 case NEON::BI__builtin_neon_vsubd_s64:
12514 case NEON::BI__builtin_neon_vsubd_u64:
12516 case NEON::BI__builtin_neon_vqdmlalh_s16:
12517 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12521 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
12523 ProductOps,
"vqdmlXl");
12524 Constant *CI = ConstantInt::get(
SizeTy, 0);
12525 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
12527 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12528 ? Intrinsic::aarch64_neon_sqadd
12529 : Intrinsic::aarch64_neon_sqsub;
12532 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12538 case NEON::BI__builtin_neon_vqshld_n_u64:
12539 case NEON::BI__builtin_neon_vqshld_n_s64: {
12540 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12541 ? Intrinsic::aarch64_neon_uqshl
12542 : Intrinsic::aarch64_neon_sqshl;
12547 case NEON::BI__builtin_neon_vrshrd_n_u64:
12548 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12549 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12550 ? Intrinsic::aarch64_neon_urshl
12551 : Intrinsic::aarch64_neon_srshl;
12553 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12554 Ops[1] = ConstantInt::get(
Int64Ty, -SV);
12557 case NEON::BI__builtin_neon_vrsrad_n_u64:
12558 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12559 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12560 ? Intrinsic::aarch64_neon_urshl
12561 : Intrinsic::aarch64_neon_srshl;
12565 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12568 case NEON::BI__builtin_neon_vshld_n_s64:
12569 case NEON::BI__builtin_neon_vshld_n_u64: {
12570 llvm::ConstantInt *Amt = cast<ConstantInt>(
EmitScalarExpr(
E->getArg(1)));
12572 Ops[0], ConstantInt::get(
Int64Ty, Amt->getZExtValue()),
"shld_n");
12574 case NEON::BI__builtin_neon_vshrd_n_s64: {
12575 llvm::ConstantInt *Amt = cast<ConstantInt>(
EmitScalarExpr(
E->getArg(1)));
12577 Ops[0], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t>(63),
12578 Amt->getZExtValue())),
12581 case NEON::BI__builtin_neon_vshrd_n_u64: {
12582 llvm::ConstantInt *Amt = cast<ConstantInt>(
EmitScalarExpr(
E->getArg(1)));
12583 uint64_t ShiftAmt = Amt->getZExtValue();
12585 if (ShiftAmt == 64)
12586 return ConstantInt::get(
Int64Ty, 0);
12587 return Builder.CreateLShr(Ops[0], ConstantInt::get(
Int64Ty, ShiftAmt),
12590 case NEON::BI__builtin_neon_vsrad_n_s64: {
12591 llvm::ConstantInt *Amt = cast<ConstantInt>(
EmitScalarExpr(
E->getArg(2)));
12593 Ops[1], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t>(63),
12594 Amt->getZExtValue())),
12596 return Builder.CreateAdd(Ops[0], Ops[1]);
12598 case NEON::BI__builtin_neon_vsrad_n_u64: {
12599 llvm::ConstantInt *Amt = cast<ConstantInt>(
EmitScalarExpr(
E->getArg(2)));
12600 uint64_t ShiftAmt = Amt->getZExtValue();
12603 if (ShiftAmt == 64)
12605 Ops[1] =
Builder.CreateLShr(Ops[1], ConstantInt::get(
Int64Ty, ShiftAmt),
12607 return Builder.CreateAdd(Ops[0], Ops[1]);
12609 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12610 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12611 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12612 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12618 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
12620 ProductOps,
"vqdmlXl");
12621 Constant *CI = ConstantInt::get(
SizeTy, 0);
12622 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
12625 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12626 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12627 ? Intrinsic::aarch64_neon_sqadd
12628 : Intrinsic::aarch64_neon_sqsub;
12631 case NEON::BI__builtin_neon_vqdmlals_s32:
12632 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12634 ProductOps.push_back(Ops[1]);
12638 ProductOps,
"vqdmlXl");
12640 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12641 ? Intrinsic::aarch64_neon_sqadd
12642 : Intrinsic::aarch64_neon_sqsub;
12645 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12646 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12647 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12648 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12652 ProductOps.push_back(Ops[1]);
12653 ProductOps.push_back(Ops[2]);
12656 ProductOps,
"vqdmlXl");
12659 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12660 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12661 ? Intrinsic::aarch64_neon_sqadd
12662 : Intrinsic::aarch64_neon_sqsub;
12665 case NEON::BI__builtin_neon_vget_lane_bf16:
12666 case NEON::BI__builtin_neon_vduph_lane_bf16:
12667 case NEON::BI__builtin_neon_vduph_lane_f16: {
12671 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12672 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12673 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12677 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
12678 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
12679 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
12680 return Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12682 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
12684 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12685 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
12686 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
12687 llvm::Value *Trunc =
12688 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12689 return Builder.CreateShuffleVector(
12690 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
12692 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
12694 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12696 std::iota(LoMask.begin(), LoMask.end(), 0);
12697 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
12698 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
12699 llvm::Type *V8BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 8);
12700 llvm::Value *Inactive =
Builder.CreateShuffleVector(
12701 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
12702 llvm::Value *Trunc =
12703 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
12704 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
12707 case clang::AArch64::BI_InterlockedAdd:
12708 case clang::AArch64::BI_InterlockedAdd64: {
12711 AtomicRMWInst *RMWI =
12713 llvm::AtomicOrdering::SequentiallyConsistent);
12714 return Builder.CreateAdd(RMWI, Val);
12719 llvm::Type *Ty = VTy;
12730 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12731 Builtin->NameHint, Builtin->TypeModifier,
E, Ops,
12738 switch (BuiltinID) {
12739 default:
return nullptr;
12740 case NEON::BI__builtin_neon_vbsl_v:
12741 case NEON::BI__builtin_neon_vbslq_v: {
12742 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12743 Ops[0] =
Builder.CreateBitCast(Ops[0], BitTy,
"vbsl");
12744 Ops[1] =
Builder.CreateBitCast(Ops[1], BitTy,
"vbsl");
12745 Ops[2] =
Builder.CreateBitCast(Ops[2], BitTy,
"vbsl");
12747 Ops[1] =
Builder.CreateAnd(Ops[0], Ops[1],
"vbsl");
12748 Ops[2] =
Builder.CreateAnd(
Builder.CreateNot(Ops[0]), Ops[2],
"vbsl");
12749 Ops[0] =
Builder.CreateOr(Ops[1], Ops[2],
"vbsl");
12750 return Builder.CreateBitCast(Ops[0], Ty);
12752 case NEON::BI__builtin_neon_vfma_lane_v:
12753 case NEON::BI__builtin_neon_vfmaq_lane_v: {
12756 Value *Addend = Ops[0];
12757 Value *Multiplicand = Ops[1];
12758 Value *LaneSource = Ops[2];
12759 Ops[0] = Multiplicand;
12760 Ops[1] = LaneSource;
12764 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12765 ? llvm::FixedVectorType::get(VTy->getElementType(),
12766 VTy->getNumElements() / 2)
12768 llvm::Constant *cst = cast<Constant>(Ops[3]);
12769 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12770 Ops[1] =
Builder.CreateBitCast(Ops[1], SourceTy);
12771 Ops[1] =
Builder.CreateShuffleVector(Ops[1], Ops[1], SV,
"lane");
12774 Int =
Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12778 case NEON::BI__builtin_neon_vfma_laneq_v: {
12779 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12781 if (VTy && VTy->getElementType() ==
DoubleTy) {
12784 llvm::FixedVectorType *VTy =
12786 Ops[2] =
Builder.CreateBitCast(Ops[2], VTy);
12787 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
12790 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12791 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12794 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
12795 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
12797 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12798 VTy->getNumElements() * 2);
12799 Ops[2] =
Builder.CreateBitCast(Ops[2], STy);
12800 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12801 cast<ConstantInt>(Ops[3]));
12802 Ops[2] =
Builder.CreateShuffleVector(Ops[2], Ops[2], SV,
"lane");
12805 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12806 {Ops[2], Ops[1], Ops[0]});
12808 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12809 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
12810 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
12812 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
12815 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12816 {Ops[2], Ops[1], Ops[0]});
12818 case NEON::BI__builtin_neon_vfmah_lane_f16:
12819 case NEON::BI__builtin_neon_vfmas_lane_f32:
12820 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12821 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12822 case NEON::BI__builtin_neon_vfmad_lane_f64:
12823 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12826 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
12828 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12829 {Ops[1], Ops[2], Ops[0]});
12831 case NEON::BI__builtin_neon_vmull_v:
12833 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12834 if (
Type.isPoly())
Int = Intrinsic::aarch64_neon_pmull;
12836 case NEON::BI__builtin_neon_vmax_v:
12837 case NEON::BI__builtin_neon_vmaxq_v:
12839 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12840 if (Ty->isFPOrFPVectorTy())
Int = Intrinsic::aarch64_neon_fmax;
12842 case NEON::BI__builtin_neon_vmaxh_f16: {
12844 Int = Intrinsic::aarch64_neon_fmax;
12847 case NEON::BI__builtin_neon_vmin_v:
12848 case NEON::BI__builtin_neon_vminq_v:
12850 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12851 if (Ty->isFPOrFPVectorTy())
Int = Intrinsic::aarch64_neon_fmin;
12853 case NEON::BI__builtin_neon_vminh_f16: {
12855 Int = Intrinsic::aarch64_neon_fmin;
12858 case NEON::BI__builtin_neon_vabd_v:
12859 case NEON::BI__builtin_neon_vabdq_v:
12861 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12862 if (Ty->isFPOrFPVectorTy())
Int = Intrinsic::aarch64_neon_fabd;
12864 case NEON::BI__builtin_neon_vpadal_v:
12865 case NEON::BI__builtin_neon_vpadalq_v: {
12866 unsigned ArgElts = VTy->getNumElements();
12867 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12868 unsigned BitWidth = EltTy->getBitWidth();
12869 auto *ArgTy = llvm::FixedVectorType::get(
12870 llvm::IntegerType::get(
getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12871 llvm::Type* Tys[2] = { VTy, ArgTy };
12872 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12874 TmpOps.push_back(Ops[1]);
12877 llvm::Value *addend =
Builder.CreateBitCast(Ops[0], tmp->getType());
12878 return Builder.CreateAdd(tmp, addend);
12880 case NEON::BI__builtin_neon_vpmin_v:
12881 case NEON::BI__builtin_neon_vpminq_v:
12883 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12884 if (Ty->isFPOrFPVectorTy())
Int = Intrinsic::aarch64_neon_fminp;
12886 case NEON::BI__builtin_neon_vpmax_v:
12887 case NEON::BI__builtin_neon_vpmaxq_v:
12889 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12890 if (Ty->isFPOrFPVectorTy())
Int = Intrinsic::aarch64_neon_fmaxp;
12892 case NEON::BI__builtin_neon_vminnm_v:
12893 case NEON::BI__builtin_neon_vminnmq_v:
12894 Int = Intrinsic::aarch64_neon_fminnm;
12896 case NEON::BI__builtin_neon_vminnmh_f16:
12898 Int = Intrinsic::aarch64_neon_fminnm;
12900 case NEON::BI__builtin_neon_vmaxnm_v:
12901 case NEON::BI__builtin_neon_vmaxnmq_v:
12902 Int = Intrinsic::aarch64_neon_fmaxnm;
12904 case NEON::BI__builtin_neon_vmaxnmh_f16:
12906 Int = Intrinsic::aarch64_neon_fmaxnm;
12908 case NEON::BI__builtin_neon_vrecpss_f32: {
12913 case NEON::BI__builtin_neon_vrecpsd_f64:
12917 case NEON::BI__builtin_neon_vrecpsh_f16:
12921 case NEON::BI__builtin_neon_vqshrun_n_v:
12922 Int = Intrinsic::aarch64_neon_sqshrun;
12924 case NEON::BI__builtin_neon_vqrshrun_n_v:
12925 Int = Intrinsic::aarch64_neon_sqrshrun;
12927 case NEON::BI__builtin_neon_vqshrn_n_v:
12928 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12930 case NEON::BI__builtin_neon_vrshrn_n_v:
12931 Int = Intrinsic::aarch64_neon_rshrn;
12933 case NEON::BI__builtin_neon_vqrshrn_n_v:
12934 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12936 case NEON::BI__builtin_neon_vrndah_f16: {
12939 ? Intrinsic::experimental_constrained_round
12940 : Intrinsic::round;
12943 case NEON::BI__builtin_neon_vrnda_v:
12944 case NEON::BI__builtin_neon_vrndaq_v: {
12946 ? Intrinsic::experimental_constrained_round
12947 : Intrinsic::round;
12950 case NEON::BI__builtin_neon_vrndih_f16: {
12953 ? Intrinsic::experimental_constrained_nearbyint
12954 : Intrinsic::nearbyint;
12957 case NEON::BI__builtin_neon_vrndmh_f16: {
12960 ? Intrinsic::experimental_constrained_floor
12961 : Intrinsic::floor;
12964 case NEON::BI__builtin_neon_vrndm_v:
12965 case NEON::BI__builtin_neon_vrndmq_v: {
12967 ? Intrinsic::experimental_constrained_floor
12968 : Intrinsic::floor;
12971 case NEON::BI__builtin_neon_vrndnh_f16: {
12974 ? Intrinsic::experimental_constrained_roundeven
12975 : Intrinsic::roundeven;
12978 case NEON::BI__builtin_neon_vrndn_v:
12979 case NEON::BI__builtin_neon_vrndnq_v: {
12981 ? Intrinsic::experimental_constrained_roundeven
12982 : Intrinsic::roundeven;
12985 case NEON::BI__builtin_neon_vrndns_f32: {
12988 ? Intrinsic::experimental_constrained_roundeven
12989 : Intrinsic::roundeven;
12992 case NEON::BI__builtin_neon_vrndph_f16: {
12995 ? Intrinsic::experimental_constrained_ceil
12999 case NEON::BI__builtin_neon_vrndp_v:
13000 case NEON::BI__builtin_neon_vrndpq_v: {
13002 ? Intrinsic::experimental_constrained_ceil
13006 case NEON::BI__builtin_neon_vrndxh_f16: {
13009 ? Intrinsic::experimental_constrained_rint
13013 case NEON::BI__builtin_neon_vrndx_v:
13014 case NEON::BI__builtin_neon_vrndxq_v: {
13016 ? Intrinsic::experimental_constrained_rint
13020 case NEON::BI__builtin_neon_vrndh_f16: {
13023 ? Intrinsic::experimental_constrained_trunc
13024 : Intrinsic::trunc;
13027 case NEON::BI__builtin_neon_vrnd32x_f32:
13028 case NEON::BI__builtin_neon_vrnd32xq_f32:
13029 case NEON::BI__builtin_neon_vrnd32x_f64:
13030 case NEON::BI__builtin_neon_vrnd32xq_f64: {
13032 Int = Intrinsic::aarch64_neon_frint32x;
13035 case NEON::BI__builtin_neon_vrnd32z_f32:
13036 case NEON::BI__builtin_neon_vrnd32zq_f32:
13037 case NEON::BI__builtin_neon_vrnd32z_f64:
13038 case NEON::BI__builtin_neon_vrnd32zq_f64: {
13040 Int = Intrinsic::aarch64_neon_frint32z;
13043 case NEON::BI__builtin_neon_vrnd64x_f32:
13044 case NEON::BI__builtin_neon_vrnd64xq_f32:
13045 case NEON::BI__builtin_neon_vrnd64x_f64:
13046 case NEON::BI__builtin_neon_vrnd64xq_f64: {
13048 Int = Intrinsic::aarch64_neon_frint64x;
13051 case NEON::BI__builtin_neon_vrnd64z_f32:
13052 case NEON::BI__builtin_neon_vrnd64zq_f32:
13053 case NEON::BI__builtin_neon_vrnd64z_f64:
13054 case NEON::BI__builtin_neon_vrnd64zq_f64: {
13056 Int = Intrinsic::aarch64_neon_frint64z;
13059 case NEON::BI__builtin_neon_vrnd_v:
13060 case NEON::BI__builtin_neon_vrndq_v: {
13062 ? Intrinsic::experimental_constrained_trunc
13063 : Intrinsic::trunc;
13066 case NEON::BI__builtin_neon_vcvt_f64_v:
13067 case NEON::BI__builtin_neon_vcvtq_f64_v:
13068 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
13070 return usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
13071 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
13072 case NEON::BI__builtin_neon_vcvt_f64_f32: {
13074 "unexpected vcvt_f64_f32 builtin");
13078 return Builder.CreateFPExt(Ops[0], Ty,
"vcvt");
13080 case NEON::BI__builtin_neon_vcvt_f32_f64: {
13082 "unexpected vcvt_f32_f64 builtin");
13086 return Builder.CreateFPTrunc(Ops[0], Ty,
"vcvt");
13088 case NEON::BI__builtin_neon_vcvt_s32_v:
13089 case NEON::BI__builtin_neon_vcvt_u32_v:
13090 case NEON::BI__builtin_neon_vcvt_s64_v:
13091 case NEON::BI__builtin_neon_vcvt_u64_v:
13092 case NEON::BI__builtin_neon_vcvt_s16_f16:
13093 case NEON::BI__builtin_neon_vcvt_u16_f16:
13094 case NEON::BI__builtin_neon_vcvtq_s32_v:
13095 case NEON::BI__builtin_neon_vcvtq_u32_v:
13096 case NEON::BI__builtin_neon_vcvtq_s64_v:
13097 case NEON::BI__builtin_neon_vcvtq_u64_v:
13098 case NEON::BI__builtin_neon_vcvtq_s16_f16:
13099 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
13101 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
13105 case NEON::BI__builtin_neon_vcvta_s16_f16:
13106 case NEON::BI__builtin_neon_vcvta_u16_f16:
13107 case NEON::BI__builtin_neon_vcvta_s32_v:
13108 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
13109 case NEON::BI__builtin_neon_vcvtaq_s32_v:
13110 case NEON::BI__builtin_neon_vcvta_u32_v:
13111 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
13112 case NEON::BI__builtin_neon_vcvtaq_u32_v:
13113 case NEON::BI__builtin_neon_vcvta_s64_v:
13114 case NEON::BI__builtin_neon_vcvtaq_s64_v:
13115 case NEON::BI__builtin_neon_vcvta_u64_v:
13116 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
13117 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
13121 case NEON::BI__builtin_neon_vcvtm_s16_f16:
13122 case NEON::BI__builtin_neon_vcvtm_s32_v:
13123 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
13124 case NEON::BI__builtin_neon_vcvtmq_s32_v:
13125 case NEON::BI__builtin_neon_vcvtm_u16_f16:
13126 case NEON::BI__builtin_neon_vcvtm_u32_v:
13127 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
13128 case NEON::BI__builtin_neon_vcvtmq_u32_v:
13129 case NEON::BI__builtin_neon_vcvtm_s64_v:
13130 case NEON::BI__builtin_neon_vcvtmq_s64_v:
13131 case NEON::BI__builtin_neon_vcvtm_u64_v:
13132 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
13133 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
13137 case NEON::BI__builtin_neon_vcvtn_s16_f16:
13138 case NEON::BI__builtin_neon_vcvtn_s32_v:
13139 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
13140 case NEON::BI__builtin_neon_vcvtnq_s32_v:
13141 case NEON::BI__builtin_neon_vcvtn_u16_f16:
13142 case NEON::BI__builtin_neon_vcvtn_u32_v:
13143 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
13144 case NEON::BI__builtin_neon_vcvtnq_u32_v:
13145 case NEON::BI__builtin_neon_vcvtn_s64_v:
13146 case NEON::BI__builtin_neon_vcvtnq_s64_v:
13147 case NEON::BI__builtin_neon_vcvtn_u64_v:
13148 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
13149 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
13153 case NEON::BI__builtin_neon_vcvtp_s16_f16:
13154 case NEON::BI__builtin_neon_vcvtp_s32_v:
13155 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
13156 case NEON::BI__builtin_neon_vcvtpq_s32_v:
13157 case NEON::BI__builtin_neon_vcvtp_u16_f16:
13158 case NEON::BI__builtin_neon_vcvtp_u32_v:
13159 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
13160 case NEON::BI__builtin_neon_vcvtpq_u32_v:
13161 case NEON::BI__builtin_neon_vcvtp_s64_v:
13162 case NEON::BI__builtin_neon_vcvtpq_s64_v:
13163 case NEON::BI__builtin_neon_vcvtp_u64_v:
13164 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
13165 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
13169 case NEON::BI__builtin_neon_vmulx_v:
13170 case NEON::BI__builtin_neon_vmulxq_v: {
13171 Int = Intrinsic::aarch64_neon_fmulx;
13174 case NEON::BI__builtin_neon_vmulxh_lane_f16:
13175 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
13179 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2],
"extract");
13181 Int = Intrinsic::aarch64_neon_fmulx;
13184 case NEON::BI__builtin_neon_vmul_lane_v:
13185 case NEON::BI__builtin_neon_vmul_laneq_v: {
13188 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
13191 llvm::FixedVectorType *VTy =
13193 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
13194 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2],
"extract");
13198 case NEON::BI__builtin_neon_vnegd_s64:
13200 case NEON::BI__builtin_neon_vnegh_f16:
13202 case NEON::BI__builtin_neon_vpmaxnm_v:
13203 case NEON::BI__builtin_neon_vpmaxnmq_v: {
13204 Int = Intrinsic::aarch64_neon_fmaxnmp;
13207 case NEON::BI__builtin_neon_vpminnm_v:
13208 case NEON::BI__builtin_neon_vpminnmq_v: {
13209 Int = Intrinsic::aarch64_neon_fminnmp;
13212 case NEON::BI__builtin_neon_vsqrth_f16: {
13215 ? Intrinsic::experimental_constrained_sqrt
13219 case NEON::BI__builtin_neon_vsqrt_v:
13220 case NEON::BI__builtin_neon_vsqrtq_v: {
13222 ? Intrinsic::experimental_constrained_sqrt
13224 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
13227 case NEON::BI__builtin_neon_vrbit_v:
13228 case NEON::BI__builtin_neon_vrbitq_v: {
13229 Int = Intrinsic::bitreverse;
13232 case NEON::BI__builtin_neon_vaddv_u8:
13236 case NEON::BI__builtin_neon_vaddv_s8: {
13237 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13239 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
13240 llvm::Type *Tys[2] = { Ty, VTy };
13245 case NEON::BI__builtin_neon_vaddv_u16:
13248 case NEON::BI__builtin_neon_vaddv_s16: {
13249 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13251 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
13252 llvm::Type *Tys[2] = { Ty, VTy };
13257 case NEON::BI__builtin_neon_vaddvq_u8:
13260 case NEON::BI__builtin_neon_vaddvq_s8: {
13261 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13263 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
13264 llvm::Type *Tys[2] = { Ty, VTy };
13269 case NEON::BI__builtin_neon_vaddvq_u16:
13272 case NEON::BI__builtin_neon_vaddvq_s16: {
13273 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13275 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
13276 llvm::Type *Tys[2] = { Ty, VTy };
13281 case NEON::BI__builtin_neon_vmaxv_u8: {
13282 Int = Intrinsic::aarch64_neon_umaxv;
13284 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
13285 llvm::Type *Tys[2] = { Ty, VTy };
13290 case NEON::BI__builtin_neon_vmaxv_u16: {
13291 Int = Intrinsic::aarch64_neon_umaxv;
13293 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
13294 llvm::Type *Tys[2] = { Ty, VTy };
13299 case NEON::BI__builtin_neon_vmaxvq_u8: {
13300 Int = Intrinsic::aarch64_neon_umaxv;
13302 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
13303 llvm::Type *Tys[2] = { Ty, VTy };
13308 case NEON::BI__builtin_neon_vmaxvq_u16: {
13309 Int = Intrinsic::aarch64_neon_umaxv;
13311 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
13312 llvm::Type *Tys[2] = { Ty, VTy };
13317 case NEON::BI__builtin_neon_vmaxv_s8: {
13318 Int = Intrinsic::aarch64_neon_smaxv;
13320 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
13321 llvm::Type *Tys[2] = { Ty, VTy };
13326 case NEON::BI__builtin_neon_vmaxv_s16: {
13327 Int = Intrinsic::aarch64_neon_smaxv;
13329 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
13330 llvm::Type *Tys[2] = { Ty, VTy };
13335 case NEON::BI__builtin_neon_vmaxvq_s8: {
13336 Int = Intrinsic::aarch64_neon_smaxv;
13338 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
13339 llvm::Type *Tys[2] = { Ty, VTy };
13344 case NEON::BI__builtin_neon_vmaxvq_s16: {
13345 Int = Intrinsic::aarch64_neon_smaxv;
13347 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
13348 llvm::Type *Tys[2] = { Ty, VTy };
13353 case NEON::BI__builtin_neon_vmaxv_f16: {
13354 Int = Intrinsic::aarch64_neon_fmaxv;
13356 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
13357 llvm::Type *Tys[2] = { Ty, VTy };
13362 case NEON::BI__builtin_neon_vmaxvq_f16: {
13363 Int = Intrinsic::aarch64_neon_fmaxv;
13365 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
13366 llvm::Type *Tys[2] = { Ty, VTy };
13371 case NEON::BI__builtin_neon_vminv_u8: {
13372 Int = Intrinsic::aarch64_neon_uminv;
13374 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
13375 llvm::Type *Tys[2] = { Ty, VTy };
13380 case NEON::BI__builtin_neon_vminv_u16: {
13381 Int = Intrinsic::aarch64_neon_uminv;
13383 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
13384 llvm::Type *Tys[2] = { Ty, VTy };
13389 case NEON::BI__builtin_neon_vminvq_u8: {
13390 Int = Intrinsic::aarch64_neon_uminv;
13392 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
13393 llvm::Type *Tys[2] = { Ty, VTy };
13398 case NEON::BI__builtin_neon_vminvq_u16: {
13399 Int = Intrinsic::aarch64_neon_uminv;
13401 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
13402 llvm::Type *Tys[2] = { Ty, VTy };
13407 case NEON::BI__builtin_neon_vminv_s8: {
13408 Int = Intrinsic::aarch64_neon_sminv;
13410 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
13411 llvm::Type *Tys[2] = { Ty, VTy };
13416 case NEON::BI__builtin_neon_vminv_s16: {
13417 Int = Intrinsic::aarch64_neon_sminv;
13419 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
13420 llvm::Type *Tys[2] = { Ty, VTy };
13425 case NEON::BI__builtin_neon_vminvq_s8: {
13426 Int = Intrinsic::aarch64_neon_sminv;
13428 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
13429 llvm::Type *Tys[2] = { Ty, VTy };
13434 case NEON::BI__builtin_neon_vminvq_s16: {
13435 Int = Intrinsic::aarch64_neon_sminv;
13437 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
13438 llvm::Type *Tys[2] = { Ty, VTy };
13443 case NEON::BI__builtin_neon_vminv_f16: {
13444 Int = Intrinsic::aarch64_neon_fminv;
13446 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
13447 llvm::Type *Tys[2] = { Ty, VTy };
13452 case NEON::BI__builtin_neon_vminvq_f16: {
13453 Int = Intrinsic::aarch64_neon_fminv;
13455 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
13456 llvm::Type *Tys[2] = { Ty, VTy };
13461 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13462 Int = Intrinsic::aarch64_neon_fmaxnmv;
13464 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
13465 llvm::Type *Tys[2] = { Ty, VTy };
13470 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13471 Int = Intrinsic::aarch64_neon_fmaxnmv;
13473 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
13474 llvm::Type *Tys[2] = { Ty, VTy };
13479 case NEON::BI__builtin_neon_vminnmv_f16: {
13480 Int = Intrinsic::aarch64_neon_fminnmv;
13482 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
13483 llvm::Type *Tys[2] = { Ty, VTy };
13488 case NEON::BI__builtin_neon_vminnmvq_f16: {
13489 Int = Intrinsic::aarch64_neon_fminnmv;
13491 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
13492 llvm::Type *Tys[2] = { Ty, VTy };
13497 case NEON::BI__builtin_neon_vmul_n_f64: {
13500 return Builder.CreateFMul(Ops[0], RHS);
13502 case NEON::BI__builtin_neon_vaddlv_u8: {
13503 Int = Intrinsic::aarch64_neon_uaddlv;
13505 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
13506 llvm::Type *Tys[2] = { Ty, VTy };
13511 case NEON::BI__builtin_neon_vaddlv_u16: {
13512 Int = Intrinsic::aarch64_neon_uaddlv;
13514 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
13515 llvm::Type *Tys[2] = { Ty, VTy };
13519 case NEON::BI__builtin_neon_vaddlvq_u8: {
13520 Int = Intrinsic::aarch64_neon_uaddlv;
13522 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
13523 llvm::Type *Tys[2] = { Ty, VTy };
13528 case NEON::BI__builtin_neon_vaddlvq_u16: {
13529 Int = Intrinsic::aarch64_neon_uaddlv;
13531 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
13532 llvm::Type *Tys[2] = { Ty, VTy };
13536 case NEON::BI__builtin_neon_vaddlv_s8: {
13537 Int = Intrinsic::aarch64_neon_saddlv;
13539 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
13540 llvm::Type *Tys[2] = { Ty, VTy };
13545 case NEON::BI__builtin_neon_vaddlv_s16: {
13546 Int = Intrinsic::aarch64_neon_saddlv;
13548 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
13549 llvm::Type *Tys[2] = { Ty, VTy };
13553 case NEON::BI__builtin_neon_vaddlvq_s8: {
13554 Int = Intrinsic::aarch64_neon_saddlv;
13556 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
13557 llvm::Type *Tys[2] = { Ty, VTy };
13562 case NEON::BI__builtin_neon_vaddlvq_s16: {
13563 Int = Intrinsic::aarch64_neon_saddlv;
13565 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
13566 llvm::Type *Tys[2] = { Ty, VTy };
13570 case NEON::BI__builtin_neon_vsri_n_v:
13571 case NEON::BI__builtin_neon_vsriq_n_v: {
13572 Int = Intrinsic::aarch64_neon_vsri;
13576 case NEON::BI__builtin_neon_vsli_n_v:
13577 case NEON::BI__builtin_neon_vsliq_n_v: {
13578 Int = Intrinsic::aarch64_neon_vsli;
13582 case NEON::BI__builtin_neon_vsra_n_v:
13583 case NEON::BI__builtin_neon_vsraq_n_v:
13584 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
13586 return Builder.CreateAdd(Ops[0], Ops[1]);
13587 case NEON::BI__builtin_neon_vrsra_n_v:
13588 case NEON::BI__builtin_neon_vrsraq_n_v: {
13589 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13591 TmpOps.push_back(Ops[1]);
13592 TmpOps.push_back(Ops[2]);
13594 llvm::Value *tmp =
EmitNeonCall(F, TmpOps,
"vrshr_n", 1,
true);
13595 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
13596 return Builder.CreateAdd(Ops[0], tmp);
13598 case NEON::BI__builtin_neon_vld1_v:
13599 case NEON::BI__builtin_neon_vld1q_v: {
13602 case NEON::BI__builtin_neon_vst1_v:
13603 case NEON::BI__builtin_neon_vst1q_v:
13604 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
13606 case NEON::BI__builtin_neon_vld1_lane_v:
13607 case NEON::BI__builtin_neon_vld1q_lane_v: {
13608 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
13611 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vld1_lane");
13613 case NEON::BI__builtin_neon_vldap1_lane_s64:
13614 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13615 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
13617 VTy->getElementType(), Ops[0], PtrOp0.
getAlignment());
13618 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13620 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vldap1_lane");
13622 case NEON::BI__builtin_neon_vld1_dup_v:
13623 case NEON::BI__builtin_neon_vld1q_dup_v: {
13624 Value *
V = PoisonValue::get(Ty);
13627 llvm::Constant *CI = ConstantInt::get(
Int32Ty, 0);
13628 Ops[0] =
Builder.CreateInsertElement(
V, Ops[0], CI);
13631 case NEON::BI__builtin_neon_vst1_lane_v:
13632 case NEON::BI__builtin_neon_vst1q_lane_v:
13633 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
13634 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
13636 case NEON::BI__builtin_neon_vstl1_lane_s64:
13637 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13638 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
13639 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
13640 llvm::StoreInst *SI =
13642 SI->setAtomic(llvm::AtomicOrdering::Release);
13645 case NEON::BI__builtin_neon_vld2_v:
13646 case NEON::BI__builtin_neon_vld2q_v: {
13649 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
13652 case NEON::BI__builtin_neon_vld3_v:
13653 case NEON::BI__builtin_neon_vld3q_v: {
13656 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
13659 case NEON::BI__builtin_neon_vld4_v:
13660 case NEON::BI__builtin_neon_vld4q_v: {
13663 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
13666 case NEON::BI__builtin_neon_vld2_dup_v:
13667 case NEON::BI__builtin_neon_vld2q_dup_v: {
13670 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
13673 case NEON::BI__builtin_neon_vld3_dup_v:
13674 case NEON::BI__builtin_neon_vld3q_dup_v: {
13677 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
13680 case NEON::BI__builtin_neon_vld4_dup_v:
13681 case NEON::BI__builtin_neon_vld4q_dup_v: {
13684 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
13687 case NEON::BI__builtin_neon_vld2_lane_v:
13688 case NEON::BI__builtin_neon_vld2q_lane_v: {
13689 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13691 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13692 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
13693 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
13698 case NEON::BI__builtin_neon_vld3_lane_v:
13699 case NEON::BI__builtin_neon_vld3q_lane_v: {
13700 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13702 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13703 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
13704 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
13705 Ops[3] =
Builder.CreateBitCast(Ops[3], Ty);
13710 case NEON::BI__builtin_neon_vld4_lane_v:
13711 case NEON::BI__builtin_neon_vld4q_lane_v: {
13712 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13714 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13715 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
13716 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
13717 Ops[3] =
Builder.CreateBitCast(Ops[3], Ty);
13718 Ops[4] =
Builder.CreateBitCast(Ops[4], Ty);
13723 case NEON::BI__builtin_neon_vst2_v:
13724 case NEON::BI__builtin_neon_vst2q_v: {
13725 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13726 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13730 case NEON::BI__builtin_neon_vst2_lane_v:
13731 case NEON::BI__builtin_neon_vst2q_lane_v: {
13732 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13734 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13738 case NEON::BI__builtin_neon_vst3_v:
13739 case NEON::BI__builtin_neon_vst3q_v: {
13740 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13741 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13745 case NEON::BI__builtin_neon_vst3_lane_v:
13746 case NEON::BI__builtin_neon_vst3q_lane_v: {
13747 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13749 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13753 case NEON::BI__builtin_neon_vst4_v:
13754 case NEON::BI__builtin_neon_vst4q_v: {
13755 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13756 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13760 case NEON::BI__builtin_neon_vst4_lane_v:
13761 case NEON::BI__builtin_neon_vst4q_lane_v: {
13762 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13764 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13768 case NEON::BI__builtin_neon_vtrn_v:
13769 case NEON::BI__builtin_neon_vtrnq_v: {
13770 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
13771 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
13772 Value *SV =
nullptr;
13774 for (
unsigned vi = 0; vi != 2; ++vi) {
13776 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13777 Indices.push_back(i+vi);
13778 Indices.push_back(i+e+vi);
13780 Value *Addr =
Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13781 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vtrn");
13786 case NEON::BI__builtin_neon_vuzp_v:
13787 case NEON::BI__builtin_neon_vuzpq_v: {
13788 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
13789 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
13790 Value *SV =
nullptr;
13792 for (
unsigned vi = 0; vi != 2; ++vi) {
13794 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13795 Indices.push_back(2*i+vi);
13797 Value *Addr =
Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13798 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vuzp");
13803 case NEON::BI__builtin_neon_vzip_v:
13804 case NEON::BI__builtin_neon_vzipq_v: {
13805 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
13806 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
13807 Value *SV =
nullptr;
13809 for (
unsigned vi = 0; vi != 2; ++vi) {
13811 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13812 Indices.push_back((i + vi*e) >> 1);
13813 Indices.push_back(((i + vi*e) >> 1)+e);
13815 Value *Addr =
Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13816 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vzip");
13821 case NEON::BI__builtin_neon_vqtbl1q_v: {
13825 case NEON::BI__builtin_neon_vqtbl2q_v: {
13829 case NEON::BI__builtin_neon_vqtbl3q_v: {
13833 case NEON::BI__builtin_neon_vqtbl4q_v: {
13837 case NEON::BI__builtin_neon_vqtbx1q_v: {
13841 case NEON::BI__builtin_neon_vqtbx2q_v: {
13845 case NEON::BI__builtin_neon_vqtbx3q_v: {
13849 case NEON::BI__builtin_neon_vqtbx4q_v: {
13853 case NEON::BI__builtin_neon_vsqadd_v:
13854 case NEON::BI__builtin_neon_vsqaddq_v: {
13855 Int = Intrinsic::aarch64_neon_usqadd;
13858 case NEON::BI__builtin_neon_vuqadd_v:
13859 case NEON::BI__builtin_neon_vuqaddq_v: {
13860 Int = Intrinsic::aarch64_neon_suqadd;
13864 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
13865 case NEON::BI__builtin_neon_vluti2_laneq_f16:
13866 case NEON::BI__builtin_neon_vluti2_laneq_p16:
13867 case NEON::BI__builtin_neon_vluti2_laneq_p8:
13868 case NEON::BI__builtin_neon_vluti2_laneq_s16:
13869 case NEON::BI__builtin_neon_vluti2_laneq_s8:
13870 case NEON::BI__builtin_neon_vluti2_laneq_u16:
13871 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
13872 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13873 llvm::Type *Tys[2];
13879 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
13880 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
13881 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
13882 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
13883 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
13884 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
13885 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
13886 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
13887 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13888 llvm::Type *Tys[2];
13894 case NEON::BI__builtin_neon_vluti2_lane_bf16:
13895 case NEON::BI__builtin_neon_vluti2_lane_f16:
13896 case NEON::BI__builtin_neon_vluti2_lane_p16:
13897 case NEON::BI__builtin_neon_vluti2_lane_p8:
13898 case NEON::BI__builtin_neon_vluti2_lane_s16:
13899 case NEON::BI__builtin_neon_vluti2_lane_s8:
13900 case NEON::BI__builtin_neon_vluti2_lane_u16:
13901 case NEON::BI__builtin_neon_vluti2_lane_u8: {
13902 Int = Intrinsic::aarch64_neon_vluti2_lane;
13903 llvm::Type *Tys[2];
13909 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
13910 case NEON::BI__builtin_neon_vluti2q_lane_f16:
13911 case NEON::BI__builtin_neon_vluti2q_lane_p16:
13912 case NEON::BI__builtin_neon_vluti2q_lane_p8:
13913 case NEON::BI__builtin_neon_vluti2q_lane_s16:
13914 case NEON::BI__builtin_neon_vluti2q_lane_s8:
13915 case NEON::BI__builtin_neon_vluti2q_lane_u16:
13916 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
13917 Int = Intrinsic::aarch64_neon_vluti2_lane;
13918 llvm::Type *Tys[2];
13924 case NEON::BI__builtin_neon_vluti4q_lane_p8:
13925 case NEON::BI__builtin_neon_vluti4q_lane_s8:
13926 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
13927 Int = Intrinsic::aarch64_neon_vluti4q_lane;
13930 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
13931 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
13932 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
13933 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
13936 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
13937 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
13938 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
13939 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
13940 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
13941 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
13944 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
13945 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
13946 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
13947 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
13948 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
13949 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
13953 case NEON::BI__builtin_neon_vamin_f16:
13954 case NEON::BI__builtin_neon_vaminq_f16:
13955 case NEON::BI__builtin_neon_vamin_f32:
13956 case NEON::BI__builtin_neon_vaminq_f32:
13957 case NEON::BI__builtin_neon_vaminq_f64: {
13958 Int = Intrinsic::aarch64_neon_famin;
13961 case NEON::BI__builtin_neon_vamax_f16:
13962 case NEON::BI__builtin_neon_vamaxq_f16:
13963 case NEON::BI__builtin_neon_vamax_f32:
13964 case NEON::BI__builtin_neon_vamaxq_f32:
13965 case NEON::BI__builtin_neon_vamaxq_f64: {
13966 Int = Intrinsic::aarch64_neon_famax;
13969 case NEON::BI__builtin_neon_vscale_f16:
13970 case NEON::BI__builtin_neon_vscaleq_f16:
13971 case NEON::BI__builtin_neon_vscale_f32:
13972 case NEON::BI__builtin_neon_vscaleq_f32:
13973 case NEON::BI__builtin_neon_vscaleq_f64: {
13974 Int = Intrinsic::aarch64_neon_fp8_fscale;
// Lowers the four BPF builtins named in the assertion below to calls of the
// corresponding llvm::Intrinsic::bpf_* intrinsics.
// NOTE(review): the enclosing function header and a number of statements are
// not visible in this excerpt (the embedded line numbers skip), so these
// comments describe only what the visible lines do.
13982 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13983 BuiltinID == BPF::BI__builtin_btf_type_id ||
13984 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13985 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13986 "unexpected BPF builtin");
13993 switch (BuiltinID) {
13995 llvm_unreachable(
"Unexpected BPF builtin");
// __builtin_preserve_field_info: the call takes the field address and an
// i64 info-kind constant; the intrinsic is overloaded on the address type.
13996 case BPF::BI__builtin_preserve_field_info: {
13997 const Expr *Arg =
E->getArg(0);
// Diagnostic text; presumably reported when debug info is unavailable (the
// reporting call itself is not visible here).
14002 "using __builtin_preserve_field_info() without -g");
// The constant C is sign-extended into the i64 info-kind operand.
14015 Value *InfoKind = ConstantInt::get(
Int64Ty,
C->getSExtValue());
14018 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getOrInsertDeclaration(
14019 &
CGM.
getModule(), llvm::Intrinsic::bpf_preserve_field_info,
14020 {FieldAddr->getType()});
14021 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
// __builtin_btf_type_id and __builtin_preserve_type_info share one lowering;
// only the intrinsic that gets declared differs.
14023 case BPF::BI__builtin_btf_type_id:
14024 case BPF::BI__builtin_preserve_type_info: {
14030 const Expr *Arg0 =
E->getArg(0);
14035 Value *FlagValue = ConstantInt::get(
Int64Ty, Flag->getSExtValue());
// Every emitted call consumes a fresh value of BuiltinSeqNum (post-increment).
14036 Value *SeqNumVal = ConstantInt::get(
Int32Ty, BuiltinSeqNum++);
14038 llvm::Function *FnDecl;
14039 if (BuiltinID == BPF::BI__builtin_btf_type_id)
14040 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14041 &
CGM.
getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
// Presumably the else branch of the test above (the `else` line is not
// visible in this excerpt).
14043 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14044 &
CGM.
getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
14045 CallInst *
Fn =
Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
// DbgInfo is attached to the call as preserve_access_index metadata.
14046 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14049 case BPF::BI__builtin_preserve_enum_value: {
14055 const Expr *Arg0 =
E->getArg(0);
// The first argument must be a unary operator applied to a C-style cast of a
// reference to an enumerator; each cast<> below asserts that shape.
14060 const auto *UO = cast<UnaryOperator>(Arg0->
IgnoreParens());
14061 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
14062 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
14063 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
// Render the enumerator's value as decimal text. Negative values and values
// above INT64_MAX take the sign-extended rendering; the zero-extended
// rendering below is presumably the else branch (its `else` is not visible).
14065 auto InitVal = Enumerator->getInitVal();
14066 std::string InitValStr;
14067 if (InitVal.isNegative() || InitVal >
uint64_t(INT64_MAX))
14068 InitValStr = std::to_string(InitVal.getSExtValue());
14070 InitValStr = std::to_string(InitVal.getZExtValue());
// The string passed to the intrinsic has the form "<enumerator name>:<value>".
14071 std::string EnumStr = Enumerator->getNameAsString() +
":" + InitValStr;
14072 Value *EnumStrVal =
Builder.CreateGlobalString(EnumStr);
14075 Value *FlagValue = ConstantInt::get(
Int64Ty, Flag->getSExtValue());
14076 Value *SeqNumVal = ConstantInt::get(
Int32Ty, BuiltinSeqNum++);
14078 llvm::Function *IntrinsicFn = llvm::Intrinsic::getOrInsertDeclaration(
14079 &
CGM.
getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
// Operand order: sequence number, enumerator string, flag.
14081 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
14082 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
// Builds a vector value out of the scalar values in Ops.
// NOTE(review): the function header and the body of the final loop are not
// visible in this excerpt.
// The bit trick below holds when Ops.size() has at most one bit set.
14090 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
14091 "Not a power-of-two sized vector!");
// Scan the operands, stopping at the first one that is not a Constant.
14092 bool AllConstants =
true;
14093 for (
unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
14094 AllConstants &= isa<Constant>(Ops[i]);
// All-constant input: return a ConstantVector directly.
14097 if (AllConstants) {
14099 for (
unsigned i = 0, e = Ops.size(); i != e; ++i)
14100 CstOps.push_back(cast<Constant>(Ops[i]));
14101 return llvm::ConstantVector::get(CstOps);
// Otherwise the result type is a fixed vector of Ops.size() elements whose
// element type is that of Ops[0]; the loop below visits every operand
// (presumably inserting each one into the result -- body not visible).
14106 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
14108 for (
unsigned i = 0, e = Ops.size(); i != e; ++i)
// Converts an integer Mask into a vector value via a bitcast.
// NOTE(review): most of the signature and several statements are not visible
// in this excerpt.
14116 unsigned NumElts) {
// The mask vector has as many elements as Mask has bits (the element type
// argument is on a line that is not visible).
14118 auto *MaskTy = llvm::FixedVectorType::get(
14120 cast<IntegerType>(Mask->
getType())->getBitWidth());
14121 Value *MaskVec = CGF.
Builder.CreateBitCast(Mask, MaskTy);
// A shuffle of MaskVec with itself, using the first NumElts entries of
// Indices, produces the "extract" value. Presumably this only runs when
// NumElts is smaller than the mask width -- the guard is not visible.
14127 for (
unsigned i = 0; i != NumElts; ++i)
14129 MaskVec = CGF.
Builder.CreateShuffleVector(
14130 MaskVec, MaskVec,
ArrayRef(Indices, NumElts),
"extract");
// Emits a masked store of Ops[1] through the pointer in Ops[0].
// NOTE(review): the function header and the statement that builds MaskVec are
// only partly visible; the mask is sized to the element count of Ops[1].
14137 Value *Ptr = Ops[0];
14141 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
14143 return CGF.
Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
// Emits a masked load through the pointer in Ops[0].
// NOTE(review): the function header and part of the MaskVec computation are
// not visible in this excerpt.
// Ops[1] provides the loaded type and is also passed as the final
// (pass-through) operand of the masked load.
14148 llvm::Type *Ty = Ops[1]->getType();
14149 Value *Ptr = Ops[0];
// The mask comes from Ops[2], sized to the element count of Ty.
14152 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
14154 return CGF.
Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
// Emits a call to the masked_expandload intrinsic.
// NOTE(review): the function header and the rest of the getIntrinsic argument
// list are not visible in this excerpt.
14159 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
14160 Value *Ptr = Ops[0];
// The mask comes from Ops[2], sized to the element count of the result type.
14163 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
14165 llvm::Function *F = CGF.
CGM.
getIntrinsic(Intrinsic::masked_expandload,
// Call operands: pointer, mask vector, then Ops[1].
14167 return CGF.
Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
// Emits either the AVX-512 mask-compress or mask-expand intrinsic, chosen by
// IsCompress.
// NOTE(review): the function header and the statements that build MaskVec and
// F are not visible in this excerpt.
14173 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14177 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
14178 : Intrinsic::x86_avx512_mask_expand;
// Call operands: Ops[0], Ops[1], then the mask vector.
14180 return CGF.
Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
// Emits a call to the masked_compressstore intrinsic.
// NOTE(review): the function header, the MaskVec computation and the rest of
// the getIntrinsic argument list are not visible in this excerpt.
14185 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14186 Value *Ptr = Ops[0];
14190 llvm::Function *F = CGF.
CGM.
getIntrinsic(Intrinsic::masked_compressstore,
// Call operands: the value in Ops[1], the pointer, then the mask vector.
14192 return CGF.
Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
// Applies the binary operator Opc to two mask operands and bitcasts the
// result back to the type of Ops[0].
// NOTE(review): most of the signature and the statements defining LHS and RHS
// are not visible in this excerpt.
14197 bool InvertLHS =
false) {
// The element count is the bit width of the integer-typed Ops[0].
14198 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
// Presumably executed only when InvertLHS is set (the guard is not visible).
14203 LHS = CGF.
Builder.CreateNot(LHS);
14205 return CGF.
Builder.CreateBitCast(CGF.
Builder.CreateBinOp(Opc, LHS, RHS),
14206 Ops[0]->getType());
// Emits a funnel-shift intrinsic call: fshr when IsRight is true, fshl
// otherwise, with operands {Op0, Op1, Amt}.
// NOTE(review): part of the signature and the creation of F are not visible
// in this excerpt.
14210 Value *Amt,
bool IsRight) {
14211 llvm::Type *Ty = Op0->
getType();
// Amt is converted to Ty's scalar type as an unsigned value (isSigned=false)
// and splatted across NumElts lanes. Presumably this happens only when Amt is
// not already of type Ty -- the guard is not visible.
14217 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
14218 Amt = CGF.
Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
14219 Amt = CGF.
Builder.CreateVectorSplat(NumElts, Amt);
14222 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
14224 return CGF.
Builder.CreateCall(F, {Op0, Op1, Amt});
// Selects an integer comparison predicate for the operands in Ops[0] and
// Ops[1] from the immediate in Ops[2].
// NOTE(review): the function header, the switch and its case labels, and the
// code that consumes Pred are not visible in this excerpt.
14229 Value *Op0 = Ops[0];
14230 Value *Op1 = Ops[1];
14231 llvm::Type *Ty = Op0->
getType();
// Only the low three bits of the constant immediate are used.
14232 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14234 CmpInst::Predicate Pred;
// Ordering predicates use the signed or unsigned form depending on IsSigned;
// the equality predicates do not depend on signedness.
14237 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
14240 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
14243 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
14246 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
14249 Pred = ICmpInst::ICMP_EQ;
14252 Pred = ICmpInst::ICMP_NE;
// Two immediates produce a constant directly: all-zero or all-ones of type Ty.
14255 return llvm::Constant::getNullValue(Ty);
14257 return llvm::Constant::getAllOnesValue(Ty);
14259 llvm_unreachable(
"Unexpected XOP vpcom/vpcomu predicate");
// Emits a select between Op0 and Op1 under Mask.
// NOTE(review): the function header, the statement executed for a constant
// all-ones mask, and the start of the mask conversion are not visible.
// Special case: Mask is a constant with all bits set.
14271 if (
const auto *
C = dyn_cast<Constant>(Mask))
14272 if (
C->isAllOnesValue())
// The mask is converted using the element count of Op0's vector type.
14276 CGF, Mask, cast<llvm::FixedVectorType>(Op0->
getType())->getNumElements());
14278 return CGF.
Builder.CreateSelect(Mask, Op0, Op1);
// Emits a select between Op0 and Op1 controlled by element 0 of Mask.
// NOTE(review): the function header and the statement executed for a constant
// all-ones mask are not visible in this excerpt.
// Special case: Mask is a constant with all bits set.
14284 if (
const auto *
C = dyn_cast<Constant>(Mask))
14285 if (
C->isAllOnesValue())
// View the integer mask as a vector of i1 with one element per mask bit, then
// use element 0 as the select condition.
14288 auto *MaskTy = llvm::FixedVectorType::get(
14289 CGF.
Builder.getInt1Ty(), Mask->
getType()->getIntegerBitWidth());
14290 Mask = CGF.
Builder.CreateBitCast(Mask, MaskTy);
14291 Mask = CGF.
Builder.CreateExtractElement(Mask, (uint64_t)0);
14292 return CGF.
Builder.CreateSelect(Mask, Op0, Op1);
// Post-processes a comparison result Cmp and returns it via a bitcast.
// NOTE(review): part of the signature, the statement guarded by the mask test,
// the guard around the widening loops, and the start of the final bitcast's
// type argument are not visible in this excerpt.
14296 unsigned NumElts,
Value *MaskIn) {
// Extra work is done unless MaskIn is a constant with all bits set
// (presumably combining Cmp with the mask -- the statement is not visible).
14298 const auto *
C = dyn_cast<Constant>(MaskIn);
14299 if (!
C || !
C->isAllOnesValue())
// Build eight shuffle indices. Entries from NumElts upwards are at least
// NumElts, so they select from the second shuffle operand, which is an
// all-zero value of Cmp's type.
14305 for (
unsigned i = 0; i != NumElts; ++i)
14307 for (
unsigned i = NumElts; i != 8; ++i)
14308 Indices[i] = i % NumElts + NumElts;
14309 Cmp = CGF.
Builder.CreateShuffleVector(
14310 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
// The destination type depends on max(NumElts, 8); presumably an integer of
// that bit width (the enclosing call is not visible).
14313 return CGF.
Builder.CreateBitCast(Cmp,
14315 std::max(NumElts, 8U)));
// Emits an integer vector comparison of Ops[0] and Ops[1] selected by the
// condition code CC, with two or four operands.
// NOTE(review): the function header, the first branch condition, the switch
// header, the assignment to MaskIn and the final return are not visible.
14320 assert((Ops.size() == 2 || Ops.size() == 4) &&
14321 "Unexpected number of arguments");
14323 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
// One condition code (its test is not visible) yields an all-false vector of
// i1, and CC == 7 yields an all-true vector; neither emits a compare.
14327 Cmp = Constant::getNullValue(
14328 llvm::FixedVectorType::get(CGF.
Builder.getInt1Ty(), NumElts));
14329 }
else if (CC == 7) {
14330 Cmp = Constant::getAllOnesValue(
14331 llvm::FixedVectorType::get(CGF.
Builder.getInt1Ty(), NumElts));
// Remaining codes map to icmp predicates: 0 = EQ, 1 = LT, 2 = LE, 4 = NE,
// 5 = GE, 6 = GT. Signed picks the signed or unsigned ordering predicate.
14333 ICmpInst::Predicate Pred;
14335 default: llvm_unreachable(
"Unknown condition code");
14336 case 0: Pred = ICmpInst::ICMP_EQ;
break;
14337 case 1: Pred =
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
break;
14338 case 2: Pred =
Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
break;
14339 case 4: Pred = ICmpInst::ICMP_NE;
break;
14340 case 5: Pred =
Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
break;
14341 case 6: Pred =
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
break;
14343 Cmp = CGF.
Builder.CreateICmp(Pred, Ops[0], Ops[1]);
// MaskIn stays null unless four operands were supplied (the assignment that
// follows this test is not visible).
14346 Value *MaskIn =
nullptr;
14347 if (Ops.size() == 4)
// All-zero constant of the same type as In.
// NOTE(review): the rest of this function is not visible in this excerpt.
14354 Value *Zero = Constant::getNullValue(In->getType());
// Converts the integer value in Ops[0] to the floating-point type of Ops[1],
// either through an AVX-512 rounding intrinsic or a plain SIToFP/UIToFP.
// NOTE(review): the function header, the test that chooses between the two
// paths, the creation of F, and the code after Res is set are not visible.
// Ops[3] is a constant rounding operand.
14360 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
14361 llvm::Type *Ty = Ops[1]->getType();
// Intrinsic path: the signed or unsigned *_round intrinsic is called with the
// source value and the rounding operand.
14365 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
14366 : Intrinsic::x86_avx512_uitofp_round;
14368 Res = CGF.
Builder.CreateCall(F, { Ops[0], Ops[3] });
// Generic path: a plain conversion emitted while FPOptsRAII (constructed from
// CGF and E) is in scope.
14370 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF,
E);
14371 Res = IsSigned ? CGF.
Builder.CreateSIToFP(Ops[0], Ty)
14372 : CGF.
Builder.CreateUIToFP(Ops[0], Ty);
// Emits a packed fused multiply-add for the AVX-512 / AVX10 FMA builtins:
// first maps BuiltinID to a target intrinsic, then emits the FMA, then picks
// the value to use for masked-off lanes.
// NOTE(review): the function header, the definitions of A, B, C, Intr and
// FMA, several statements inside the switches, and the final masking/return
// are not visible in this excerpt.
14383 bool Subtract =
false;
// IID keeps the value not_intrinsic unless one of the cases below assigns it.
14384 Intrinsic::ID IID = Intrinsic::not_intrinsic;
// Each vfmsub*/vfmsubadd* label is followed by lines that are not visible
// (presumably setting Subtract) before the matching vfmadd*/vfmaddsub* labels.
14385 switch (BuiltinID) {
14387 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14390 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14391 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14392 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14393 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
14395 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14398 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14399 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14400 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14401 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
14403 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14406 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14407 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14408 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14409 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512;
break;
14410 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14413 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14414 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14415 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14416 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512;
break;
14417 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14420 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14421 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14422 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14423 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
14425 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14428 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14429 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14430 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14431 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
// 256-bit builtins with an explicit rounding operand use the x86_avx10_*
// intrinsics.
14433 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14436 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14437 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14438 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14439 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
14441 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14444 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14445 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14446 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14447 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
14449 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14452 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14453 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14454 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14455 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
14457 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14460 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14461 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14462 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14463 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
14465 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14468 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14469 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14470 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14471 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
14473 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14476 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14477 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14478 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14479 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
// The target intrinsic is used only when one was selected and the last
// operand (a constant) differs from 4, or a second condition holds that is
// not visible here. That call passes the last operand through unchanged.
14493 if (IID != Intrinsic::not_intrinsic &&
14494 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14497 Res = CGF.
Builder.CreateCall(Intr, {A, B,
C, Ops.back() });
// Otherwise an FMA on {A, B, C} is emitted: the constrained intrinsic when
// the builder is in FP-constrained mode, else a plain call to FMA (whose
// definition for that path is not visible).
14499 llvm::Type *Ty = A->
getType();
14501 if (CGF.
Builder.getIsFPConstrained()) {
14502 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF,
E);
14503 FMA = CGF.
CGM.
getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14504 Res = CGF.
Builder.CreateConstrainedFPCall(FMA, {A, B,
C});
14507 Res = CGF.
Builder.CreateCall(FMA, {A, B,
C});
// Choose the value used where the mask is false, by builtin flavour:
//   *_mask  -> Ops[0]
//   *_maskz -> zero of Ops[0]'s type
//   *_mask3 -> Ops[2]
14512 Value *MaskFalseVal =
nullptr;
14513 switch (BuiltinID) {
14514 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14515 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14516 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14517 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14518 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14519 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14520 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14521 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14522 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14523 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14524 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14525 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14526 MaskFalseVal = Ops[0];
14528 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14529 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14530 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14531 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14532 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14533 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14534 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14535 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14536 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14537 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14538 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14539 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14540 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14542 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14543 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14544 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14545 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14546 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14547 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14548 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14549 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14550 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14551 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14552 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14553 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14554 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14555 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14556 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14557 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14558 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14559 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14560 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14561 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14562 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14563 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14564 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14565 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14566 MaskFalseVal = Ops[2];
// Emits a scalar fused multiply-add on element 0 of the first three operands
// and inserts the result into element 0 of Upper.
// NOTE(review): part of the signature, the declarations of Rnd, Res, IID, FMA
// and Upper, several guards, and the case labels of the size switch are not
// visible in this excerpt.
14578 bool ZeroMask =
false,
unsigned PTIdx = 0,
14579 bool NegAcc =
false) {
// A rounding operand is read from Ops[4] when more than four operands exist.
14581 if (Ops.size() > 4)
14582 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
// Ops[2] is negated here; presumably only when NegAcc is set (the guard is
// not visible).
14585 Ops[2] = CGF.
Builder.CreateFNeg(Ops[2]);
// Only element 0 of each of the three inputs takes part in the computation.
14587 Ops[0] = CGF.
Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14588 Ops[1] = CGF.
Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14589 Ops[2] = CGF.
Builder.CreateExtractElement(Ops[2], (uint64_t)0);
// Intrinsic path: the f16/f32/f64 variant is chosen from the scalar's size in
// bits and called with the three scalars plus Ops[4].
14594 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14596 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14599 IID = Intrinsic::x86_avx512_vfmadd_f32;
14602 IID = Intrinsic::x86_avx512_vfmadd_f64;
14605 llvm_unreachable(
"Unexpected size");
14608 {Ops[0], Ops[1], Ops[2], Ops[4]});
// Otherwise use the constrained fma intrinsic in FP-constrained mode, or a
// plain call to FMA; both take the first three operands.
14609 }
else if (CGF.
Builder.getIsFPConstrained()) {
14610 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF,
E);
14612 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14613 Res = CGF.
Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14616 Res = CGF.
Builder.CreateCall(FMA, Ops.slice(0, 3));
// With more than three operands a pass-through value is computed: zero when
// ZeroMask is set (the alternative is on a line that is not visible).
14619 if (Ops.size() > 3) {
14620 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->
getType())
// When the accumulator was negated and PTIdx is 2, the pass-through is taken
// from element 0 of Upper instead.
14626 if (NegAcc && PTIdx == 2)
14627 PassThru = CGF.
Builder.CreateExtractElement(Upper, (uint64_t)0);
14631 return CGF.
Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
// Re-type the operation as a vector of 64-bit integer lanes spanning the same
// total number of bits as the first operand.
14636 llvm::Type *Ty = Ops[0]->getType();
14638 Ty = llvm::FixedVectorType::get(CGF.
Int64Ty,
14639 Ty->getPrimitiveSizeInBits() / 64);
// Shift left then arithmetic-shift right by 32: keeps the low 32 bits of each
// 64-bit lane and sign-extends them to the full lane.
14645 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14646 LHS = CGF.
Builder.CreateShl(LHS, ShiftAmt);
14647 LHS = CGF.
Builder.CreateAShr(LHS, ShiftAmt);
14648 RHS = CGF.
Builder.CreateShl(RHS, ShiftAmt);
14649 RHS = CGF.
Builder.CreateAShr(RHS, ShiftAmt);
// AND with 0xffffffff: keeps the low 32 bits of each lane and clears the rest.
// NOTE(review): the condition selecting between the shift form above and this
// mask form is not visible here - presumably a signedness flag; confirm.
14652 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14653 LHS = CGF.
Builder.CreateAnd(LHS, Mask);
14654 RHS = CGF.
Builder.CreateAnd(RHS, Mask);
// Multiply the narrowed values lane by lane.
14657 return CGF.
Builder.CreateMul(LHS, RHS);
// The intrinsic is chosen from the first operand's total vector width
// (128 / 256 / 512 bits) and its element width (32 -> "d", 64 -> "q").
14665 llvm::Type *Ty = Ops[0]->getType();
14667 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14668 unsigned EltWidth = Ty->getScalarSizeInBits();
14670 if (VecWidth == 128 && EltWidth == 32)
14671 IID = Intrinsic::x86_avx512_pternlog_d_128;
14672 else if (VecWidth == 256 && EltWidth == 32)
14673 IID = Intrinsic::x86_avx512_pternlog_d_256;
14674 else if (VecWidth == 512 && EltWidth == 32)
14675 IID = Intrinsic::x86_avx512_pternlog_d_512;
14676 else if (VecWidth == 128 && EltWidth == 64)
14677 IID = Intrinsic::x86_avx512_pternlog_q_128;
14678 else if (VecWidth == 256 && EltWidth == 64)
14679 IID = Intrinsic::x86_avx512_pternlog_q_256;
14680 else if (VecWidth == 512 && EltWidth == 64)
14681 IID = Intrinsic::x86_avx512_pternlog_q_512;
// Any other width combination is treated as a programming error.
14683 llvm_unreachable(
"Unexpected intrinsic");
// Pass-through value: an all-zero vector when ZeroMask is set, otherwise the
// first operand.
14687 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14692 llvm::Type *DstTy) {
// DstTy must be a fixed-width vector type; its lane count is read here.
14693 unsigned NumberOfElements =
14694 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
// Sign-extend Mask to DstTy; the resulting value is named "vpmovm2".
// NOTE(review): how Mask is derived from the operand and NumberOfElements is
// on a line not visible here.
14696 return CGF.
Builder.CreateSExt(Mask, DstTy,
"vpmovm2");
// CPUExpr is required to be a string literal; its contents are forwarded to
// the StringRef overload of EmitX86CpuIs.
14701 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14702 return EmitX86CpuIs(CPUStr);
14708 llvm::Type *DstTy) {
// Only the 1-, 3- and 4-operand forms are accepted.
14709 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14710 "Unknown cvtph2ps intrinsic");
// Four-operand form whose last operand is a constant other than 4: emit a
// call to F with all four operands instead of the generic lowering below.
// NOTE(review): Ops[3] is presumably a rounding-control immediate and F a
// target intrinsic declared on a line not visible here - confirm.
14713 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14716 return CGF.
Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
// Generic lowering: start from the first operand and the destination lane
// count.
14719 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14720 Value *Src = Ops[0];
// Source/destination lane-count comparison; inside this branch the
// destination is asserted to have exactly 4 lanes.
14724 cast<llvm::FixedVectorType>(Src->
getType())->getNumElements()) {
14725 assert(NumDstElts == 4 &&
"Unexpected vector size");
// Reinterpret the source bits as HalfTy, then floating-point extend to DstTy.
14730 auto *HalfTy = llvm::FixedVectorType::get(
14732 Src = CGF.
Builder.CreateBitCast(Src, HalfTy);
14735 Value *Res = CGF.
Builder.CreateFPExt(Src, DstTy,
"cvtph2ps");
// Forms with three or more operands get an extra step on Res.
// NOTE(review): that step is on a line not visible here.
14737 if (Ops.size() >= 3)
// Emits an integer equality comparison between a value loaded via the
// CpuModel global and the constant associated with CPUStr.
// CPUStr must be one of the names listed in X86TargetParser.def (asserted
// below).
14742Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14753 llvm::ArrayType::get(
Int32Ty, 1));
// The CpuModel global is marked DSO-local.
14757 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(
true);
// Translate CPUStr into an (Index, Value) pair. The .Case entries are
// generated from X86TargetParser.def; Index is 0, 1 or 2 depending on which
// group of entries the name belongs to.
14763 std::tie(Index,
Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14765 .Case(STRING, {0u,
static_cast<unsigned>(llvm::X86::ENUM)})
14767 .Case(
ALIAS, {1u,
static_cast<unsigned>(llvm::X86::ENUM)})
14769 .Case(STR, {1u,
static_cast<unsigned>(llvm::X86::ENUM)})
14771 .Case(
ALIAS, {2u,
static_cast<unsigned>(llvm::X86::ENUM)})
14773 .Case(STR, {2u,
static_cast<unsigned>(llvm::X86::ENUM)})
14774#include
"llvm/TargetParser/X86TargetParser.def"
// A zero Value is treated as an unrecognised CPU name.
14776 assert(
Value != 0 &&
"Invalid CPUStr passed to CpuIs");
// Constant index pair {0, Index}.
// NOTE(review): presumably used as GEP indices into CpuModel - the use is on
// a line not visible here.
14779 llvm::Value *Idxs[] = {ConstantInt::get(
Int32Ty, 0),
14780 ConstantInt::get(
Int32Ty, Index)};
// Result: CpuValue compared for equality with a second operand.
14786 return Builder.CreateICmpEQ(CpuValue,
// FeatureExpr is required to be a string literal naming the feature.
14792 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
// The name is checked against the target's validateCpuSupports first.
// NOTE(review): the statement executed when validation fails is on a line
// not visible here; the return below is not the body of this if.
14793 if (!
getContext().getTargetInfo().validateCpuSupports(FeatureStr))
// Forward the feature name to another EmitX86CpuSupports overload.
14795 return EmitX86CpuSupports(FeatureStr);
// Convert the feature names to a mask with llvm::X86::getCpuSupportsMask and
// forward to the mask-taking overload.
14799 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14803CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
// FeatureMask is four 32-bit words. Word 0 is handled on its own, and only
// when it is non-zero; that path goes through the CpuModel global, which is
// marked DSO-local.
14805 if (FeatureMask[0] != 0) {
14813 llvm::ArrayType::get(
Int32Ty, 1));
14817 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(
true);
// Words 1..3 are handled through CpuFeatures2, typed as an array of three
// 32-bit integers and also marked DSO-local.
14834 llvm::Type *ATy = llvm::ArrayType::get(
Int32Ty, 3);
14835 llvm::Constant *CpuFeatures2 =
14837 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(
true);
// Visit mask words 1, 2 and 3 in order; M is the current word.
14838 for (
int i = 1; i != 4; ++i) {
14839 const uint32_t M = FeatureMask[i];
// Sets up a callee of type void() (non-variadic), marks it DSO-local and
// gives it the default DLL storage class.
// NOTE(review): the callee's name and the emitted call are on lines not
// visible here.
14856Value *CodeGenFunction::EmitAArch64CpuInit() {
14857 llvm::FunctionType *FTy = llvm::FunctionType::get(
VoidTy,
false);
14858 llvm::FunctionCallee
Func =
14860 cast<llvm::GlobalValue>(
Func.getCallee())->setDSOLocal(
true);
14861 cast<llvm::GlobalValue>(
Func.getCallee())
14862 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
// Callee type: void(void*), non-variadic.
14867 llvm::FunctionType *FTy = llvm::FunctionType::get(
VoidTy, {
VoidPtrTy},
false);
14868 llvm::FunctionCallee
Func =
// The callee is marked DSO-local and given the default DLL storage class.
14870 auto *CalleeGV = cast<llvm::GlobalValue>(
Func.getCallee());
14871 CalleeGV->setDSOLocal(
true);
14872 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
// Sets up a void-returning callee, marks it DSO-local and gives it the
// default DLL storage class.
// NOTE(review): the callee's name and the emitted call are on lines not
// visible here.
14876Value *CodeGenFunction::EmitX86CpuInit() {
14877 llvm::FunctionType *FTy = llvm::FunctionType::get(
VoidTy,
14879 llvm::FunctionCallee
Func =
14881 cast<llvm::GlobalValue>(
Func.getCallee())->setDSOLocal(
true);
14882 cast<llvm::GlobalValue>(
Func.getCallee())
14883 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
// Splits the string-literal argument on '+' into feature names, trims each
// one, checks it with llvm::AArch64::parseFMVExtension, and forwards the list
// to the feature-list overload of EmitAArch64CpuSupports.
14887Value *CodeGenFunction::EmitAArch64CpuSupports(
const CallExpr *
E) {
14889 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14891 ArgStr.split(Features,
"+");
// Feature is a reference into Features, so the trim below updates the list
// in place.
14892 for (
auto &Feature : Features) {
14893 Feature = Feature.trim();
// NOTE(review): the statement executed for an unrecognised feature is on a
// line not visible here.
14894 if (!llvm::AArch64::parseFMVExtension(Feature))
// NOTE(review): this appends to Features while the range-for above is
// iterating over it. Every entry other than "default" ends up in the list
// twice, and growing the container mid-loop can invalidate the loop's
// iterators if storage is reallocated. Looks unintended - consider dropping
// the push_back, or collecting into a separate container.
14896 if (Feature !=
"default")
14897 Features.push_back(Feature);
14899 return EmitAArch64CpuSupports(Features);
// Fold the requested feature names into a single 64-bit mask.
14904 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
// IR is emitted here only for a non-zero mask.
14906 if (FeaturesMask != 0) {
// AArch64CPUFeatures is accessed as a struct with one 64-bit integer field
// and is marked DSO-local.
14911 llvm::Type *STy = llvm::StructType::get(
Int64Ty);
14912 llvm::Constant *AArch64CPUFeatures =
14914 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(
true);
14916 STy, AArch64CPUFeatures,
// FeatureExpr is required to be a string literal naming the feature; the name
// is checked against the target's validateCpuSupports.
// NOTE(review): the statement executed when validation fails is on a line
// not visible here.
14931 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14932 if (!
getContext().getTargetInfo().validateCpuSupports(FeatureStr))
// Layout used to address RISCVFeaturesBits: a struct of one 32-bit integer
// followed by an array of RISCVISAInfo::FeatureBitSize 64-bit integers.
14940 llvm::Type *Int32Ty = Builder.getInt32Ty();
14941 llvm::Type *Int64Ty = Builder.getInt64Ty();
14942 llvm::ArrayType *ArrayOfInt64Ty =
14943 llvm::ArrayType::get(Int64Ty, llvm::RISCVISAInfo::FeatureBitSize);
14944 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
14945 llvm::Constant *RISCVFeaturesBits =
// The global is marked DSO-local.
14947 cast<llvm::GlobalValue>(RISCVFeaturesBits)->setDSOLocal(
true);
// In-bounds GEP starting with indices 0 and 1, i.e. into the struct's second
// field (the 64-bit array).
// NOTE(review): the last index is on a line not visible here - presumably
// IndexVal; confirm.
14948 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
14949 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
14952 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
14953 Value *FeaturesBit =
14955 return FeaturesBit;
// One required-bits mask per feature group, all initially zero.
14959 const unsigned RISCVFeatureLength = llvm::RISCVISAInfo::FeatureBitSize;
14960 uint64_t RequireBitMasks[RISCVFeatureLength] = {0};
// Each feature name maps to a (group, bit position) pair; set that bit in the
// mask of its group.
14962 for (
auto Feat : FeaturesStrs) {
14963 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat);
14970 RequireBitMasks[GroupID] |= (1ULL << BitPos);
// Walk every group; the zero-mask test singles out groups with no required
// bits.
// NOTE(review): what happens for those groups is on a line not visible
// here - presumably they are skipped.
14974 for (
unsigned Idx = 0; Idx < RISCVFeatureLength; Idx++) {
14975 if (RequireBitMasks[Idx] == 0)
// Result is expected to be non-null at this point.
14985 assert(
Result &&
"Should have value here.");
14992 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14993 return EmitX86CpuIs(
E);
14994 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14995 return EmitX86CpuSupports(
E);
14996 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14997 return EmitX86CpuInit();
15005 bool IsMaskFCmp =
false;
15006 bool IsConjFMA =
false;
15009 unsigned ICEArguments = 0;
15014 for (
unsigned i = 0, e =
E->getNumArgs(); i != e; i++) {
15024 auto getCmpIntrinsicCall = [
this, &Ops](Intrinsic::ID
ID,
unsigned Imm) {
15025 Ops.push_back(llvm::ConstantInt::get(
Int8Ty, Imm));
15027 return Builder.CreateCall(F, Ops);
15035 auto getVectorFCmpIR = [
this, &Ops,
E](CmpInst::Predicate Pred,
15036 bool IsSignaling) {
15037 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
15040 Cmp =
Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15042 Cmp =
Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15043 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
15044 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
15046 return Builder.CreateBitCast(Sext, FPVecTy);
15049 switch (BuiltinID) {
15050 default:
return nullptr;
15051 case X86::BI_mm_prefetch: {
15053 ConstantInt *
C = cast<ConstantInt>(Ops[1]);
15054 Value *RW = ConstantInt::get(
Int32Ty, (
C->getZExtValue() >> 2) & 0x1);
15055 Value *Locality = ConstantInt::get(
Int32Ty,
C->getZExtValue() & 0x3);
15060 case X86::BI_mm_clflush: {
15064 case X86::BI_mm_lfence: {
15067 case X86::BI_mm_mfence: {
15070 case X86::BI_mm_sfence: {
15073 case X86::BI_mm_pause: {
15076 case X86::BI__rdtsc: {
15079 case X86::BI__builtin_ia32_rdtscp: {
15085 case X86::BI__builtin_ia32_lzcnt_u16:
15086 case X86::BI__builtin_ia32_lzcnt_u32:
15087 case X86::BI__builtin_ia32_lzcnt_u64: {
15091 case X86::BI__builtin_ia32_tzcnt_u16:
15092 case X86::BI__builtin_ia32_tzcnt_u32:
15093 case X86::BI__builtin_ia32_tzcnt_u64: {
15097 case X86::BI__builtin_ia32_undef128:
15098 case X86::BI__builtin_ia32_undef256:
15099 case X86::BI__builtin_ia32_undef512:
15106 case X86::BI__builtin_ia32_vec_ext_v4hi:
15107 case X86::BI__builtin_ia32_vec_ext_v16qi:
15108 case X86::BI__builtin_ia32_vec_ext_v8hi:
15109 case X86::BI__builtin_ia32_vec_ext_v4si:
15110 case X86::BI__builtin_ia32_vec_ext_v4sf:
15111 case X86::BI__builtin_ia32_vec_ext_v2di:
15112 case X86::BI__builtin_ia32_vec_ext_v32qi:
15113 case X86::BI__builtin_ia32_vec_ext_v16hi:
15114 case X86::BI__builtin_ia32_vec_ext_v8si:
15115 case X86::BI__builtin_ia32_vec_ext_v4di: {
15117 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15118 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15119 Index &= NumElts - 1;
15122 return Builder.CreateExtractElement(Ops[0], Index);
15124 case X86::BI__builtin_ia32_vec_set_v4hi:
15125 case X86::BI__builtin_ia32_vec_set_v16qi:
15126 case X86::BI__builtin_ia32_vec_set_v8hi:
15127 case X86::BI__builtin_ia32_vec_set_v4si:
15128 case X86::BI__builtin_ia32_vec_set_v2di:
15129 case X86::BI__builtin_ia32_vec_set_v32qi:
15130 case X86::BI__builtin_ia32_vec_set_v16hi:
15131 case X86::BI__builtin_ia32_vec_set_v8si:
15132 case X86::BI__builtin_ia32_vec_set_v4di: {
15134 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15135 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15136 Index &= NumElts - 1;
15139 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
15141 case X86::BI_mm_setcsr:
15142 case X86::BI__builtin_ia32_ldmxcsr: {
15148 case X86::BI_mm_getcsr:
15149 case X86::BI__builtin_ia32_stmxcsr: {
15155 case X86::BI__builtin_ia32_xsave:
15156 case X86::BI__builtin_ia32_xsave64:
15157 case X86::BI__builtin_ia32_xrstor:
15158 case X86::BI__builtin_ia32_xrstor64:
15159 case X86::BI__builtin_ia32_xsaveopt:
15160 case X86::BI__builtin_ia32_xsaveopt64:
15161 case X86::BI__builtin_ia32_xrstors:
15162 case X86::BI__builtin_ia32_xrstors64:
15163 case X86::BI__builtin_ia32_xsavec:
15164 case X86::BI__builtin_ia32_xsavec64:
15165 case X86::BI__builtin_ia32_xsaves:
15166 case X86::BI__builtin_ia32_xsaves64:
15167 case X86::BI__builtin_ia32_xsetbv:
15168 case X86::BI_xsetbv: {
15170#define INTRINSIC_X86_XSAVE_ID(NAME) \
15171 case X86::BI__builtin_ia32_##NAME: \
15172 ID = Intrinsic::x86_##NAME; \
15174 switch (BuiltinID) {
15175 default: llvm_unreachable(
"Unsupported intrinsic!");
15189 case X86::BI_xsetbv:
15190 ID = Intrinsic::x86_xsetbv;
15193#undef INTRINSIC_X86_XSAVE_ID
15198 Ops.push_back(Mlo);
15201 case X86::BI__builtin_ia32_xgetbv:
15202 case X86::BI_xgetbv:
15204 case X86::BI__builtin_ia32_storedqudi128_mask:
15205 case X86::BI__builtin_ia32_storedqusi128_mask:
15206 case X86::BI__builtin_ia32_storedquhi128_mask:
15207 case X86::BI__builtin_ia32_storedquqi128_mask:
15208 case X86::BI__builtin_ia32_storeupd128_mask:
15209 case X86::BI__builtin_ia32_storeups128_mask:
15210 case X86::BI__builtin_ia32_storedqudi256_mask:
15211 case X86::BI__builtin_ia32_storedqusi256_mask:
15212 case X86::BI__builtin_ia32_storedquhi256_mask:
15213 case X86::BI__builtin_ia32_storedquqi256_mask:
15214 case X86::BI__builtin_ia32_storeupd256_mask:
15215 case X86::BI__builtin_ia32_storeups256_mask:
15216 case X86::BI__builtin_ia32_storedqudi512_mask:
15217 case X86::BI__builtin_ia32_storedqusi512_mask:
15218 case X86::BI__builtin_ia32_storedquhi512_mask:
15219 case X86::BI__builtin_ia32_storedquqi512_mask:
15220 case X86::BI__builtin_ia32_storeupd512_mask:
15221 case X86::BI__builtin_ia32_storeups512_mask:
15224 case X86::BI__builtin_ia32_storesbf16128_mask:
15225 case X86::BI__builtin_ia32_storesh128_mask:
15226 case X86::BI__builtin_ia32_storess128_mask:
15227 case X86::BI__builtin_ia32_storesd128_mask:
15230 case X86::BI__builtin_ia32_cvtmask2b128:
15231 case X86::BI__builtin_ia32_cvtmask2b256:
15232 case X86::BI__builtin_ia32_cvtmask2b512:
15233 case X86::BI__builtin_ia32_cvtmask2w128:
15234 case X86::BI__builtin_ia32_cvtmask2w256:
15235 case X86::BI__builtin_ia32_cvtmask2w512:
15236 case X86::BI__builtin_ia32_cvtmask2d128:
15237 case X86::BI__builtin_ia32_cvtmask2d256:
15238 case X86::BI__builtin_ia32_cvtmask2d512:
15239 case X86::BI__builtin_ia32_cvtmask2q128:
15240 case X86::BI__builtin_ia32_cvtmask2q256:
15241 case X86::BI__builtin_ia32_cvtmask2q512:
15244 case X86::BI__builtin_ia32_cvtb2mask128:
15245 case X86::BI__builtin_ia32_cvtb2mask256:
15246 case X86::BI__builtin_ia32_cvtb2mask512:
15247 case X86::BI__builtin_ia32_cvtw2mask128:
15248 case X86::BI__builtin_ia32_cvtw2mask256:
15249 case X86::BI__builtin_ia32_cvtw2mask512:
15250 case X86::BI__builtin_ia32_cvtd2mask128:
15251 case X86::BI__builtin_ia32_cvtd2mask256:
15252 case X86::BI__builtin_ia32_cvtd2mask512:
15253 case X86::BI__builtin_ia32_cvtq2mask128:
15254 case X86::BI__builtin_ia32_cvtq2mask256:
15255 case X86::BI__builtin_ia32_cvtq2mask512:
15258 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
15259 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
15260 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
15261 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
15262 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
15263 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
15264 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
15265 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
15266 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
15267 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
15268 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
15269 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
15271 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
15272 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
15273 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
15274 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
15275 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
15276 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
15277 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
15278 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
15279 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
15280 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
15281 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
15282 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
15285 case X86::BI__builtin_ia32_vfmaddss3:
15286 case X86::BI__builtin_ia32_vfmaddsd3:
15287 case X86::BI__builtin_ia32_vfmaddsh3_mask:
15288 case X86::BI__builtin_ia32_vfmaddss3_mask:
15289 case X86::BI__builtin_ia32_vfmaddsd3_mask:
15291 case X86::BI__builtin_ia32_vfmaddss:
15292 case X86::BI__builtin_ia32_vfmaddsd:
15294 Constant::getNullValue(Ops[0]->getType()));
15295 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
15296 case X86::BI__builtin_ia32_vfmaddss3_maskz:
15297 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
15299 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
15300 case X86::BI__builtin_ia32_vfmaddss3_mask3:
15301 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
15303 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
15304 case X86::BI__builtin_ia32_vfmsubss3_mask3:
15305 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
15308 case X86::BI__builtin_ia32_vfmaddph:
15309 case X86::BI__builtin_ia32_vfmaddps:
15310 case X86::BI__builtin_ia32_vfmaddpd:
15311 case X86::BI__builtin_ia32_vfmaddph256:
15312 case X86::BI__builtin_ia32_vfmaddps256:
15313 case X86::BI__builtin_ia32_vfmaddpd256:
15314 case X86::BI__builtin_ia32_vfmaddph512_mask:
15315 case X86::BI__builtin_ia32_vfmaddph512_maskz:
15316 case X86::BI__builtin_ia32_vfmaddph512_mask3:
15317 case X86::BI__builtin_ia32_vfmaddnepbh128:
15318 case X86::BI__builtin_ia32_vfmaddnepbh256:
15319 case X86::BI__builtin_ia32_vfmaddnepbh512:
15320 case X86::BI__builtin_ia32_vfmaddps512_mask:
15321 case X86::BI__builtin_ia32_vfmaddps512_maskz:
15322 case X86::BI__builtin_ia32_vfmaddps512_mask3:
15323 case X86::BI__builtin_ia32_vfmsubps512_mask3:
15324 case X86::BI__builtin_ia32_vfmaddpd512_mask:
15325 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
15326 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
15327 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
15328 case X86::BI__builtin_ia32_vfmsubph512_mask3:
15329 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
15330 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
15331 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
15332 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
15333 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
15334 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
15335 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
15336 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
15337 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
15338 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
15339 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
15340 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
15342 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
15343 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
15344 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
15345 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
15346 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
15347 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
15348 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
15349 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
15350 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
15351 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
15352 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
15353 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
15354 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
15355 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
15356 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
15357 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
15358 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
15359 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
15360 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
15361 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
15362 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
15363 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
15364 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
15365 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
15368 case X86::BI__builtin_ia32_movdqa32store128_mask:
15369 case X86::BI__builtin_ia32_movdqa64store128_mask:
15370 case X86::BI__builtin_ia32_storeaps128_mask:
15371 case X86::BI__builtin_ia32_storeapd128_mask:
15372 case X86::BI__builtin_ia32_movdqa32store256_mask:
15373 case X86::BI__builtin_ia32_movdqa64store256_mask:
15374 case X86::BI__builtin_ia32_storeaps256_mask:
15375 case X86::BI__builtin_ia32_storeapd256_mask:
15376 case X86::BI__builtin_ia32_movdqa32store512_mask:
15377 case X86::BI__builtin_ia32_movdqa64store512_mask:
15378 case X86::BI__builtin_ia32_storeaps512_mask:
15379 case X86::BI__builtin_ia32_storeapd512_mask:
15384 case X86::BI__builtin_ia32_loadups128_mask:
15385 case X86::BI__builtin_ia32_loadups256_mask:
15386 case X86::BI__builtin_ia32_loadups512_mask:
15387 case X86::BI__builtin_ia32_loadupd128_mask:
15388 case X86::BI__builtin_ia32_loadupd256_mask:
15389 case X86::BI__builtin_ia32_loadupd512_mask:
15390 case X86::BI__builtin_ia32_loaddquqi128_mask:
15391 case X86::BI__builtin_ia32_loaddquqi256_mask:
15392 case X86::BI__builtin_ia32_loaddquqi512_mask:
15393 case X86::BI__builtin_ia32_loaddquhi128_mask:
15394 case X86::BI__builtin_ia32_loaddquhi256_mask:
15395 case X86::BI__builtin_ia32_loaddquhi512_mask:
15396 case X86::BI__builtin_ia32_loaddqusi128_mask:
15397 case X86::BI__builtin_ia32_loaddqusi256_mask:
15398 case X86::BI__builtin_ia32_loaddqusi512_mask:
15399 case X86::BI__builtin_ia32_loaddqudi128_mask:
15400 case X86::BI__builtin_ia32_loaddqudi256_mask:
15401 case X86::BI__builtin_ia32_loaddqudi512_mask:
15404 case X86::BI__builtin_ia32_loadsbf16128_mask:
15405 case X86::BI__builtin_ia32_loadsh128_mask:
15406 case X86::BI__builtin_ia32_loadss128_mask:
15407 case X86::BI__builtin_ia32_loadsd128_mask:
15410 case X86::BI__builtin_ia32_loadaps128_mask:
15411 case X86::BI__builtin_ia32_loadaps256_mask:
15412 case X86::BI__builtin_ia32_loadaps512_mask:
15413 case X86::BI__builtin_ia32_loadapd128_mask:
15414 case X86::BI__builtin_ia32_loadapd256_mask:
15415 case X86::BI__builtin_ia32_loadapd512_mask:
15416 case X86::BI__builtin_ia32_movdqa32load128_mask:
15417 case X86::BI__builtin_ia32_movdqa32load256_mask:
15418 case X86::BI__builtin_ia32_movdqa32load512_mask:
15419 case X86::BI__builtin_ia32_movdqa64load128_mask:
15420 case X86::BI__builtin_ia32_movdqa64load256_mask:
15421 case X86::BI__builtin_ia32_movdqa64load512_mask:
15426 case X86::BI__builtin_ia32_expandloaddf128_mask:
15427 case X86::BI__builtin_ia32_expandloaddf256_mask:
15428 case X86::BI__builtin_ia32_expandloaddf512_mask:
15429 case X86::BI__builtin_ia32_expandloadsf128_mask:
15430 case X86::BI__builtin_ia32_expandloadsf256_mask:
15431 case X86::BI__builtin_ia32_expandloadsf512_mask:
15432 case X86::BI__builtin_ia32_expandloaddi128_mask:
15433 case X86::BI__builtin_ia32_expandloaddi256_mask:
15434 case X86::BI__builtin_ia32_expandloaddi512_mask:
15435 case X86::BI__builtin_ia32_expandloadsi128_mask:
15436 case X86::BI__builtin_ia32_expandloadsi256_mask:
15437 case X86::BI__builtin_ia32_expandloadsi512_mask:
15438 case X86::BI__builtin_ia32_expandloadhi128_mask:
15439 case X86::BI__builtin_ia32_expandloadhi256_mask:
15440 case X86::BI__builtin_ia32_expandloadhi512_mask:
15441 case X86::BI__builtin_ia32_expandloadqi128_mask:
15442 case X86::BI__builtin_ia32_expandloadqi256_mask:
15443 case X86::BI__builtin_ia32_expandloadqi512_mask:
15446 case X86::BI__builtin_ia32_compressstoredf128_mask:
15447 case X86::BI__builtin_ia32_compressstoredf256_mask:
15448 case X86::BI__builtin_ia32_compressstoredf512_mask:
15449 case X86::BI__builtin_ia32_compressstoresf128_mask:
15450 case X86::BI__builtin_ia32_compressstoresf256_mask:
15451 case X86::BI__builtin_ia32_compressstoresf512_mask:
15452 case X86::BI__builtin_ia32_compressstoredi128_mask:
15453 case X86::BI__builtin_ia32_compressstoredi256_mask:
15454 case X86::BI__builtin_ia32_compressstoredi512_mask:
15455 case X86::BI__builtin_ia32_compressstoresi128_mask:
15456 case X86::BI__builtin_ia32_compressstoresi256_mask:
15457 case X86::BI__builtin_ia32_compressstoresi512_mask:
15458 case X86::BI__builtin_ia32_compressstorehi128_mask:
15459 case X86::BI__builtin_ia32_compressstorehi256_mask:
15460 case X86::BI__builtin_ia32_compressstorehi512_mask:
15461 case X86::BI__builtin_ia32_compressstoreqi128_mask:
15462 case X86::BI__builtin_ia32_compressstoreqi256_mask:
15463 case X86::BI__builtin_ia32_compressstoreqi512_mask:
15466 case X86::BI__builtin_ia32_expanddf128_mask:
15467 case X86::BI__builtin_ia32_expanddf256_mask:
15468 case X86::BI__builtin_ia32_expanddf512_mask:
15469 case X86::BI__builtin_ia32_expandsf128_mask:
15470 case X86::BI__builtin_ia32_expandsf256_mask:
15471 case X86::BI__builtin_ia32_expandsf512_mask:
15472 case X86::BI__builtin_ia32_expanddi128_mask:
15473 case X86::BI__builtin_ia32_expanddi256_mask:
15474 case X86::BI__builtin_ia32_expanddi512_mask:
15475 case X86::BI__builtin_ia32_expandsi128_mask:
15476 case X86::BI__builtin_ia32_expandsi256_mask:
15477 case X86::BI__builtin_ia32_expandsi512_mask:
15478 case X86::BI__builtin_ia32_expandhi128_mask:
15479 case X86::BI__builtin_ia32_expandhi256_mask:
15480 case X86::BI__builtin_ia32_expandhi512_mask:
15481 case X86::BI__builtin_ia32_expandqi128_mask:
15482 case X86::BI__builtin_ia32_expandqi256_mask:
15483 case X86::BI__builtin_ia32_expandqi512_mask:
15486 case X86::BI__builtin_ia32_compressdf128_mask:
15487 case X86::BI__builtin_ia32_compressdf256_mask:
15488 case X86::BI__builtin_ia32_compressdf512_mask:
15489 case X86::BI__builtin_ia32_compresssf128_mask:
15490 case X86::BI__builtin_ia32_compresssf256_mask:
15491 case X86::BI__builtin_ia32_compresssf512_mask:
15492 case X86::BI__builtin_ia32_compressdi128_mask:
15493 case X86::BI__builtin_ia32_compressdi256_mask:
15494 case X86::BI__builtin_ia32_compressdi512_mask:
15495 case X86::BI__builtin_ia32_compresssi128_mask:
15496 case X86::BI__builtin_ia32_compresssi256_mask:
15497 case X86::BI__builtin_ia32_compresssi512_mask:
15498 case X86::BI__builtin_ia32_compresshi128_mask:
15499 case X86::BI__builtin_ia32_compresshi256_mask:
15500 case X86::BI__builtin_ia32_compresshi512_mask:
15501 case X86::BI__builtin_ia32_compressqi128_mask:
15502 case X86::BI__builtin_ia32_compressqi256_mask:
15503 case X86::BI__builtin_ia32_compressqi512_mask:
15506 case X86::BI__builtin_ia32_gather3div2df:
15507 case X86::BI__builtin_ia32_gather3div2di:
15508 case X86::BI__builtin_ia32_gather3div4df:
15509 case X86::BI__builtin_ia32_gather3div4di:
15510 case X86::BI__builtin_ia32_gather3div4sf:
15511 case X86::BI__builtin_ia32_gather3div4si:
15512 case X86::BI__builtin_ia32_gather3div8sf:
15513 case X86::BI__builtin_ia32_gather3div8si:
15514 case X86::BI__builtin_ia32_gather3siv2df:
15515 case X86::BI__builtin_ia32_gather3siv2di:
15516 case X86::BI__builtin_ia32_gather3siv4df:
15517 case X86::BI__builtin_ia32_gather3siv4di:
15518 case X86::BI__builtin_ia32_gather3siv4sf:
15519 case X86::BI__builtin_ia32_gather3siv4si:
15520 case X86::BI__builtin_ia32_gather3siv8sf:
15521 case X86::BI__builtin_ia32_gather3siv8si:
15522 case X86::BI__builtin_ia32_gathersiv8df:
15523 case X86::BI__builtin_ia32_gathersiv16sf:
15524 case X86::BI__builtin_ia32_gatherdiv8df:
15525 case X86::BI__builtin_ia32_gatherdiv16sf:
15526 case X86::BI__builtin_ia32_gathersiv8di:
15527 case X86::BI__builtin_ia32_gathersiv16si:
15528 case X86::BI__builtin_ia32_gatherdiv8di:
15529 case X86::BI__builtin_ia32_gatherdiv16si: {
15531 switch (BuiltinID) {
15532 default: llvm_unreachable(
"Unexpected builtin");
15533 case X86::BI__builtin_ia32_gather3div2df:
15534 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15536 case X86::BI__builtin_ia32_gather3div2di:
15537 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15539 case X86::BI__builtin_ia32_gather3div4df:
15540 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15542 case X86::BI__builtin_ia32_gather3div4di:
15543 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15545 case X86::BI__builtin_ia32_gather3div4sf:
15546 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15548 case X86::BI__builtin_ia32_gather3div4si:
15549 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15551 case X86::BI__builtin_ia32_gather3div8sf:
15552 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15554 case X86::BI__builtin_ia32_gather3div8si:
15555 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15557 case X86::BI__builtin_ia32_gather3siv2df:
15558 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15560 case X86::BI__builtin_ia32_gather3siv2di:
15561 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15563 case X86::BI__builtin_ia32_gather3siv4df:
15564 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15566 case X86::BI__builtin_ia32_gather3siv4di:
15567 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15569 case X86::BI__builtin_ia32_gather3siv4sf:
15570 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15572 case X86::BI__builtin_ia32_gather3siv4si:
15573 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15575 case X86::BI__builtin_ia32_gather3siv8sf:
15576 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15578 case X86::BI__builtin_ia32_gather3siv8si:
15579 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15581 case X86::BI__builtin_ia32_gathersiv8df:
15582 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15584 case X86::BI__builtin_ia32_gathersiv16sf:
15585 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15587 case X86::BI__builtin_ia32_gatherdiv8df:
15588 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15590 case X86::BI__builtin_ia32_gatherdiv16sf:
15591 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15593 case X86::BI__builtin_ia32_gathersiv8di:
15594 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15596 case X86::BI__builtin_ia32_gathersiv16si:
15597 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15599 case X86::BI__builtin_ia32_gatherdiv8di:
15600 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15602 case X86::BI__builtin_ia32_gatherdiv16si:
15603 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15607 unsigned MinElts = std::min(
15608 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15609 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15612 return Builder.CreateCall(Intr, Ops);
15615 case X86::BI__builtin_ia32_scattersiv8df:
15616 case X86::BI__builtin_ia32_scattersiv16sf:
15617 case X86::BI__builtin_ia32_scatterdiv8df:
15618 case X86::BI__builtin_ia32_scatterdiv16sf:
15619 case X86::BI__builtin_ia32_scattersiv8di:
15620 case X86::BI__builtin_ia32_scattersiv16si:
15621 case X86::BI__builtin_ia32_scatterdiv8di:
15622 case X86::BI__builtin_ia32_scatterdiv16si:
15623 case X86::BI__builtin_ia32_scatterdiv2df:
15624 case X86::BI__builtin_ia32_scatterdiv2di:
15625 case X86::BI__builtin_ia32_scatterdiv4df:
15626 case X86::BI__builtin_ia32_scatterdiv4di:
15627 case X86::BI__builtin_ia32_scatterdiv4sf:
15628 case X86::BI__builtin_ia32_scatterdiv4si:
15629 case X86::BI__builtin_ia32_scatterdiv8sf:
15630 case X86::BI__builtin_ia32_scatterdiv8si:
15631 case X86::BI__builtin_ia32_scattersiv2df:
15632 case X86::BI__builtin_ia32_scattersiv2di:
15633 case X86::BI__builtin_ia32_scattersiv4df:
15634 case X86::BI__builtin_ia32_scattersiv4di:
15635 case X86::BI__builtin_ia32_scattersiv4sf:
15636 case X86::BI__builtin_ia32_scattersiv4si:
15637 case X86::BI__builtin_ia32_scattersiv8sf:
15638 case X86::BI__builtin_ia32_scattersiv8si: {
15640 switch (BuiltinID) {
15641 default: llvm_unreachable(
"Unexpected builtin");
15642 case X86::BI__builtin_ia32_scattersiv8df:
15643 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15645 case X86::BI__builtin_ia32_scattersiv16sf:
15646 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15648 case X86::BI__builtin_ia32_scatterdiv8df:
15649 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15651 case X86::BI__builtin_ia32_scatterdiv16sf:
15652 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15654 case X86::BI__builtin_ia32_scattersiv8di:
15655 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15657 case X86::BI__builtin_ia32_scattersiv16si:
15658 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15660 case X86::BI__builtin_ia32_scatterdiv8di:
15661 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15663 case X86::BI__builtin_ia32_scatterdiv16si:
15664 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15666 case X86::BI__builtin_ia32_scatterdiv2df:
15667 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15669 case X86::BI__builtin_ia32_scatterdiv2di:
15670 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15672 case X86::BI__builtin_ia32_scatterdiv4df:
15673 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15675 case X86::BI__builtin_ia32_scatterdiv4di:
15676 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15678 case X86::BI__builtin_ia32_scatterdiv4sf:
15679 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15681 case X86::BI__builtin_ia32_scatterdiv4si:
15682 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15684 case X86::BI__builtin_ia32_scatterdiv8sf:
15685 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15687 case X86::BI__builtin_ia32_scatterdiv8si:
15688 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15690 case X86::BI__builtin_ia32_scattersiv2df:
15691 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15693 case X86::BI__builtin_ia32_scattersiv2di:
15694 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15696 case X86::BI__builtin_ia32_scattersiv4df:
15697 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15699 case X86::BI__builtin_ia32_scattersiv4di:
15700 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15702 case X86::BI__builtin_ia32_scattersiv4sf:
15703 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15705 case X86::BI__builtin_ia32_scattersiv4si:
15706 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15708 case X86::BI__builtin_ia32_scattersiv8sf:
15709 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15711 case X86::BI__builtin_ia32_scattersiv8si:
15712 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15716 unsigned MinElts = std::min(
15717 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15718 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15721 return Builder.CreateCall(Intr, Ops);
15724 case X86::BI__builtin_ia32_vextractf128_pd256:
15725 case X86::BI__builtin_ia32_vextractf128_ps256:
15726 case X86::BI__builtin_ia32_vextractf128_si256:
15727 case X86::BI__builtin_ia32_extract128i256:
15728 case X86::BI__builtin_ia32_extractf64x4_mask:
15729 case X86::BI__builtin_ia32_extractf32x4_mask:
15730 case X86::BI__builtin_ia32_extracti64x4_mask:
15731 case X86::BI__builtin_ia32_extracti32x4_mask:
15732 case X86::BI__builtin_ia32_extractf32x8_mask:
15733 case X86::BI__builtin_ia32_extracti32x8_mask:
15734 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15735 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15736 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15737 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15738 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15739 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15741 unsigned NumElts = DstTy->getNumElements();
15742 unsigned SrcNumElts =
15743 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15744 unsigned SubVectors = SrcNumElts / NumElts;
15745 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15746 assert(llvm::isPowerOf2_32(SubVectors) &&
"Expected power of 2 subvectors");
15747 Index &= SubVectors - 1;
15751 for (
unsigned i = 0; i != NumElts; ++i)
15752 Indices[i] = i + Index;
15757 if (Ops.size() == 4)
15762 case X86::BI__builtin_ia32_vinsertf128_pd256:
15763 case X86::BI__builtin_ia32_vinsertf128_ps256:
15764 case X86::BI__builtin_ia32_vinsertf128_si256:
15765 case X86::BI__builtin_ia32_insert128i256:
15766 case X86::BI__builtin_ia32_insertf64x4:
15767 case X86::BI__builtin_ia32_insertf32x4:
15768 case X86::BI__builtin_ia32_inserti64x4:
15769 case X86::BI__builtin_ia32_inserti32x4:
15770 case X86::BI__builtin_ia32_insertf32x8:
15771 case X86::BI__builtin_ia32_inserti32x8:
15772 case X86::BI__builtin_ia32_insertf32x4_256:
15773 case X86::BI__builtin_ia32_inserti32x4_256:
15774 case X86::BI__builtin_ia32_insertf64x2_256:
15775 case X86::BI__builtin_ia32_inserti64x2_256:
15776 case X86::BI__builtin_ia32_insertf64x2_512:
15777 case X86::BI__builtin_ia32_inserti64x2_512: {
15778 unsigned DstNumElts =
15779 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15780 unsigned SrcNumElts =
15781 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15782 unsigned SubVectors = DstNumElts / SrcNumElts;
15783 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15784 assert(llvm::isPowerOf2_32(SubVectors) &&
"Expected power of 2 subvectors");
15785 Index &= SubVectors - 1;
15786 Index *= SrcNumElts;
15789 for (
unsigned i = 0; i != DstNumElts; ++i)
15790 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15793 Ops[1],
ArrayRef(Indices, DstNumElts),
"widen");
15795 for (
unsigned i = 0; i != DstNumElts; ++i) {
15796 if (i >= Index && i < (Index + SrcNumElts))
15797 Indices[i] = (i - Index) + DstNumElts;
15802 return Builder.CreateShuffleVector(Ops[0], Op1,
15803 ArrayRef(Indices, DstNumElts),
"insert");
15805 case X86::BI__builtin_ia32_pmovqd512_mask:
15806 case X86::BI__builtin_ia32_pmovwb512_mask: {
15807 Value *Res =
Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15810 case X86::BI__builtin_ia32_pmovdb512_mask:
15811 case X86::BI__builtin_ia32_pmovdw512_mask:
15812 case X86::BI__builtin_ia32_pmovqw512_mask: {
15813 if (
const auto *
C = dyn_cast<Constant>(Ops[2]))
15814 if (
C->isAllOnesValue())
15815 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15818 switch (BuiltinID) {
15819 default: llvm_unreachable(
"Unsupported intrinsic!");
15820 case X86::BI__builtin_ia32_pmovdb512_mask:
15821 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15823 case X86::BI__builtin_ia32_pmovdw512_mask:
15824 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15826 case X86::BI__builtin_ia32_pmovqw512_mask:
15827 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15832 return Builder.CreateCall(Intr, Ops);
15834 case X86::BI__builtin_ia32_pblendw128:
15835 case X86::BI__builtin_ia32_blendpd:
15836 case X86::BI__builtin_ia32_blendps:
15837 case X86::BI__builtin_ia32_blendpd256:
15838 case X86::BI__builtin_ia32_blendps256:
15839 case X86::BI__builtin_ia32_pblendw256:
15840 case X86::BI__builtin_ia32_pblendd128:
15841 case X86::BI__builtin_ia32_pblendd256: {
15843 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15844 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15849 for (
unsigned i = 0; i != NumElts; ++i)
15850 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15852 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15853 ArrayRef(Indices, NumElts),
"blend");
15855 case X86::BI__builtin_ia32_pshuflw:
15856 case X86::BI__builtin_ia32_pshuflw256:
15857 case X86::BI__builtin_ia32_pshuflw512: {
15858 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15859 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15860 unsigned NumElts = Ty->getNumElements();
15863 Imm = (Imm & 0xff) * 0x01010101;
15866 for (
unsigned l = 0; l != NumElts; l += 8) {
15867 for (
unsigned i = 0; i != 4; ++i) {
15868 Indices[l + i] = l + (Imm & 3);
15871 for (
unsigned i = 4; i != 8; ++i)
15872 Indices[l + i] = l + i;
15875 return Builder.CreateShuffleVector(Ops[0],
ArrayRef(Indices, NumElts),
15878 case X86::BI__builtin_ia32_pshufhw:
15879 case X86::BI__builtin_ia32_pshufhw256:
15880 case X86::BI__builtin_ia32_pshufhw512: {
15881 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15882 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15883 unsigned NumElts = Ty->getNumElements();
15886 Imm = (Imm & 0xff) * 0x01010101;
15889 for (
unsigned l = 0; l != NumElts; l += 8) {
15890 for (
unsigned i = 0; i != 4; ++i)
15891 Indices[l + i] = l + i;
15892 for (
unsigned i = 4; i != 8; ++i) {
15893 Indices[l + i] = l + 4 + (Imm & 3);
15898 return Builder.CreateShuffleVector(Ops[0],
ArrayRef(Indices, NumElts),
15901 case X86::BI__builtin_ia32_pshufd:
15902 case X86::BI__builtin_ia32_pshufd256:
15903 case X86::BI__builtin_ia32_pshufd512:
15904 case X86::BI__builtin_ia32_vpermilpd:
15905 case X86::BI__builtin_ia32_vpermilps:
15906 case X86::BI__builtin_ia32_vpermilpd256:
15907 case X86::BI__builtin_ia32_vpermilps256:
15908 case X86::BI__builtin_ia32_vpermilpd512:
15909 case X86::BI__builtin_ia32_vpermilps512: {
15910 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15911 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15912 unsigned NumElts = Ty->getNumElements();
15913 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15914 unsigned NumLaneElts = NumElts / NumLanes;
15917 Imm = (Imm & 0xff) * 0x01010101;
15920 for (
unsigned l = 0; l != NumElts; l += NumLaneElts) {
15921 for (
unsigned i = 0; i != NumLaneElts; ++i) {
15922 Indices[i + l] = (Imm % NumLaneElts) + l;
15923 Imm /= NumLaneElts;
15927 return Builder.CreateShuffleVector(Ops[0],
ArrayRef(Indices, NumElts),
15930 case X86::BI__builtin_ia32_shufpd:
15931 case X86::BI__builtin_ia32_shufpd256:
15932 case X86::BI__builtin_ia32_shufpd512:
15933 case X86::BI__builtin_ia32_shufps:
15934 case X86::BI__builtin_ia32_shufps256:
15935 case X86::BI__builtin_ia32_shufps512: {
15936 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15937 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15938 unsigned NumElts = Ty->getNumElements();
15939 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15940 unsigned NumLaneElts = NumElts / NumLanes;
15943 Imm = (Imm & 0xff) * 0x01010101;
15946 for (
unsigned l = 0; l != NumElts; l += NumLaneElts) {
15947 for (
unsigned i = 0; i != NumLaneElts; ++i) {
15948 unsigned Index = Imm % NumLaneElts;
15949 Imm /= NumLaneElts;
15950 if (i >= (NumLaneElts / 2))
15952 Indices[l + i] = l + Index;
15956 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15957 ArrayRef(Indices, NumElts),
"shufp");
15959 case X86::BI__builtin_ia32_permdi256:
15960 case X86::BI__builtin_ia32_permdf256:
15961 case X86::BI__builtin_ia32_permdi512:
15962 case X86::BI__builtin_ia32_permdf512: {
15963 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15964 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15965 unsigned NumElts = Ty->getNumElements();
15969 for (
unsigned l = 0; l != NumElts; l += 4)
15970 for (
unsigned i = 0; i != 4; ++i)
15971 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15973 return Builder.CreateShuffleVector(Ops[0],
ArrayRef(Indices, NumElts),
15976 case X86::BI__builtin_ia32_palignr128:
15977 case X86::BI__builtin_ia32_palignr256:
15978 case X86::BI__builtin_ia32_palignr512: {
15979 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15982 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15983 assert(NumElts % 16 == 0);
15987 if (ShiftVal >= 32)
15992 if (ShiftVal > 16) {
15995 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
16000 for (
unsigned l = 0; l != NumElts; l += 16) {
16001 for (
unsigned i = 0; i != 16; ++i) {
16002 unsigned Idx = ShiftVal + i;
16004 Idx += NumElts - 16;
16005 Indices[l + i] = Idx + l;
16009 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16010 ArrayRef(Indices, NumElts),
"palignr");
16012 case X86::BI__builtin_ia32_alignd128:
16013 case X86::BI__builtin_ia32_alignd256:
16014 case X86::BI__builtin_ia32_alignd512:
16015 case X86::BI__builtin_ia32_alignq128:
16016 case X86::BI__builtin_ia32_alignq256:
16017 case X86::BI__builtin_ia32_alignq512: {
16019 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16020 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16023 ShiftVal &= NumElts - 1;
16026 for (
unsigned i = 0; i != NumElts; ++i)
16027 Indices[i] = i + ShiftVal;
16029 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16030 ArrayRef(Indices, NumElts),
"valign");
16032 case X86::BI__builtin_ia32_shuf_f32x4_256:
16033 case X86::BI__builtin_ia32_shuf_f64x2_256:
16034 case X86::BI__builtin_ia32_shuf_i32x4_256:
16035 case X86::BI__builtin_ia32_shuf_i64x2_256:
16036 case X86::BI__builtin_ia32_shuf_f32x4:
16037 case X86::BI__builtin_ia32_shuf_f64x2:
16038 case X86::BI__builtin_ia32_shuf_i32x4:
16039 case X86::BI__builtin_ia32_shuf_i64x2: {
16040 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16041 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16042 unsigned NumElts = Ty->getNumElements();
16043 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
16044 unsigned NumLaneElts = NumElts / NumLanes;
16047 for (
unsigned l = 0; l != NumElts; l += NumLaneElts) {
16048 unsigned Index = (Imm % NumLanes) * NumLaneElts;
16050 if (l >= (NumElts / 2))
16052 for (
unsigned i = 0; i != NumLaneElts; ++i) {
16053 Indices[l + i] = Index + i;
16057 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16058 ArrayRef(Indices, NumElts),
"shuf");
16061 case X86::BI__builtin_ia32_vperm2f128_pd256:
16062 case X86::BI__builtin_ia32_vperm2f128_ps256:
16063 case X86::BI__builtin_ia32_vperm2f128_si256:
16064 case X86::BI__builtin_ia32_permti256: {
16065 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16067 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16076 for (
unsigned l = 0; l != 2; ++l) {
16078 if (Imm & (1 << ((l * 4) + 3)))
16079 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
16080 else if (Imm & (1 << ((l * 4) + 1)))
16081 OutOps[l] = Ops[1];
16083 OutOps[l] = Ops[0];
16085 for (
unsigned i = 0; i != NumElts/2; ++i) {
16087 unsigned Idx = (l * NumElts) + i;
16090 if (Imm & (1 << (l * 4)))
16092 Indices[(l * (NumElts/2)) + i] = Idx;
16096 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
16097 ArrayRef(Indices, NumElts),
"vperm");
16100 case X86::BI__builtin_ia32_pslldqi128_byteshift:
16101 case X86::BI__builtin_ia32_pslldqi256_byteshift:
16102 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
16103 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16104 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16106 unsigned NumElts = ResultType->getNumElements() * 8;
16109 if (ShiftVal >= 16)
16110 return llvm::Constant::getNullValue(ResultType);
16114 for (
unsigned l = 0; l != NumElts; l += 16) {
16115 for (
unsigned i = 0; i != 16; ++i) {
16116 unsigned Idx = NumElts + i - ShiftVal;
16117 if (Idx < NumElts) Idx -= NumElts - 16;
16118 Indices[l + i] = Idx + l;
16122 auto *VecTy = llvm::FixedVectorType::get(
Int8Ty, NumElts);
16124 Value *
Zero = llvm::Constant::getNullValue(VecTy);
16126 Zero, Cast,
ArrayRef(Indices, NumElts),
"pslldq");
16127 return Builder.CreateBitCast(SV, Ops[0]->getType(),
"cast");
16129 case X86::BI__builtin_ia32_psrldqi128_byteshift:
16130 case X86::BI__builtin_ia32_psrldqi256_byteshift:
16131 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
16132 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16133 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16135 unsigned NumElts = ResultType->getNumElements() * 8;
16138 if (ShiftVal >= 16)
16139 return llvm::Constant::getNullValue(ResultType);
16143 for (
unsigned l = 0; l != NumElts; l += 16) {
16144 for (
unsigned i = 0; i != 16; ++i) {
16145 unsigned Idx = i + ShiftVal;
16146 if (Idx >= 16) Idx += NumElts - 16;
16147 Indices[l + i] = Idx + l;
16151 auto *VecTy = llvm::FixedVectorType::get(
Int8Ty, NumElts);
16153 Value *
Zero = llvm::Constant::getNullValue(VecTy);
16155 Cast, Zero,
ArrayRef(Indices, NumElts),
"psrldq");
16156 return Builder.CreateBitCast(SV, ResultType,
"cast");
16158 case X86::BI__builtin_ia32_kshiftliqi:
16159 case X86::BI__builtin_ia32_kshiftlihi:
16160 case X86::BI__builtin_ia32_kshiftlisi:
16161 case X86::BI__builtin_ia32_kshiftlidi: {
16162 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16163 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16165 if (ShiftVal >= NumElts)
16166 return llvm::Constant::getNullValue(Ops[0]->getType());
16171 for (
unsigned i = 0; i != NumElts; ++i)
16172 Indices[i] = NumElts + i - ShiftVal;
16174 Value *
Zero = llvm::Constant::getNullValue(
In->getType());
16176 Zero, In,
ArrayRef(Indices, NumElts),
"kshiftl");
16177 return Builder.CreateBitCast(SV, Ops[0]->getType());
16179 case X86::BI__builtin_ia32_kshiftriqi:
16180 case X86::BI__builtin_ia32_kshiftrihi:
16181 case X86::BI__builtin_ia32_kshiftrisi:
16182 case X86::BI__builtin_ia32_kshiftridi: {
16183 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16184 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16186 if (ShiftVal >= NumElts)
16187 return llvm::Constant::getNullValue(Ops[0]->getType());
16192 for (
unsigned i = 0; i != NumElts; ++i)
16193 Indices[i] = i + ShiftVal;
16195 Value *
Zero = llvm::Constant::getNullValue(
In->getType());
16197 In, Zero,
ArrayRef(Indices, NumElts),
"kshiftr");
16198 return Builder.CreateBitCast(SV, Ops[0]->getType());
16200 case X86::BI__builtin_ia32_movnti:
16201 case X86::BI__builtin_ia32_movnti64:
16202 case X86::BI__builtin_ia32_movntsd:
16203 case X86::BI__builtin_ia32_movntss: {
16204 llvm::MDNode *
Node = llvm::MDNode::get(
16207 Value *Ptr = Ops[0];
16208 Value *Src = Ops[1];
16211 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
16212 BuiltinID == X86::BI__builtin_ia32_movntss)
16213 Src =
Builder.CreateExtractElement(Src, (uint64_t)0,
"extract");
16217 SI->setMetadata(llvm::LLVMContext::MD_nontemporal,
Node);
16218 SI->setAlignment(llvm::Align(1));
16222 case X86::BI__builtin_ia32_vprotb:
16223 case X86::BI__builtin_ia32_vprotw:
16224 case X86::BI__builtin_ia32_vprotd:
16225 case X86::BI__builtin_ia32_vprotq:
16226 case X86::BI__builtin_ia32_vprotbi:
16227 case X86::BI__builtin_ia32_vprotwi:
16228 case X86::BI__builtin_ia32_vprotdi:
16229 case X86::BI__builtin_ia32_vprotqi:
16230 case X86::BI__builtin_ia32_prold128:
16231 case X86::BI__builtin_ia32_prold256:
16232 case X86::BI__builtin_ia32_prold512:
16233 case X86::BI__builtin_ia32_prolq128:
16234 case X86::BI__builtin_ia32_prolq256:
16235 case X86::BI__builtin_ia32_prolq512:
16236 case X86::BI__builtin_ia32_prolvd128:
16237 case X86::BI__builtin_ia32_prolvd256:
16238 case X86::BI__builtin_ia32_prolvd512:
16239 case X86::BI__builtin_ia32_prolvq128:
16240 case X86::BI__builtin_ia32_prolvq256:
16241 case X86::BI__builtin_ia32_prolvq512:
16243 case X86::BI__builtin_ia32_prord128:
16244 case X86::BI__builtin_ia32_prord256:
16245 case X86::BI__builtin_ia32_prord512:
16246 case X86::BI__builtin_ia32_prorq128:
16247 case X86::BI__builtin_ia32_prorq256:
16248 case X86::BI__builtin_ia32_prorq512:
16249 case X86::BI__builtin_ia32_prorvd128:
16250 case X86::BI__builtin_ia32_prorvd256:
16251 case X86::BI__builtin_ia32_prorvd512:
16252 case X86::BI__builtin_ia32_prorvq128:
16253 case X86::BI__builtin_ia32_prorvq256:
16254 case X86::BI__builtin_ia32_prorvq512:
16256 case X86::BI__builtin_ia32_selectb_128:
16257 case X86::BI__builtin_ia32_selectb_256:
16258 case X86::BI__builtin_ia32_selectb_512:
16259 case X86::BI__builtin_ia32_selectw_128:
16260 case X86::BI__builtin_ia32_selectw_256:
16261 case X86::BI__builtin_ia32_selectw_512:
16262 case X86::BI__builtin_ia32_selectd_128:
16263 case X86::BI__builtin_ia32_selectd_256:
16264 case X86::BI__builtin_ia32_selectd_512:
16265 case X86::BI__builtin_ia32_selectq_128:
16266 case X86::BI__builtin_ia32_selectq_256:
16267 case X86::BI__builtin_ia32_selectq_512:
16268 case X86::BI__builtin_ia32_selectph_128:
16269 case X86::BI__builtin_ia32_selectph_256:
16270 case X86::BI__builtin_ia32_selectph_512:
16271 case X86::BI__builtin_ia32_selectpbf_128:
16272 case X86::BI__builtin_ia32_selectpbf_256:
16273 case X86::BI__builtin_ia32_selectpbf_512:
16274 case X86::BI__builtin_ia32_selectps_128:
16275 case X86::BI__builtin_ia32_selectps_256:
16276 case X86::BI__builtin_ia32_selectps_512:
16277 case X86::BI__builtin_ia32_selectpd_128:
16278 case X86::BI__builtin_ia32_selectpd_256:
16279 case X86::BI__builtin_ia32_selectpd_512:
16281 case X86::BI__builtin_ia32_selectsh_128:
16282 case X86::BI__builtin_ia32_selectsbf_128:
16283 case X86::BI__builtin_ia32_selectss_128:
16284 case X86::BI__builtin_ia32_selectsd_128: {
16285 Value *A =
Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16286 Value *B =
Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16288 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
16290 case X86::BI__builtin_ia32_cmpb128_mask:
16291 case X86::BI__builtin_ia32_cmpb256_mask:
16292 case X86::BI__builtin_ia32_cmpb512_mask:
16293 case X86::BI__builtin_ia32_cmpw128_mask:
16294 case X86::BI__builtin_ia32_cmpw256_mask:
16295 case X86::BI__builtin_ia32_cmpw512_mask:
16296 case X86::BI__builtin_ia32_cmpd128_mask:
16297 case X86::BI__builtin_ia32_cmpd256_mask:
16298 case X86::BI__builtin_ia32_cmpd512_mask:
16299 case X86::BI__builtin_ia32_cmpq128_mask:
16300 case X86::BI__builtin_ia32_cmpq256_mask:
16301 case X86::BI__builtin_ia32_cmpq512_mask: {
16302 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16305 case X86::BI__builtin_ia32_ucmpb128_mask:
16306 case X86::BI__builtin_ia32_ucmpb256_mask:
16307 case X86::BI__builtin_ia32_ucmpb512_mask:
16308 case X86::BI__builtin_ia32_ucmpw128_mask:
16309 case X86::BI__builtin_ia32_ucmpw256_mask:
16310 case X86::BI__builtin_ia32_ucmpw512_mask:
16311 case X86::BI__builtin_ia32_ucmpd128_mask:
16312 case X86::BI__builtin_ia32_ucmpd256_mask:
16313 case X86::BI__builtin_ia32_ucmpd512_mask:
16314 case X86::BI__builtin_ia32_ucmpq128_mask:
16315 case X86::BI__builtin_ia32_ucmpq256_mask:
16316 case X86::BI__builtin_ia32_ucmpq512_mask: {
16317 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16320 case X86::BI__builtin_ia32_vpcomb:
16321 case X86::BI__builtin_ia32_vpcomw:
16322 case X86::BI__builtin_ia32_vpcomd:
16323 case X86::BI__builtin_ia32_vpcomq:
16325 case X86::BI__builtin_ia32_vpcomub:
16326 case X86::BI__builtin_ia32_vpcomuw:
16327 case X86::BI__builtin_ia32_vpcomud:
16328 case X86::BI__builtin_ia32_vpcomuq:
16331 case X86::BI__builtin_ia32_kortestcqi:
16332 case X86::BI__builtin_ia32_kortestchi:
16333 case X86::BI__builtin_ia32_kortestcsi:
16334 case X86::BI__builtin_ia32_kortestcdi: {
16336 Value *
C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
16340 case X86::BI__builtin_ia32_kortestzqi:
16341 case X86::BI__builtin_ia32_kortestzhi:
16342 case X86::BI__builtin_ia32_kortestzsi:
16343 case X86::BI__builtin_ia32_kortestzdi: {
16345 Value *
C = llvm::Constant::getNullValue(Ops[0]->getType());
16350 case X86::BI__builtin_ia32_ktestcqi:
16351 case X86::BI__builtin_ia32_ktestzqi:
16352 case X86::BI__builtin_ia32_ktestchi:
16353 case X86::BI__builtin_ia32_ktestzhi:
16354 case X86::BI__builtin_ia32_ktestcsi:
16355 case X86::BI__builtin_ia32_ktestzsi:
16356 case X86::BI__builtin_ia32_ktestcdi:
16357 case X86::BI__builtin_ia32_ktestzdi: {
16359 switch (BuiltinID) {
16360 default: llvm_unreachable(
"Unsupported intrinsic!");
16361 case X86::BI__builtin_ia32_ktestcqi:
16362 IID = Intrinsic::x86_avx512_ktestc_b;
16364 case X86::BI__builtin_ia32_ktestzqi:
16365 IID = Intrinsic::x86_avx512_ktestz_b;
16367 case X86::BI__builtin_ia32_ktestchi:
16368 IID = Intrinsic::x86_avx512_ktestc_w;
16370 case X86::BI__builtin_ia32_ktestzhi:
16371 IID = Intrinsic::x86_avx512_ktestz_w;
16373 case X86::BI__builtin_ia32_ktestcsi:
16374 IID = Intrinsic::x86_avx512_ktestc_d;
16376 case X86::BI__builtin_ia32_ktestzsi:
16377 IID = Intrinsic::x86_avx512_ktestz_d;
16379 case X86::BI__builtin_ia32_ktestcdi:
16380 IID = Intrinsic::x86_avx512_ktestc_q;
16382 case X86::BI__builtin_ia32_ktestzdi:
16383 IID = Intrinsic::x86_avx512_ktestz_q;
16387 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16391 return Builder.CreateCall(Intr, {LHS, RHS});
16394 case X86::BI__builtin_ia32_kaddqi:
16395 case X86::BI__builtin_ia32_kaddhi:
16396 case X86::BI__builtin_ia32_kaddsi:
16397 case X86::BI__builtin_ia32_kadddi: {
16399 switch (BuiltinID) {
16400 default: llvm_unreachable(
"Unsupported intrinsic!");
16401 case X86::BI__builtin_ia32_kaddqi:
16402 IID = Intrinsic::x86_avx512_kadd_b;
16404 case X86::BI__builtin_ia32_kaddhi:
16405 IID = Intrinsic::x86_avx512_kadd_w;
16407 case X86::BI__builtin_ia32_kaddsi:
16408 IID = Intrinsic::x86_avx512_kadd_d;
16410 case X86::BI__builtin_ia32_kadddi:
16411 IID = Intrinsic::x86_avx512_kadd_q;
16415 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16420 return Builder.CreateBitCast(Res, Ops[0]->getType());
16422 case X86::BI__builtin_ia32_kandqi:
16423 case X86::BI__builtin_ia32_kandhi:
16424 case X86::BI__builtin_ia32_kandsi:
16425 case X86::BI__builtin_ia32_kanddi:
16427 case X86::BI__builtin_ia32_kandnqi:
16428 case X86::BI__builtin_ia32_kandnhi:
16429 case X86::BI__builtin_ia32_kandnsi:
16430 case X86::BI__builtin_ia32_kandndi:
16432 case X86::BI__builtin_ia32_korqi:
16433 case X86::BI__builtin_ia32_korhi:
16434 case X86::BI__builtin_ia32_korsi:
16435 case X86::BI__builtin_ia32_kordi:
16437 case X86::BI__builtin_ia32_kxnorqi:
16438 case X86::BI__builtin_ia32_kxnorhi:
16439 case X86::BI__builtin_ia32_kxnorsi:
16440 case X86::BI__builtin_ia32_kxnordi:
16442 case X86::BI__builtin_ia32_kxorqi:
16443 case X86::BI__builtin_ia32_kxorhi:
16444 case X86::BI__builtin_ia32_kxorsi:
16445 case X86::BI__builtin_ia32_kxordi:
16447 case X86::BI__builtin_ia32_knotqi:
16448 case X86::BI__builtin_ia32_knothi:
16449 case X86::BI__builtin_ia32_knotsi:
16450 case X86::BI__builtin_ia32_knotdi: {
16451 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16454 Ops[0]->getType());
16456 case X86::BI__builtin_ia32_kmovb:
16457 case X86::BI__builtin_ia32_kmovw:
16458 case X86::BI__builtin_ia32_kmovd:
16459 case X86::BI__builtin_ia32_kmovq: {
16463 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16465 return Builder.CreateBitCast(Res, Ops[0]->getType());
16468 case X86::BI__builtin_ia32_kunpckdi:
16469 case X86::BI__builtin_ia32_kunpcksi:
16470 case X86::BI__builtin_ia32_kunpckhi: {
16471 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16475 for (
unsigned i = 0; i != NumElts; ++i)
16480 LHS =
Builder.CreateShuffleVector(LHS, LHS,
ArrayRef(Indices, NumElts / 2));
16481 RHS =
Builder.CreateShuffleVector(RHS, RHS,
ArrayRef(Indices, NumElts / 2));
16486 return Builder.CreateBitCast(Res, Ops[0]->getType());
16489 case X86::BI__builtin_ia32_vplzcntd_128:
16490 case X86::BI__builtin_ia32_vplzcntd_256:
16491 case X86::BI__builtin_ia32_vplzcntd_512:
16492 case X86::BI__builtin_ia32_vplzcntq_128:
16493 case X86::BI__builtin_ia32_vplzcntq_256:
16494 case X86::BI__builtin_ia32_vplzcntq_512: {
16498 case X86::BI__builtin_ia32_sqrtss:
16499 case X86::BI__builtin_ia32_sqrtsd: {
16500 Value *A =
Builder.CreateExtractElement(Ops[0], (uint64_t)0);
16502 if (
Builder.getIsFPConstrained()) {
16503 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
16506 A =
Builder.CreateConstrainedFPCall(F, {A});
16509 A =
Builder.CreateCall(F, {A});
16511 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16513 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16514 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16515 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16516 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16522 switch (BuiltinID) {
16524 llvm_unreachable(
"Unsupported intrinsic!");
16525 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16526 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16528 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16529 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16531 case X86::BI__builtin_ia32_sqrtss_round_mask:
16532 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16537 Value *A =
Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16539 if (
Builder.getIsFPConstrained()) {
16540 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
16543 A =
Builder.CreateConstrainedFPCall(F, A);
16546 A =
Builder.CreateCall(F, A);
16548 Value *Src =
Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16550 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16552 case X86::BI__builtin_ia32_sqrtpd256:
16553 case X86::BI__builtin_ia32_sqrtpd:
16554 case X86::BI__builtin_ia32_sqrtps256:
16555 case X86::BI__builtin_ia32_sqrtps:
16556 case X86::BI__builtin_ia32_sqrtph256:
16557 case X86::BI__builtin_ia32_sqrtph:
16558 case X86::BI__builtin_ia32_sqrtph512:
16559 case X86::BI__builtin_ia32_vsqrtnepbf16256:
16560 case X86::BI__builtin_ia32_vsqrtnepbf16:
16561 case X86::BI__builtin_ia32_vsqrtnepbf16512:
16562 case X86::BI__builtin_ia32_sqrtps512:
16563 case X86::BI__builtin_ia32_sqrtpd512: {
16564 if (Ops.size() == 2) {
16565 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16571 switch (BuiltinID) {
16573 llvm_unreachable(
"Unsupported intrinsic!");
16574 case X86::BI__builtin_ia32_sqrtph512:
16575 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16577 case X86::BI__builtin_ia32_sqrtps512:
16578 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16580 case X86::BI__builtin_ia32_sqrtpd512:
16581 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16587 if (
Builder.getIsFPConstrained()) {
16588 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
16590 Ops[0]->getType());
16591 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16594 return Builder.CreateCall(F, Ops[0]);
16598 case X86::BI__builtin_ia32_pmuludq128:
16599 case X86::BI__builtin_ia32_pmuludq256:
16600 case X86::BI__builtin_ia32_pmuludq512:
16603 case X86::BI__builtin_ia32_pmuldq128:
16604 case X86::BI__builtin_ia32_pmuldq256:
16605 case X86::BI__builtin_ia32_pmuldq512:
16608 case X86::BI__builtin_ia32_pternlogd512_mask:
16609 case X86::BI__builtin_ia32_pternlogq512_mask:
16610 case X86::BI__builtin_ia32_pternlogd128_mask:
16611 case X86::BI__builtin_ia32_pternlogd256_mask:
16612 case X86::BI__builtin_ia32_pternlogq128_mask:
16613 case X86::BI__builtin_ia32_pternlogq256_mask:
16616 case X86::BI__builtin_ia32_pternlogd512_maskz:
16617 case X86::BI__builtin_ia32_pternlogq512_maskz:
16618 case X86::BI__builtin_ia32_pternlogd128_maskz:
16619 case X86::BI__builtin_ia32_pternlogd256_maskz:
16620 case X86::BI__builtin_ia32_pternlogq128_maskz:
16621 case X86::BI__builtin_ia32_pternlogq256_maskz:
16624 case X86::BI__builtin_ia32_vpshldd128:
16625 case X86::BI__builtin_ia32_vpshldd256:
16626 case X86::BI__builtin_ia32_vpshldd512:
16627 case X86::BI__builtin_ia32_vpshldq128:
16628 case X86::BI__builtin_ia32_vpshldq256:
16629 case X86::BI__builtin_ia32_vpshldq512:
16630 case X86::BI__builtin_ia32_vpshldw128:
16631 case X86::BI__builtin_ia32_vpshldw256:
16632 case X86::BI__builtin_ia32_vpshldw512:
16635 case X86::BI__builtin_ia32_vpshrdd128:
16636 case X86::BI__builtin_ia32_vpshrdd256:
16637 case X86::BI__builtin_ia32_vpshrdd512:
16638 case X86::BI__builtin_ia32_vpshrdq128:
16639 case X86::BI__builtin_ia32_vpshrdq256:
16640 case X86::BI__builtin_ia32_vpshrdq512:
16641 case X86::BI__builtin_ia32_vpshrdw128:
16642 case X86::BI__builtin_ia32_vpshrdw256:
16643 case X86::BI__builtin_ia32_vpshrdw512:
16647 case X86::BI__builtin_ia32_vpshldvd128:
16648 case X86::BI__builtin_ia32_vpshldvd256:
16649 case X86::BI__builtin_ia32_vpshldvd512:
16650 case X86::BI__builtin_ia32_vpshldvq128:
16651 case X86::BI__builtin_ia32_vpshldvq256:
16652 case X86::BI__builtin_ia32_vpshldvq512:
16653 case X86::BI__builtin_ia32_vpshldvw128:
16654 case X86::BI__builtin_ia32_vpshldvw256:
16655 case X86::BI__builtin_ia32_vpshldvw512:
16658 case X86::BI__builtin_ia32_vpshrdvd128:
16659 case X86::BI__builtin_ia32_vpshrdvd256:
16660 case X86::BI__builtin_ia32_vpshrdvd512:
16661 case X86::BI__builtin_ia32_vpshrdvq128:
16662 case X86::BI__builtin_ia32_vpshrdvq256:
16663 case X86::BI__builtin_ia32_vpshrdvq512:
16664 case X86::BI__builtin_ia32_vpshrdvw128:
16665 case X86::BI__builtin_ia32_vpshrdvw256:
16666 case X86::BI__builtin_ia32_vpshrdvw512:
16671 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16672 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16673 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16674 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16675 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16678 IRBuilder<>::FastMathFlagGuard FMFGuard(
Builder);
16679 Builder.getFastMathFlags().setAllowReassoc();
16680 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16682 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16683 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16684 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16685 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16686 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16689 IRBuilder<>::FastMathFlagGuard FMFGuard(
Builder);
16690 Builder.getFastMathFlags().setAllowReassoc();
16691 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16693 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16694 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16695 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16696 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16697 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16700 IRBuilder<>::FastMathFlagGuard FMFGuard(
Builder);
16701 Builder.getFastMathFlags().setNoNaNs();
16702 return Builder.CreateCall(F, {Ops[0]});
16704 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16705 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16706 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16707 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16708 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16711 IRBuilder<>::FastMathFlagGuard FMFGuard(
Builder);
16712 Builder.getFastMathFlags().setNoNaNs();
16713 return Builder.CreateCall(F, {Ops[0]});
16716 case X86::BI__builtin_ia32_rdrand16_step:
16717 case X86::BI__builtin_ia32_rdrand32_step:
16718 case X86::BI__builtin_ia32_rdrand64_step:
16719 case X86::BI__builtin_ia32_rdseed16_step:
16720 case X86::BI__builtin_ia32_rdseed32_step:
16721 case X86::BI__builtin_ia32_rdseed64_step: {
16723 switch (BuiltinID) {
16724 default: llvm_unreachable(
"Unsupported intrinsic!");
16725 case X86::BI__builtin_ia32_rdrand16_step:
16726 ID = Intrinsic::x86_rdrand_16;
16728 case X86::BI__builtin_ia32_rdrand32_step:
16729 ID = Intrinsic::x86_rdrand_32;
16731 case X86::BI__builtin_ia32_rdrand64_step:
16732 ID = Intrinsic::x86_rdrand_64;
16734 case X86::BI__builtin_ia32_rdseed16_step:
16735 ID = Intrinsic::x86_rdseed_16;
16737 case X86::BI__builtin_ia32_rdseed32_step:
16738 ID = Intrinsic::x86_rdseed_32;
16740 case X86::BI__builtin_ia32_rdseed64_step:
16741 ID = Intrinsic::x86_rdseed_64;
16750 case X86::BI__builtin_ia32_addcarryx_u32:
16751 case X86::BI__builtin_ia32_addcarryx_u64:
16752 case X86::BI__builtin_ia32_subborrow_u32:
16753 case X86::BI__builtin_ia32_subborrow_u64: {
16755 switch (BuiltinID) {
16756 default: llvm_unreachable(
"Unsupported intrinsic!");
16757 case X86::BI__builtin_ia32_addcarryx_u32:
16758 IID = Intrinsic::x86_addcarry_32;
16760 case X86::BI__builtin_ia32_addcarryx_u64:
16761 IID = Intrinsic::x86_addcarry_64;
16763 case X86::BI__builtin_ia32_subborrow_u32:
16764 IID = Intrinsic::x86_subborrow_32;
16766 case X86::BI__builtin_ia32_subborrow_u64:
16767 IID = Intrinsic::x86_subborrow_64;
16772 { Ops[0], Ops[1], Ops[2] });
16778 case X86::BI__builtin_ia32_fpclassps128_mask:
16779 case X86::BI__builtin_ia32_fpclassps256_mask:
16780 case X86::BI__builtin_ia32_fpclassps512_mask:
16781 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16782 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16783 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16784 case X86::BI__builtin_ia32_fpclassph128_mask:
16785 case X86::BI__builtin_ia32_fpclassph256_mask:
16786 case X86::BI__builtin_ia32_fpclassph512_mask:
16787 case X86::BI__builtin_ia32_fpclasspd128_mask:
16788 case X86::BI__builtin_ia32_fpclasspd256_mask:
16789 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16791 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16792 Value *MaskIn = Ops[2];
16793 Ops.erase(&Ops[2]);
16796 switch (BuiltinID) {
16797 default: llvm_unreachable(
"Unsupported intrinsic!");
16798 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16799 ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
16801 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16802 ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
16804 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16805 ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
16807 case X86::BI__builtin_ia32_fpclassph128_mask:
16808 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16810 case X86::BI__builtin_ia32_fpclassph256_mask:
16811 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16813 case X86::BI__builtin_ia32_fpclassph512_mask:
16814 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16816 case X86::BI__builtin_ia32_fpclassps128_mask:
16817 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16819 case X86::BI__builtin_ia32_fpclassps256_mask:
16820 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16822 case X86::BI__builtin_ia32_fpclassps512_mask:
16823 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16825 case X86::BI__builtin_ia32_fpclasspd128_mask:
16826 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16828 case X86::BI__builtin_ia32_fpclasspd256_mask:
16829 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16831 case X86::BI__builtin_ia32_fpclasspd512_mask:
16832 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16840 case X86::BI__builtin_ia32_vp2intersect_q_512:
16841 case X86::BI__builtin_ia32_vp2intersect_q_256:
16842 case X86::BI__builtin_ia32_vp2intersect_q_128:
16843 case X86::BI__builtin_ia32_vp2intersect_d_512:
16844 case X86::BI__builtin_ia32_vp2intersect_d_256:
16845 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16847 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16850 switch (BuiltinID) {
16851 default: llvm_unreachable(
"Unsupported intrinsic!");
16852 case X86::BI__builtin_ia32_vp2intersect_q_512:
16853 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16855 case X86::BI__builtin_ia32_vp2intersect_q_256:
16856 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16858 case X86::BI__builtin_ia32_vp2intersect_q_128:
16859 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16861 case X86::BI__builtin_ia32_vp2intersect_d_512:
16862 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16864 case X86::BI__builtin_ia32_vp2intersect_d_256:
16865 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16867 case X86::BI__builtin_ia32_vp2intersect_d_128:
16868 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16882 case X86::BI__builtin_ia32_vpmultishiftqb128:
16883 case X86::BI__builtin_ia32_vpmultishiftqb256:
16884 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16886 switch (BuiltinID) {
16887 default: llvm_unreachable(
"Unsupported intrinsic!");
16888 case X86::BI__builtin_ia32_vpmultishiftqb128:
16889 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16891 case X86::BI__builtin_ia32_vpmultishiftqb256:
16892 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16894 case X86::BI__builtin_ia32_vpmultishiftqb512:
16895 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16902 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16903 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16904 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16906 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16907 Value *MaskIn = Ops[2];
16908 Ops.erase(&Ops[2]);
16911 switch (BuiltinID) {
16912 default: llvm_unreachable(
"Unsupported intrinsic!");
16913 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16914 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16916 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16917 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16919 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16920 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16929 case X86::BI__builtin_ia32_cmpeqps:
16930 case X86::BI__builtin_ia32_cmpeqpd:
16931 return getVectorFCmpIR(CmpInst::FCMP_OEQ,
false);
16932 case X86::BI__builtin_ia32_cmpltps:
16933 case X86::BI__builtin_ia32_cmpltpd:
16934 return getVectorFCmpIR(CmpInst::FCMP_OLT,
true);
16935 case X86::BI__builtin_ia32_cmpleps:
16936 case X86::BI__builtin_ia32_cmplepd:
16937 return getVectorFCmpIR(CmpInst::FCMP_OLE,
true);
16938 case X86::BI__builtin_ia32_cmpunordps:
16939 case X86::BI__builtin_ia32_cmpunordpd:
16940 return getVectorFCmpIR(CmpInst::FCMP_UNO,
false);
16941 case X86::BI__builtin_ia32_cmpneqps:
16942 case X86::BI__builtin_ia32_cmpneqpd:
16943 return getVectorFCmpIR(CmpInst::FCMP_UNE,
false);
16944 case X86::BI__builtin_ia32_cmpnltps:
16945 case X86::BI__builtin_ia32_cmpnltpd:
16946 return getVectorFCmpIR(CmpInst::FCMP_UGE,
true);
16947 case X86::BI__builtin_ia32_cmpnleps:
16948 case X86::BI__builtin_ia32_cmpnlepd:
16949 return getVectorFCmpIR(CmpInst::FCMP_UGT,
true);
16950 case X86::BI__builtin_ia32_cmpordps:
16951 case X86::BI__builtin_ia32_cmpordpd:
16952 return getVectorFCmpIR(CmpInst::FCMP_ORD,
false);
16953 case X86::BI__builtin_ia32_cmpph128_mask:
16954 case X86::BI__builtin_ia32_cmpph256_mask:
16955 case X86::BI__builtin_ia32_cmpph512_mask:
16956 case X86::BI__builtin_ia32_cmpps128_mask:
16957 case X86::BI__builtin_ia32_cmpps256_mask:
16958 case X86::BI__builtin_ia32_cmpps512_mask:
16959 case X86::BI__builtin_ia32_cmppd128_mask:
16960 case X86::BI__builtin_ia32_cmppd256_mask:
16961 case X86::BI__builtin_ia32_cmppd512_mask:
16962 case X86::BI__builtin_ia32_vcmppd256_round_mask:
16963 case X86::BI__builtin_ia32_vcmpps256_round_mask:
16964 case X86::BI__builtin_ia32_vcmpph256_round_mask:
16965 case X86::BI__builtin_ia32_vcmppbf16512_mask:
16966 case X86::BI__builtin_ia32_vcmppbf16256_mask:
16967 case X86::BI__builtin_ia32_vcmppbf16128_mask:
16970 case X86::BI__builtin_ia32_cmpps:
16971 case X86::BI__builtin_ia32_cmpps256:
16972 case X86::BI__builtin_ia32_cmppd:
16973 case X86::BI__builtin_ia32_cmppd256: {
16981 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16986 FCmpInst::Predicate Pred;
16990 switch (CC & 0xf) {
16991 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling =
false;
break;
16992 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling =
true;
break;
16993 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling =
true;
break;
16994 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling =
false;
break;
16995 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling =
false;
break;
16996 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling =
true;
break;
16997 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling =
true;
break;
16998 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling =
false;
break;
16999 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling =
false;
break;
17000 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling =
true;
break;
17001 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling =
true;
break;
17002 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling =
false;
break;
17003 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling =
false;
break;
17004 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling =
true;
break;
17005 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling =
true;
break;
17006 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling =
false;
break;
17007 default: llvm_unreachable(
"Unhandled CC");
17012 IsSignaling = !IsSignaling;
17019 if (
Builder.getIsFPConstrained() &&
17020 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
17024 switch (BuiltinID) {
17025 default: llvm_unreachable(
"Unexpected builtin");
17026 case X86::BI__builtin_ia32_cmpps:
17027 IID = Intrinsic::x86_sse_cmp_ps;
17029 case X86::BI__builtin_ia32_cmpps256:
17030 IID = Intrinsic::x86_avx_cmp_ps_256;
17032 case X86::BI__builtin_ia32_cmppd:
17033 IID = Intrinsic::x86_sse2_cmp_pd;
17035 case X86::BI__builtin_ia32_cmppd256:
17036 IID = Intrinsic::x86_avx_cmp_pd_256;
17038 case X86::BI__builtin_ia32_cmpph128_mask:
17039 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
17041 case X86::BI__builtin_ia32_cmpph256_mask:
17042 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
17044 case X86::BI__builtin_ia32_cmpph512_mask:
17045 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
17047 case X86::BI__builtin_ia32_cmpps512_mask:
17048 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
17050 case X86::BI__builtin_ia32_cmppd512_mask:
17051 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
17053 case X86::BI__builtin_ia32_cmpps128_mask:
17054 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
17056 case X86::BI__builtin_ia32_cmpps256_mask:
17057 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
17059 case X86::BI__builtin_ia32_cmppd128_mask:
17060 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
17062 case X86::BI__builtin_ia32_cmppd256_mask:
17063 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
17070 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17076 return Builder.CreateCall(Intr, Ops);
17087 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17090 Cmp =
Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
17092 Cmp =
Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
17096 return getVectorFCmpIR(Pred, IsSignaling);
17100 case X86::BI__builtin_ia32_cmpeqss:
17101 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
17102 case X86::BI__builtin_ia32_cmpltss:
17103 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
17104 case X86::BI__builtin_ia32_cmpless:
17105 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
17106 case X86::BI__builtin_ia32_cmpunordss:
17107 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
17108 case X86::BI__builtin_ia32_cmpneqss:
17109 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
17110 case X86::BI__builtin_ia32_cmpnltss:
17111 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
17112 case X86::BI__builtin_ia32_cmpnless:
17113 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
17114 case X86::BI__builtin_ia32_cmpordss:
17115 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
17116 case X86::BI__builtin_ia32_cmpeqsd:
17117 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
17118 case X86::BI__builtin_ia32_cmpltsd:
17119 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
17120 case X86::BI__builtin_ia32_cmplesd:
17121 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
17122 case X86::BI__builtin_ia32_cmpunordsd:
17123 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
17124 case X86::BI__builtin_ia32_cmpneqsd:
17125 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
17126 case X86::BI__builtin_ia32_cmpnltsd:
17127 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
17128 case X86::BI__builtin_ia32_cmpnlesd:
17129 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
17130 case X86::BI__builtin_ia32_cmpordsd:
17131 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
17134 case X86::BI__builtin_ia32_vcvtph2ps:
17135 case X86::BI__builtin_ia32_vcvtph2ps256:
17136 case X86::BI__builtin_ia32_vcvtph2ps_mask:
17137 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
17138 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
17139 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*
this,
E);
17144 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
17147 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
17148 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
17151 case X86::BI__builtin_ia32_cvtsbf162ss_32:
17154 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17155 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
17157 switch (BuiltinID) {
17158 default: llvm_unreachable(
"Unsupported intrinsic!");
17159 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17160 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
17162 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
17163 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
17170 case X86::BI__cpuid:
17171 case X86::BI__cpuidex: {
17173 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
17177 llvm::StructType *CpuidRetTy =
17179 llvm::FunctionType *FTy =
17182 StringRef
Asm, Constraints;
17183 if (
getTarget().getTriple().getArch() == llvm::Triple::x86) {
17185 Constraints =
"={ax},={bx},={cx},={dx},{ax},{cx}";
17188 Asm =
"xchgq %rbx, ${1:q}\n"
17190 "xchgq %rbx, ${1:q}";
17191 Constraints =
"={ax},=r,={cx},={dx},0,2";
17194 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy,
Asm, Constraints,
17196 Value *IACall =
Builder.CreateCall(IA, {FuncId, SubFuncId});
17199 for (
unsigned i = 0; i < 4; i++) {
17200 Value *Extracted =
Builder.CreateExtractValue(IACall, i);
17210 case X86::BI__emul:
17211 case X86::BI__emulu: {
17213 bool isSigned = (BuiltinID == X86::BI__emul);
17216 return Builder.CreateMul(LHS, RHS,
"", !isSigned, isSigned);
17218 case X86::BI__mulh:
17219 case X86::BI__umulh:
17220 case X86::BI_mul128:
17221 case X86::BI_umul128: {
17223 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
17225 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
17226 Value *LHS =
Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
17227 Value *RHS =
Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
17229 Value *MulResult, *HigherBits;
17231 MulResult =
Builder.CreateNSWMul(LHS, RHS);
17232 HigherBits =
Builder.CreateAShr(MulResult, 64);
17234 MulResult =
Builder.CreateNUWMul(LHS, RHS);
17235 HigherBits =
Builder.CreateLShr(MulResult, 64);
17237 HigherBits =
Builder.CreateIntCast(HigherBits, ResType, IsSigned);
17239 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
17244 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
17247 case X86::BI__faststorefence: {
17248 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17249 llvm::SyncScope::System);
17251 case X86::BI__shiftleft128:
17252 case X86::BI__shiftright128: {
17254 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
17259 std::swap(Ops[0], Ops[1]);
17261 return Builder.CreateCall(F, Ops);
17263 case X86::BI_ReadWriteBarrier:
17264 case X86::BI_ReadBarrier:
17265 case X86::BI_WriteBarrier: {
17266 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17267 llvm::SyncScope::SingleThread);
17270 case X86::BI_AddressOfReturnAddress: {
17273 return Builder.CreateCall(F);
17275 case X86::BI__stosb: {
17281 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17282 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17283 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17284 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17285 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17286 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17287 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17288 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
17290 switch (BuiltinID) {
17292 llvm_unreachable(
"Unsupported intrinsic!");
17293 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17294 IID = Intrinsic::x86_t2rpntlvwz0_internal;
17296 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17297 IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
17299 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17300 IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
17302 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17303 IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
17305 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17306 IID = Intrinsic::x86_t2rpntlvwz1_internal;
17308 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17309 IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
17311 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17312 IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
17314 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
17315 IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
17321 {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
17324 assert(PtrTy &&
"arg3 must be of pointer type");
17331 Value *VecT0 =
Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17337 Value *VecT1 =
Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17351 case X86::BI__int2c: {
17353 llvm::FunctionType *FTy = llvm::FunctionType::get(
VoidTy,
false);
17354 llvm::InlineAsm *IA =
17355 llvm::InlineAsm::get(FTy,
"int $$0x2c",
"",
true);
17356 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
17358 llvm::Attribute::NoReturn);
17359 llvm::CallInst *CI =
Builder.CreateCall(IA);
17360 CI->setAttributes(NoReturnAttr);
17363 case X86::BI__readfsbyte:
17364 case X86::BI__readfsword:
17365 case X86::BI__readfsdword:
17366 case X86::BI__readfsqword: {
17372 Load->setVolatile(
true);
17375 case X86::BI__readgsbyte:
17376 case X86::BI__readgsword:
17377 case X86::BI__readgsdword:
17378 case X86::BI__readgsqword: {
17384 Load->setVolatile(
true);
17387 case X86::BI__builtin_ia32_encodekey128_u32: {
17388 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
17392 for (
int i = 0; i < 3; ++i) {
17400 case X86::BI__builtin_ia32_encodekey256_u32: {
17401 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
17406 for (
int i = 0; i < 4; ++i) {
17414 case X86::BI__builtin_ia32_aesenc128kl_u8:
17415 case X86::BI__builtin_ia32_aesdec128kl_u8:
17416 case X86::BI__builtin_ia32_aesenc256kl_u8:
17417 case X86::BI__builtin_ia32_aesdec256kl_u8: {
17419 StringRef BlockName;
17420 switch (BuiltinID) {
17422 llvm_unreachable(
"Unexpected builtin");
17423 case X86::BI__builtin_ia32_aesenc128kl_u8:
17424 IID = Intrinsic::x86_aesenc128kl;
17425 BlockName =
"aesenc128kl";
17427 case X86::BI__builtin_ia32_aesdec128kl_u8:
17428 IID = Intrinsic::x86_aesdec128kl;
17429 BlockName =
"aesdec128kl";
17431 case X86::BI__builtin_ia32_aesenc256kl_u8:
17432 IID = Intrinsic::x86_aesenc256kl;
17433 BlockName =
"aesenc256kl";
17435 case X86::BI__builtin_ia32_aesdec256kl_u8:
17436 IID = Intrinsic::x86_aesdec256kl;
17437 BlockName =
"aesdec256kl";
17443 BasicBlock *NoError =
17451 Builder.CreateCondBr(Succ, NoError, Error);
17453 Builder.SetInsertPoint(NoError);
17457 Builder.SetInsertPoint(Error);
17458 Constant *
Zero = llvm::Constant::getNullValue(Out->getType());
17465 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17466 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17467 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17468 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
17470 StringRef BlockName;
17471 switch (BuiltinID) {
17472 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17473 IID = Intrinsic::x86_aesencwide128kl;
17474 BlockName =
"aesencwide128kl";
17476 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17477 IID = Intrinsic::x86_aesdecwide128kl;
17478 BlockName =
"aesdecwide128kl";
17480 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17481 IID = Intrinsic::x86_aesencwide256kl;
17482 BlockName =
"aesencwide256kl";
17484 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
17485 IID = Intrinsic::x86_aesdecwide256kl;
17486 BlockName =
"aesdecwide256kl";
17490 llvm::Type *Ty = FixedVectorType::get(
Builder.getInt64Ty(), 2);
17493 for (
int i = 0; i != 8; ++i) {
17494 Value *Ptr =
Builder.CreateConstGEP1_32(Ty, Ops[1], i);
17500 BasicBlock *NoError =
17507 Builder.CreateCondBr(Succ, NoError, Error);
17509 Builder.SetInsertPoint(NoError);
17510 for (
int i = 0; i != 8; ++i) {
17517 Builder.SetInsertPoint(Error);
17518 for (
int i = 0; i != 8; ++i) {
17520 Constant *
Zero = llvm::Constant::getNullValue(Out->getType());
17521 Value *Ptr =
Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
17529 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
17532 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
17533 Intrinsic::ID IID = IsConjFMA
17534 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
17535 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
17539 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
17542 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
17543 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
17544 : Intrinsic::x86_avx10_mask_vfmaddcph256;
17548 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
17551 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
17552 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17553 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17558 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
17561 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
17562 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17563 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17565 static constexpr int Mask[] = {0, 5, 6, 7};
17566 return Builder.CreateShuffleVector(
Call, Ops[2], Mask);
17568 case X86::BI__builtin_ia32_prefetchi:
17571 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
17572 llvm::ConstantInt::get(Int32Ty, 0)});
17590 Intrinsic::ID
ID = Intrinsic::not_intrinsic;
17592#include "llvm/TargetParser/PPCTargetParser.def"
// GenAIXPPCBuiltinCpuExpr: local helper (captures by reference) invoked by the
// __builtin_cpu_is and __builtin_cpu_supports cases below when the target
// triple is AIX. It yields a Value* describing the result of the query.
//   SupportMethod - selects how the answer is obtained; it is compared against
//                   BUILTIN_PPC_FALSE, BUILTIN_PPC_TRUE, USE_SYS_CONF and
//                   SYS_CALL, and asserted to be <= SYS_CALL.
//   FieldIdx      - second entry of the index array built on the USE_SYS_CONF
//                   path (the first entry is the constant 0).
//   Mask          - AND-ed into the fetched field value.
//   CompOp,
//   OpValue       - predicate and integer constant passed, together with the
//                   field value, to the final call of the lambda.
// NOTE(review): several statements of this lambda (the bodies of the first two
// ifs, the code that actually loads/calls to produce FieldValue, any guard
// around the masking, and the callee of the final expression) are not visible
// in this excerpt; presumably the result is an integer compare - confirm
// against the full file.
17593 auto GenAIXPPCBuiltinCpuExpr = [&](
unsigned SupportMethod,
unsigned FieldIdx,
17594 unsigned Mask, CmpInst::Predicate CompOp,
17595 unsigned OpValue) ->
Value * {
17596 if (SupportMethod == BUILTIN_PPC_FALSE)
17599 if (SupportMethod == BUILTIN_PPC_TRUE)
17602 assert(SupportMethod <= SYS_CALL &&
"Invalid value for SupportMethod.");
// FieldValue starts out null and is expected to be filled in by one of the
// two branches below; the assert after them fires if neither did.
17604 llvm::Value *FieldValue =
nullptr;
17605 if (SupportMethod == USE_SYS_CONF) {
17606 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17607 llvm::Constant *SysConf =
17611 llvm::Value *Idxs[] = {ConstantInt::get(
Int32Ty, 0),
17612 ConstantInt::get(
Int32Ty, FieldIdx)};
17617 }
else if (SupportMethod == SYS_CALL) {
17618 llvm::FunctionType *FTy =
17620 llvm::FunctionCallee
Func =
17626 assert(FieldValue &&
17627 "SupportMethod value is not defined in PPCTargetParser.def.");
// Keep only the bits of the field selected by Mask.
17630 FieldValue =
Builder.CreateAnd(FieldValue, Mask);
// The constant operand is created as i64 or i32 so that it matches the
// integer width of FieldValue; other widths are rejected by the assertion
// message below.
17632 llvm::Type *ValueType = FieldValue->getType();
17633 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17635 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17636 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17639 CompOp, FieldValue,
17640 ConstantInt::get(IsValueType64Bit ?
Int64Ty :
Int32Ty, OpValue));
17643 switch (BuiltinID) {
17644 default:
return nullptr;
17646 case Builtin::BI__builtin_cpu_is: {
17648 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17651 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17652 typedef std::tuple<unsigned, unsigned, unsigned, unsigned>
CPUInfo;
17654 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17655 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17656#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17658 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17659#include "llvm/TargetParser/PPCTargetParser.def"
17660 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17661 BUILTIN_PPC_UNSUPPORTED, 0}));
17663 if (Triple.isOSAIX()) {
17664 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17665 "Invalid CPU name. Missed by SemaChecking?");
17666 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17667 ICmpInst::ICMP_EQ, AIXIDValue);
17670 assert(Triple.isOSLinux() &&
17671 "__builtin_cpu_is() is only supported for AIX and Linux.");
17673 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17674 "Invalid CPU name. Missed by SemaChecking?");
17676 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17679 Value *Op0 = llvm::ConstantInt::get(
Int32Ty, PPC_FAWORD_CPUID);
17681 Value *TheCall =
Builder.CreateCall(F, {Op0},
"cpu_is");
17682 return Builder.CreateICmpEQ(TheCall,
17683 llvm::ConstantInt::get(
Int32Ty, LinuxIDValue));
17685 case Builtin::BI__builtin_cpu_supports: {
17688 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17689 if (Triple.isOSAIX()) {
17690 unsigned SupportMethod, FieldIdx, Mask,
Value;
17691 CmpInst::Predicate CompOp;
17695 std::tie(SupportMethod, FieldIdx, Mask, CompOp,
Value) =
17696 static_cast<CPUSupportType
>(StringSwitch<CPUSupportType>(CPUStr)
17697#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17699 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17700#include "llvm/TargetParser/PPCTargetParser.def"
17701 .Default({BUILTIN_PPC_FALSE, 0, 0,
17702 CmpInst::Predicate(), 0}));
17703 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17707 assert(Triple.isOSLinux() &&
17708 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17709 unsigned FeatureWord;
17711 std::tie(FeatureWord, BitMask) =
17712 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17713#define
PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17714 .Case(Name, {FA_WORD, Bitmask})
17715#include
"llvm/TargetParser/PPCTargetParser.def"
17719 Value *Op0 = llvm::ConstantInt::get(
Int32Ty, FeatureWord);
17721 Value *TheCall =
Builder.CreateCall(F, {Op0},
"cpu_supports");
17723 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(
Int32Ty, BitMask));
17724 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(
Int32Ty));
17725#undef PPC_FAWORD_HWCAP
17726#undef PPC_FAWORD_HWCAP2
17727#undef PPC_FAWORD_CPUID
17732 case PPC::BI__builtin_ppc_get_timebase:
17736 case PPC::BI__builtin_altivec_lvx:
17737 case PPC::BI__builtin_altivec_lvxl:
17738 case PPC::BI__builtin_altivec_lvebx:
17739 case PPC::BI__builtin_altivec_lvehx:
17740 case PPC::BI__builtin_altivec_lvewx:
17741 case PPC::BI__builtin_altivec_lvsl:
17742 case PPC::BI__builtin_altivec_lvsr:
17743 case PPC::BI__builtin_vsx_lxvd2x:
17744 case PPC::BI__builtin_vsx_lxvw4x:
17745 case PPC::BI__builtin_vsx_lxvd2x_be:
17746 case PPC::BI__builtin_vsx_lxvw4x_be:
17747 case PPC::BI__builtin_vsx_lxvl:
17748 case PPC::BI__builtin_vsx_lxvll:
17753 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17754 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17759 switch (BuiltinID) {
17760 default: llvm_unreachable(
"Unsupported ld/lvsl/lvsr intrinsic!");
17761 case PPC::BI__builtin_altivec_lvx:
17762 ID = Intrinsic::ppc_altivec_lvx;
17764 case PPC::BI__builtin_altivec_lvxl:
17765 ID = Intrinsic::ppc_altivec_lvxl;
17767 case PPC::BI__builtin_altivec_lvebx:
17768 ID = Intrinsic::ppc_altivec_lvebx;
17770 case PPC::BI__builtin_altivec_lvehx:
17771 ID = Intrinsic::ppc_altivec_lvehx;
17773 case PPC::BI__builtin_altivec_lvewx:
17774 ID = Intrinsic::ppc_altivec_lvewx;
17776 case PPC::BI__builtin_altivec_lvsl:
17777 ID = Intrinsic::ppc_altivec_lvsl;
17779 case PPC::BI__builtin_altivec_lvsr:
17780 ID = Intrinsic::ppc_altivec_lvsr;
17782 case PPC::BI__builtin_vsx_lxvd2x:
17783 ID = Intrinsic::ppc_vsx_lxvd2x;
17785 case PPC::BI__builtin_vsx_lxvw4x:
17786 ID = Intrinsic::ppc_vsx_lxvw4x;
17788 case PPC::BI__builtin_vsx_lxvd2x_be:
17789 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17791 case PPC::BI__builtin_vsx_lxvw4x_be:
17792 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17794 case PPC::BI__builtin_vsx_lxvl:
17795 ID = Intrinsic::ppc_vsx_lxvl;
17797 case PPC::BI__builtin_vsx_lxvll:
17798 ID = Intrinsic::ppc_vsx_lxvll;
17802 return Builder.CreateCall(F, Ops,
"");
17806 case PPC::BI__builtin_altivec_stvx:
17807 case PPC::BI__builtin_altivec_stvxl:
17808 case PPC::BI__builtin_altivec_stvebx:
17809 case PPC::BI__builtin_altivec_stvehx:
17810 case PPC::BI__builtin_altivec_stvewx:
17811 case PPC::BI__builtin_vsx_stxvd2x:
17812 case PPC::BI__builtin_vsx_stxvw4x:
17813 case PPC::BI__builtin_vsx_stxvd2x_be:
17814 case PPC::BI__builtin_vsx_stxvw4x_be:
17815 case PPC::BI__builtin_vsx_stxvl:
17816 case PPC::BI__builtin_vsx_stxvll:
17822 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17823 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17828 switch (BuiltinID) {
17829 default: llvm_unreachable(
"Unsupported st intrinsic!");
17830 case PPC::BI__builtin_altivec_stvx:
17831 ID = Intrinsic::ppc_altivec_stvx;
17833 case PPC::BI__builtin_altivec_stvxl:
17834 ID = Intrinsic::ppc_altivec_stvxl;
17836 case PPC::BI__builtin_altivec_stvebx:
17837 ID = Intrinsic::ppc_altivec_stvebx;
17839 case PPC::BI__builtin_altivec_stvehx:
17840 ID = Intrinsic::ppc_altivec_stvehx;
17842 case PPC::BI__builtin_altivec_stvewx:
17843 ID = Intrinsic::ppc_altivec_stvewx;
17845 case PPC::BI__builtin_vsx_stxvd2x:
17846 ID = Intrinsic::ppc_vsx_stxvd2x;
17848 case PPC::BI__builtin_vsx_stxvw4x:
17849 ID = Intrinsic::ppc_vsx_stxvw4x;
17851 case PPC::BI__builtin_vsx_stxvd2x_be:
17852 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17854 case PPC::BI__builtin_vsx_stxvw4x_be:
17855 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17857 case PPC::BI__builtin_vsx_stxvl:
17858 ID = Intrinsic::ppc_vsx_stxvl;
17860 case PPC::BI__builtin_vsx_stxvll:
17861 ID = Intrinsic::ppc_vsx_stxvll;
17865 return Builder.CreateCall(F, Ops,
"");
17867 case PPC::BI__builtin_vsx_ldrmb: {
17873 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17878 if (NumBytes == 16) {
17886 for (
int Idx = 0; Idx < 16; Idx++)
17887 RevMask.push_back(15 - Idx);
17888 return Builder.CreateShuffleVector(LD, LD, RevMask);
17892 llvm::Function *Lvs =
CGM.
getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17893 : Intrinsic::ppc_altivec_lvsl);
17894 llvm::Function *Vperm =
CGM.
getIntrinsic(Intrinsic::ppc_altivec_vperm);
17896 Int8Ty, Op0, ConstantInt::get(Op1->
getType(), NumBytes - 1));
17898 Value *HiLd =
Builder.CreateCall(Lvx, HiMem,
"ld.hi");
17901 Op0 = IsLE ? HiLd : LoLd;
17902 Op1 = IsLE ? LoLd : HiLd;
17903 Value *AllElts =
Builder.CreateCall(Vperm, {Op0, Op1, Mask1},
"shuffle1");
17904 Constant *
Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->
getType());
17908 for (
int Idx = 0; Idx < 16; Idx++) {
17909 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17910 : 16 - (NumBytes - Idx);
17911 Consts.push_back(Val);
17913 return Builder.CreateShuffleVector(
Builder.CreateBitCast(AllElts, ResTy),
17917 for (
int Idx = 0; Idx < 16; Idx++)
17918 Consts.push_back(
Builder.getInt8(NumBytes + Idx));
17919 Value *Mask2 = ConstantVector::get(Consts);
17920 return Builder.CreateBitCast(
17921 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2},
"shuffle2"), ResTy);
17923 case PPC::BI__builtin_vsx_strmb: {
17927 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
// StoreSubVec: local helper (captures by reference) used by the
// __builtin_vsx_strmb case to handle one piece of the vector operand Op2.
// The code below calls it with Width 16, 8, 4, 2 and 1.
//   Width  - size of the piece; any value not handled by the (elided) width
//            dispatch hits the llvm_unreachable below.
//   Offset - callers pass values derived from NumBytes; its use inside the
//            lambda is not visible in this excerpt (presumably a byte offset
//            from the destination pointer - TODO confirm).
//   EltNo  - index of the element extracted from the reinterpreted vector.
// NOTE(review): the conditions guarding the reversal path, the width switch
// cases, the definition of F and the final store/return are elided here.
17929 auto StoreSubVec = [&](
unsigned Width,
unsigned Offset,
unsigned EltNo) {
17933 Value *StVec = Op2;
// RevMask holds the indices 15, 14, ..., 0, so the shuffle of Op2 with itself
// picks Op2's elements in reverse index order.
17936 for (
int Idx = 0; Idx < 16; Idx++)
17937 RevMask.push_back(15 - Idx);
17938 StVec =
Builder.CreateShuffleVector(Op2, Op2, RevMask);
17944 unsigned NumElts = 0;
17947 llvm_unreachable(
"width for stores must be a power of 2");
17966 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
// Select the single element that this call is responsible for.
17969 Value *Elt =
Builder.CreateExtractElement(Vec, EltNo);
// When IsLE is set and Width is greater than 1, the element is passed through
// F before use (F's definition is elided; presumably a byte swap - confirm).
17970 if (IsLE && Width > 1) {
17972 Elt =
Builder.CreateCall(F, Elt);
17977 unsigned Stored = 0;
17978 unsigned RemainingBytes = NumBytes;
17980 if (NumBytes == 16)
17981 return StoreSubVec(16, 0, 0);
17982 if (NumBytes >= 8) {
17983 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17984 RemainingBytes -= 8;
17987 if (RemainingBytes >= 4) {
17988 Result = StoreSubVec(4, NumBytes - Stored - 4,
17989 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17990 RemainingBytes -= 4;
17993 if (RemainingBytes >= 2) {
17994 Result = StoreSubVec(2, NumBytes - Stored - 2,
17995 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17996 RemainingBytes -= 2;
17999 if (RemainingBytes)
18001 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
18005 case PPC::BI__builtin_vsx_xvsqrtsp:
18006 case PPC::BI__builtin_vsx_xvsqrtdp: {
18009 if (
Builder.getIsFPConstrained()) {
18011 Intrinsic::experimental_constrained_sqrt, ResultType);
18012 return Builder.CreateConstrainedFPCall(F,
X);
18019 case PPC::BI__builtin_altivec_vclzb:
18020 case PPC::BI__builtin_altivec_vclzh:
18021 case PPC::BI__builtin_altivec_vclzw:
18022 case PPC::BI__builtin_altivec_vclzd: {
18025 Value *Undef = ConstantInt::get(
Builder.getInt1Ty(),
false);
18027 return Builder.CreateCall(F, {
X, Undef});
18029 case PPC::BI__builtin_altivec_vctzb:
18030 case PPC::BI__builtin_altivec_vctzh:
18031 case PPC::BI__builtin_altivec_vctzw:
18032 case PPC::BI__builtin_altivec_vctzd: {
18035 Value *Undef = ConstantInt::get(
Builder.getInt1Ty(),
false);
18037 return Builder.CreateCall(F, {
X, Undef});
18039 case PPC::BI__builtin_altivec_vinsd:
18040 case PPC::BI__builtin_altivec_vinsw:
18041 case PPC::BI__builtin_altivec_vinsd_elt:
18042 case PPC::BI__builtin_altivec_vinsw_elt: {
18048 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18049 BuiltinID == PPC::BI__builtin_altivec_vinsd);
18051 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18052 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
18055 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18057 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
18061 int ValidMaxValue = 0;
18063 ValidMaxValue = (Is32bit) ? 12 : 8;
18065 ValidMaxValue = (Is32bit) ? 3 : 1;
18068 int64_t ConstArg = ArgCI->getSExtValue();
18071 std::string RangeErrMsg = IsUnaligned ?
"byte" :
"element";
18072 RangeErrMsg +=
" number " + llvm::to_string(ConstArg);
18073 RangeErrMsg +=
" is outside of the valid range [0, ";
18074 RangeErrMsg += llvm::to_string(ValidMaxValue) +
"]";
18077 if (ConstArg < 0 || ConstArg > ValidMaxValue)
18081 if (!IsUnaligned) {
18082 ConstArg *= Is32bit ? 4 : 8;
18085 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
18088 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
18089 Op2 = ConstantInt::getSigned(
Int32Ty, ConstArg);
18093 ?
Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(
Int32Ty, 4))
18095 llvm::FixedVectorType::get(
Int64Ty, 2));
18096 return Builder.CreateBitCast(
18099 case PPC::BI__builtin_altivec_vadduqm:
18100 case PPC::BI__builtin_altivec_vsubuqm: {
18103 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
18104 Op0 =
Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
18105 Op1 =
Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
18106 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
18107 return Builder.CreateAdd(Op0, Op1,
"vadduqm");
18109 return Builder.CreateSub(Op0, Op1,
"vsubuqm");
18111 case PPC::BI__builtin_altivec_vaddcuq_c:
18112 case PPC::BI__builtin_altivec_vsubcuq_c: {
18116 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18118 Ops.push_back(
Builder.CreateBitCast(Op0, V1I128Ty));
18119 Ops.push_back(
Builder.CreateBitCast(Op1, V1I128Ty));
18120 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
18121 ? Intrinsic::ppc_altivec_vaddcuq
18122 : Intrinsic::ppc_altivec_vsubcuq;
18125 case PPC::BI__builtin_altivec_vaddeuqm_c:
18126 case PPC::BI__builtin_altivec_vaddecuq_c:
18127 case PPC::BI__builtin_altivec_vsubeuqm_c:
18128 case PPC::BI__builtin_altivec_vsubecuq_c: {
18133 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18135 Ops.push_back(
Builder.CreateBitCast(Op0, V1I128Ty));
18136 Ops.push_back(
Builder.CreateBitCast(Op1, V1I128Ty));
18137 Ops.push_back(
Builder.CreateBitCast(Op2, V1I128Ty));
18138 switch (BuiltinID) {
18140 llvm_unreachable(
"Unsupported intrinsic!");
18141 case PPC::BI__builtin_altivec_vaddeuqm_c:
18142 ID = Intrinsic::ppc_altivec_vaddeuqm;
18144 case PPC::BI__builtin_altivec_vaddecuq_c:
18145 ID = Intrinsic::ppc_altivec_vaddecuq;
18147 case PPC::BI__builtin_altivec_vsubeuqm_c:
18148 ID = Intrinsic::ppc_altivec_vsubeuqm;
18150 case PPC::BI__builtin_altivec_vsubecuq_c:
18151 ID = Intrinsic::ppc_altivec_vsubecuq;
18156 case PPC::BI__builtin_ppc_rldimi:
18157 case PPC::BI__builtin_ppc_rlwimi: {
18164 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
18174 ? Intrinsic::ppc_rldimi
18175 : Intrinsic::ppc_rlwimi),
18176 {Op0, Op1, Op2, Op3});
18178 case PPC::BI__builtin_ppc_rlwnm: {
18185 case PPC::BI__builtin_ppc_poppar4:
18186 case PPC::BI__builtin_ppc_poppar8: {
18188 llvm::Type *ArgType = Op0->
getType();
18194 if (
Result->getType() != ResultType)
18199 case PPC::BI__builtin_ppc_cmpb: {
18202 if (
getTarget().getTriple().isPPC64()) {
18205 return Builder.CreateCall(F, {Op0, Op1},
"cmpb");
18225 Constant *ShiftAmt = ConstantInt::get(
Int64Ty, 32);
18234 Value *ResHi =
Builder.CreateShl(ResHiShift, ShiftAmt);
18235 return Builder.CreateOr(ResLo, ResHi);
18238 case PPC::BI__builtin_vsx_xvcpsgnsp:
18239 case PPC::BI__builtin_vsx_xvcpsgndp: {
18243 ID = Intrinsic::copysign;
18245 return Builder.CreateCall(F, {
X, Y});
18248 case PPC::BI__builtin_vsx_xvrspip:
18249 case PPC::BI__builtin_vsx_xvrdpip:
18250 case PPC::BI__builtin_vsx_xvrdpim:
18251 case PPC::BI__builtin_vsx_xvrspim:
18252 case PPC::BI__builtin_vsx_xvrdpi:
18253 case PPC::BI__builtin_vsx_xvrspi:
18254 case PPC::BI__builtin_vsx_xvrdpic:
18255 case PPC::BI__builtin_vsx_xvrspic:
18256 case PPC::BI__builtin_vsx_xvrdpiz:
18257 case PPC::BI__builtin_vsx_xvrspiz: {
18260 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
18261 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
18263 ? Intrinsic::experimental_constrained_floor
18264 : Intrinsic::floor;
18265 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
18266 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
18268 ? Intrinsic::experimental_constrained_round
18269 : Intrinsic::round;
18270 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
18271 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
18273 ? Intrinsic::experimental_constrained_rint
18275 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
18276 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
18278 ? Intrinsic::experimental_constrained_ceil
18280 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
18281 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
18283 ? Intrinsic::experimental_constrained_trunc
18284 : Intrinsic::trunc;
18286 return Builder.getIsFPConstrained() ?
Builder.CreateConstrainedFPCall(F,
X)
18291 case PPC::BI__builtin_vsx_xvabsdp:
18292 case PPC::BI__builtin_vsx_xvabssp: {
18300 case PPC::BI__builtin_ppc_recipdivf:
18301 case PPC::BI__builtin_ppc_recipdivd:
18302 case PPC::BI__builtin_ppc_rsqrtf:
18303 case PPC::BI__builtin_ppc_rsqrtd: {
18304 FastMathFlags FMF =
Builder.getFastMathFlags();
18305 Builder.getFastMathFlags().setFast();
18309 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
18310 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
18313 Builder.getFastMathFlags() &= (FMF);
18316 auto *One = ConstantFP::get(ResultType, 1.0);
18319 Builder.getFastMathFlags() &= (FMF);
18322 case PPC::BI__builtin_ppc_alignx: {
18325 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
18326 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
18327 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
18328 llvm::Value::MaximumAlignment);
18332 AlignmentCI,
nullptr);
18335 case PPC::BI__builtin_ppc_rdlam: {
18339 llvm::Type *Ty = Op0->
getType();
18340 Value *ShiftAmt =
Builder.CreateIntCast(Op1, Ty,
false);
18342 Value *Rotate =
Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
18343 return Builder.CreateAnd(Rotate, Op2);
18345 case PPC::BI__builtin_ppc_load2r: {
18352 case PPC::BI__builtin_ppc_fnmsub:
18353 case PPC::BI__builtin_ppc_fnmsubs:
18354 case PPC::BI__builtin_vsx_xvmaddadp:
18355 case PPC::BI__builtin_vsx_xvmaddasp:
18356 case PPC::BI__builtin_vsx_xvnmaddadp:
18357 case PPC::BI__builtin_vsx_xvnmaddasp:
18358 case PPC::BI__builtin_vsx_xvmsubadp:
18359 case PPC::BI__builtin_vsx_xvmsubasp:
18360 case PPC::BI__builtin_vsx_xvnmsubadp:
18361 case PPC::BI__builtin_vsx_xvnmsubasp: {
18367 if (
Builder.getIsFPConstrained())
18368 F =
CGM.
getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18371 switch (BuiltinID) {
18372 case PPC::BI__builtin_vsx_xvmaddadp:
18373 case PPC::BI__builtin_vsx_xvmaddasp:
18374 if (
Builder.getIsFPConstrained())
18375 return Builder.CreateConstrainedFPCall(F, {
X, Y, Z});
18377 return Builder.CreateCall(F, {
X, Y, Z});
18378 case PPC::BI__builtin_vsx_xvnmaddadp:
18379 case PPC::BI__builtin_vsx_xvnmaddasp:
18380 if (
Builder.getIsFPConstrained())
18382 Builder.CreateConstrainedFPCall(F, {X, Y, Z}),
"neg");
18384 return Builder.CreateFNeg(
Builder.CreateCall(F, {X, Y, Z}),
"neg");
18385 case PPC::BI__builtin_vsx_xvmsubadp:
18386 case PPC::BI__builtin_vsx_xvmsubasp:
18387 if (
Builder.getIsFPConstrained())
18388 return Builder.CreateConstrainedFPCall(
18389 F, {
X, Y,
Builder.CreateFNeg(Z,
"neg")});
18392 case PPC::BI__builtin_ppc_fnmsub:
18393 case PPC::BI__builtin_ppc_fnmsubs:
18394 case PPC::BI__builtin_vsx_xvnmsubadp:
18395 case PPC::BI__builtin_vsx_xvnmsubasp:
18396 if (
Builder.getIsFPConstrained())
18398 Builder.CreateConstrainedFPCall(
18399 F, {X, Y, Builder.CreateFNeg(Z,
"neg")}),
18405 llvm_unreachable(
"Unknown FMA operation");
18409 case PPC::BI__builtin_vsx_insertword: {
18417 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18419 "Third arg to xxinsertw intrinsic must be constant integer");
18421 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18428 std::swap(Op0, Op1);
18432 Op1 =
Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(
Int64Ty, 2));
18436 Op0 =
Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(
Int64Ty, 2));
18440 Index = MaxIndex - Index;
18444 Op0 =
Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(
Int32Ty, 4));
18445 Op2 = ConstantInt::getSigned(
Int32Ty, Index);
18446 return Builder.CreateCall(F, {Op0, Op1, Op2});
18449 case PPC::BI__builtin_vsx_extractuword: {
18452 llvm::Function *F =
CGM.
getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
18455 Op0 =
Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(
Int64Ty, 2));
18459 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
18461 "Second Arg to xxextractuw intrinsic must be a constant integer!");
18463 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18467 Index = MaxIndex - Index;
18468 Op1 = ConstantInt::getSigned(
Int32Ty, Index);
18473 Value *ShuffleCall =
18475 return ShuffleCall;
18477 Op1 = ConstantInt::getSigned(
Int32Ty, Index);
18478 return Builder.CreateCall(F, {Op0, Op1});
18482 case PPC::BI__builtin_vsx_xxpermdi: {
18486 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18487 assert(ArgCI &&
"Third arg must be constant integer!");
18489 unsigned Index = ArgCI->getZExtValue();
18490 Op0 =
Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(
Int64Ty, 2));
18491 Op1 =
Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(
Int64Ty, 2));
18496 int ElemIdx0 = (Index & 2) >> 1;
18497 int ElemIdx1 = 2 + (Index & 1);
18499 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
18500 Value *ShuffleCall =
Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18503 return Builder.CreateBitCast(ShuffleCall, RetTy);
18506 case PPC::BI__builtin_vsx_xxsldwi: {
18510 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18511 assert(ArgCI &&
"Third argument must be a compile time constant");
18512 unsigned Index = ArgCI->getZExtValue() & 0x3;
18513 Op0 =
Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(
Int32Ty, 4));
18514 Op1 =
Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(
Int32Ty, 4));
18525 ElemIdx0 = (8 - Index) % 8;
18526 ElemIdx1 = (9 - Index) % 8;
18527 ElemIdx2 = (10 - Index) % 8;
18528 ElemIdx3 = (11 - Index) % 8;
18532 ElemIdx1 = Index + 1;
18533 ElemIdx2 = Index + 2;
18534 ElemIdx3 = Index + 3;
18537 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
18538 Value *ShuffleCall =
Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18541 return Builder.CreateBitCast(ShuffleCall, RetTy);
18544 case PPC::BI__builtin_pack_vector_int128: {
18548 Value *PoisonValue =
18549 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->
getType(), 2));
18551 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
18552 Res =
Builder.CreateInsertElement(Res, Op1,
18553 (uint64_t)(isLittleEndian ? 0 : 1));
18557 case PPC::BI__builtin_unpack_vector_int128: {
18560 ConstantInt *Index = cast<ConstantInt>(Op1);
18566 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
18568 return Builder.CreateExtractElement(Unpacked, Index);
18571 case PPC::BI__builtin_ppc_sthcx: {
18575 return Builder.CreateCall(F, {Op0, Op1});
18584#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18585 case PPC::BI__builtin_##Name:
18586#include "clang/Basic/BuiltinsPPC.def"
18589 for (
unsigned i = 0, e =
E->getNumArgs(); i != e; i++)
18599 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18600 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18601 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18602 unsigned NumVecs = 2;
18603 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18604 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18606 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18612 llvm::Type *VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
18613 Value *Ptr = Ops[0];
18614 for (
unsigned i=0; i<NumVecs; i++) {
18616 llvm::ConstantInt* Index = llvm::ConstantInt::get(
IntTy, i);
18622 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18623 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18631 std::reverse(Ops.begin() + 1, Ops.end());
18634 switch (BuiltinID) {
18635 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18636 case PPC::BI__builtin_##Name: \
18637 ID = Intrinsic::ppc_##Intr; \
18638 Accumulate = Acc; \
18640 #include "clang/Basic/BuiltinsPPC.def"
18642 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18643 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18644 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18645 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18646 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18647 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18654 return Builder.CreateCall(F, Ops,
"");
18660 CallOps.push_back(Acc);
18662 for (
unsigned i=1; i<Ops.size(); i++)
18663 CallOps.push_back(Ops[i]);
18669 case PPC::BI__builtin_ppc_compare_and_swap:
18670 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18679 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic,
true);
18687 Value *LoadedVal = Pair.first.getScalarVal();
18691 case PPC::BI__builtin_ppc_fetch_and_add:
18692 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18694 llvm::AtomicOrdering::Monotonic);
18696 case PPC::BI__builtin_ppc_fetch_and_and:
18697 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18699 llvm::AtomicOrdering::Monotonic);
18702 case PPC::BI__builtin_ppc_fetch_and_or:
18703 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18705 llvm::AtomicOrdering::Monotonic);
18707 case PPC::BI__builtin_ppc_fetch_and_swap:
18708 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18710 llvm::AtomicOrdering::Monotonic);
18712 case PPC::BI__builtin_ppc_ldarx:
18713 case PPC::BI__builtin_ppc_lwarx:
18714 case PPC::BI__builtin_ppc_lharx:
18715 case PPC::BI__builtin_ppc_lbarx:
18717 case PPC::BI__builtin_ppc_mfspr: {
18723 return Builder.CreateCall(F, {Op0});
18725 case PPC::BI__builtin_ppc_mtspr: {
18732 return Builder.CreateCall(F, {Op0, Op1});
18734 case PPC::BI__builtin_ppc_popcntb: {
18736 llvm::Type *ArgType = ArgValue->
getType();
18738 return Builder.CreateCall(F, {ArgValue},
"popcntb");
18740 case PPC::BI__builtin_ppc_mtfsf: {
18750 case PPC::BI__builtin_ppc_swdiv_nochk:
18751 case PPC::BI__builtin_ppc_swdivs_nochk: {
18754 FastMathFlags FMF =
Builder.getFastMathFlags();
18755 Builder.getFastMathFlags().setFast();
18756 Value *FDiv =
Builder.CreateFDiv(Op0, Op1,
"swdiv_nochk");
18757 Builder.getFastMathFlags() &= (FMF);
18760 case PPC::BI__builtin_ppc_fric:
18762 *
this,
E, Intrinsic::rint,
18763 Intrinsic::experimental_constrained_rint))
18765 case PPC::BI__builtin_ppc_frim:
18766 case PPC::BI__builtin_ppc_frims:
18768 *
this,
E, Intrinsic::floor,
18769 Intrinsic::experimental_constrained_floor))
18771 case PPC::BI__builtin_ppc_frin:
18772 case PPC::BI__builtin_ppc_frins:
18774 *
this,
E, Intrinsic::round,
18775 Intrinsic::experimental_constrained_round))
18777 case PPC::BI__builtin_ppc_frip:
18778 case PPC::BI__builtin_ppc_frips:
18780 *
this,
E, Intrinsic::ceil,
18781 Intrinsic::experimental_constrained_ceil))
18783 case PPC::BI__builtin_ppc_friz:
18784 case PPC::BI__builtin_ppc_frizs:
18786 *
this,
E, Intrinsic::trunc,
18787 Intrinsic::experimental_constrained_trunc))
18789 case PPC::BI__builtin_ppc_fsqrt:
18790 case PPC::BI__builtin_ppc_fsqrts:
18792 *
this,
E, Intrinsic::sqrt,
18793 Intrinsic::experimental_constrained_sqrt))
18795 case PPC::BI__builtin_ppc_test_data_class: {
18800 {Op0, Op1},
"test_data_class");
18802 case PPC::BI__builtin_ppc_maxfe: {
18808 {Op0, Op1, Op2, Op3});
18810 case PPC::BI__builtin_ppc_maxfl: {
18816 {Op0, Op1, Op2, Op3});
18818 case PPC::BI__builtin_ppc_maxfs: {
18824 {Op0, Op1, Op2, Op3});
18826 case PPC::BI__builtin_ppc_minfe: {
18832 {Op0, Op1, Op2, Op3});
18834 case PPC::BI__builtin_ppc_minfl: {
18840 {Op0, Op1, Op2, Op3});
18842 case PPC::BI__builtin_ppc_minfs: {
18848 {Op0, Op1, Op2, Op3});
18850 case PPC::BI__builtin_ppc_swdiv:
18851 case PPC::BI__builtin_ppc_swdivs: {
18854 return Builder.CreateFDiv(Op0, Op1,
"swdiv");
18856 case PPC::BI__builtin_ppc_set_fpscr_rn:
18858 {EmitScalarExpr(E->getArg(0))});
18859 case PPC::BI__builtin_ppc_mffs:
18872 Attribute::getWithDereferenceableBytes(
Call->getContext(), 64));
18873 Call->addRetAttr(Attribute::getWithAlignment(
Call->getContext(), Align(4)));
18877 auto *RetTy = cast<llvm::PointerType>(CGF.
ConvertType(BuiltinRetType));
18878 if (RetTy ==
Call->getType())
18887 Attribute::getWithDereferenceableBytes(
Call->getContext(), 256));
18888 Call->addRetAttr(Attribute::getWithAlignment(
Call->getContext(), Align(8)));
// NOTE(review): the enclosing signature and several statements are not part of
// this excerpt; the comments below describe only what the visible lines do.
// LD is the load whose metadata is set at the end of this block.
18903 llvm::LoadInst *LD;
// Code object version not fixed at compile time: look up the module global
// named "__oclc_ABI_version" (a GlobalVariable with external linkage is
// constructed below), compute both candidate addresses, and choose between
// them with a select on IsCOV5.
18907 if (Cov == CodeObjectVersionKind::COV_None) {
18908 StringRef Name =
"__oclc_ABI_version";
18909 auto *ABIVersionC = CGF.
CGM.
getModule().getNamedGlobal(Name);
18911 ABIVersionC =
new llvm::GlobalVariable(
18913 llvm::GlobalValue::ExternalLinkage,
nullptr, Name,
nullptr,
18914 llvm::GlobalVariable::NotThreadLocal,
// COV_5 as an i32 constant (presumably the comparison operand behind IsCOV5 --
// TODO confirm against the full source).
18925 llvm::ConstantInt::get(CGF.
Int32Ty, CodeObjectVersionKind::COV_5));
// Candidate 1: i8-based offset 12 + Index * 2 from the pointer returned by
// EmitAMDGPUImplicitArgPtr.
18929 CGF.
Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
// Candidate 2: i8-based offset 4 + Index * 2 from the pointer returned by
// EmitAMDGPUDispatchPtr.
18933 CGF.
Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18935 auto Result = CGF.
Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
// Code object version known at compile time: a single GEP is emitted, using
// the same two offsets as above.
18939 Value *GEP =
nullptr;
18940 if (Cov >= CodeObjectVersionKind::COV_5) {
18942 GEP = CGF.
Builder.CreateConstGEP1_32(
18943 CGF.
Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18946 GEP = CGF.
Builder.CreateConstGEP1_32(
18947 CGF.
Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
// Annotate the load with !range (16-bit bounds, lower bound 1), !noundef and
// !invariant.load.
18954 llvm::MDNode *RNode = MDHelper.createRange(
APInt(16, 1),
18956 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
18957 LD->setMetadata(llvm::LLVMContext::MD_noundef,
18959 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
// NOTE(review): the enclosing signature and the load itself are not part of
// this excerpt.
// Offset constant: XOffset (12) plus 4 per Index, built as an i32 and used
// together with the pointer returned by EmitAMDGPUDispatchPtr.
18966 const unsigned XOffset = 12;
18967 auto *DP = EmitAMDGPUDispatchPtr(CGF);
18969 auto *Offset = llvm::ConstantInt::get(CGF.
Int32Ty, XOffset + Index * 4);
// !range is created with lower bound 1 and upper bound 0 (32-bit); per the
// LLVM range-metadata rules a wrapping [1, 0) range excludes only zero.
18977 LD->setMetadata(llvm::LLVMContext::MD_range,
18978 MDB.createRange(
APInt(32, 1), APInt::getZero(32)));
// The load is additionally tagged !invariant.load.
18979 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18992 llvm::AtomicOrdering &AO,
18993 llvm::SyncScope::ID &SSID) {
// Order is cast to ConstantInt unconditionally, so it has to be an integer
// constant; its value must also be a valid C ABI memory order (asserted).
18994 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18997 assert(llvm::isValidAtomicOrderingCABI(ord));
// Translate the C ABI memory order into the LLVM atomic ordering written to
// the AO out-parameter.
18998 switch (
static_cast<llvm::AtomicOrderingCABI
>(ord)) {
// acquire and consume share one arm: both become Acquire.
18999 case llvm::AtomicOrderingCABI::acquire:
19000 case llvm::AtomicOrderingCABI::consume:
19001 AO = llvm::AtomicOrdering::Acquire;
19003 case llvm::AtomicOrderingCABI::release:
19004 AO = llvm::AtomicOrdering::Release;
19006 case llvm::AtomicOrderingCABI::acq_rel:
19007 AO = llvm::AtomicOrdering::AcquireRelease;
19009 case llvm::AtomicOrderingCABI::seq_cst:
19010 AO = llvm::AtomicOrdering::SequentiallyConsistent;
// relaxed corresponds to LLVM's Monotonic ordering.
19012 case llvm::AtomicOrderingCABI::relaxed:
19013 AO = llvm::AtomicOrdering::Monotonic;
// Scope may be a constant string; getConstantStringInfo extracts it into scp.
// NOTE(review): how scp is turned into SSID is not visible in this excerpt.
19019 if (llvm::getConstantStringInfo(
Scope, scp)) {
// Otherwise Scope is read as an integer constant; the visible assignments set
// the SSID out-parameter to System or SingleThread.
19025 int scope = cast<llvm::ConstantInt>(
Scope)->getZExtValue();
19028 SSID = llvm::SyncScope::System;
19040 SSID = llvm::SyncScope::SingleThread;
19043 SSID = llvm::SyncScope::System;
// NOTE(review): the enclosing signature is not part of this excerpt.
// Arg starts out null and is filled in by one of the two paths below.
19051 llvm::Value *Arg =
nullptr;
// Bit Idx of the ICEArguments mask is tested; this branch is taken when the
// bit is clear.
19052 if ((ICEArguments & (1 << Idx)) == 0) {
// On the other path an optional APSInt is computed and asserted to hold a
// value, i.e. the argument is required to be a constant.
19057 std::optional<llvm::APSInt>
Result =
19059 assert(
Result &&
"Expected argument to be a constant");
19068 return RT.getFDotIntrinsic();
19070 return RT.getSDotIntrinsic();
19072 return RT.getUDotIntrinsic();
19077 return RT.getFirstBitSHighIntrinsic();
19081 return RT.getFirstBitUHighIntrinsic();
// SPIR-V targets use spv_wave_reduce_sum.
19088 case llvm::Triple::spirv:
19089 return llvm::Intrinsic::spv_wave_reduce_sum;
// DXIL has two variants, dx_wave_reduce_usum and dx_wave_reduce_sum.
// NOTE(review): the condition choosing between them is not visible in this
// excerpt.
19090 case llvm::Triple::dxil: {
19092 return llvm::Intrinsic::dx_wave_reduce_usum;
19093 return llvm::Intrinsic::dx_wave_reduce_sum;
// Reaching this point means the architecture has no WaveActiveSum lowering.
19096 llvm_unreachable(
"Intrinsic WaveActiveSum"
19097 " not supported by target architecture");
// Dispatch on the HLSL builtin being lowered. NOTE(review): many statements of
// each arm (operand emission, closing braces, fall-back returns) are not part
// of this excerpt; comments are given only where the visible lines establish
// the behaviour.
19107 switch (BuiltinID) {
19108 case Builtin::BI__builtin_hlsl_resource_getpointer: {
// The intrinsic's result type is an unqualified pointer type.
19113 llvm::Type *RetTy = llvm::PointerType::getUnqual(
getLLVMContext());
19115 return Builder.CreateIntrinsic(
19119 case Builtin::BI__builtin_hlsl_all: {
19121 return Builder.CreateIntrinsic(
19126 case Builtin::BI__builtin_hlsl_any: {
19128 return Builder.CreateIntrinsic(
19133 case Builtin::BI__builtin_hlsl_asdouble:
19135 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
19142 Ty = VecTy->getElementType();
19144 Intrinsic::ID Intr;
19153 return Builder.CreateIntrinsic(
19157 case Builtin::BI__builtin_hlsl_cross: {
19162 "cross operands must have a float representation");
19167 "input vectors must have 3 elements each");
19168 return Builder.CreateIntrinsic(
19172 case Builtin::BI__builtin_hlsl_dot: {
19175 llvm::Type *T0 = Op0->
getType();
19176 llvm::Type *T1 = Op1->
getType();
// Both operands scalar: the dot product is a plain multiply, fmul for
// floating point and mul for integers.
19179 if (!T0->isVectorTy() && !T1->isVectorTy()) {
19180 if (T0->isFloatingPointTy())
19181 return Builder.CreateFMul(Op0, Op1,
"hlsl.dot");
19183 if (T0->isIntegerTy())
19184 return Builder.CreateMul(Op0, Op1,
"hlsl.dot");
19187 "Scalar dot product is only supported on ints and floats.");
// Past the scalar case both operands must be vectors; a scalar/vector mix is
// rejected by this assertion.
19192 assert(T0->isVectorTy() && T1->isVectorTy() &&
19193 "Dot product of vector and scalar is not supported.");
19196 [[maybe_unused]]
auto *VecTy1 =
19200 "Dot product of vectors need the same element types.");
19203 "Dot product requires vectors to be of the same size.");
// Vector case: the intrinsic's result type is the scalar element type of the
// first operand.
19205 return Builder.CreateIntrinsic(
19206 T0->getScalarType(),
19210 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
19216 return Builder.CreateIntrinsic(
19218 "hlsl.dot4add.i8packed");
19220 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
19226 return Builder.CreateIntrinsic(
19228 "hlsl.dot4add.u8packed");
19230 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19233 return Builder.CreateIntrinsic(
19238 case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
19241 return Builder.CreateIntrinsic(
19244 nullptr,
"hlsl.firstbitlow");
19246 case Builtin::BI__builtin_hlsl_lerp: {
19251 llvm_unreachable(
"lerp operand must have a float representation");
19252 return Builder.CreateIntrinsic(
19256 case Builtin::BI__builtin_hlsl_normalize: {
19260 "normalize operand must have a float representation");
19262 return Builder.CreateIntrinsic(
19265 nullptr,
"hlsl.normalize");
19267 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
19271 "degree operand must have a float representation");
19273 return Builder.CreateIntrinsic(
19277 case Builtin::BI__builtin_hlsl_elementwise_frac: {
19280 llvm_unreachable(
"frac operand must have a float representation");
19281 return Builder.CreateIntrinsic(
19285case Builtin::BI__builtin_hlsl_elementwise_isinf: {
19287 llvm::Type *Xty = Op0->
getType();
// Result type is i1, widened to a fixed vector of i1 with the operand's
// element count when the operand is a vector; the call emitted is dx_isinf.
19288 llvm::Type *retType = llvm::Type::getInt1Ty(this->
getLLVMContext());
19289 if (Xty->isVectorTy()) {
19291 retType = llvm::VectorType::get(
19292 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19295 llvm_unreachable(
"isinf operand must have a float representation");
19296 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
// mad: the visible lowerings are llvm.fmuladd, dx_imad or an nsw add of Mul
// and B, and dx_umad or an nuw add of Mul and B.
// NOTE(review): the conditions selecting between them are not visible in this
// excerpt.
19299 case Builtin::BI__builtin_hlsl_mad: {
19304 return Builder.CreateIntrinsic(
19305 M->
getType(), Intrinsic::fmuladd,
19310 return Builder.CreateIntrinsic(
19311 M->
getType(), Intrinsic::dx_imad,
19315 return Builder.CreateNSWAdd(Mul, B);
19319 return Builder.CreateIntrinsic(
19320 M->
getType(), Intrinsic::dx_umad,
19324 return Builder.CreateNUWAdd(Mul, B);
19326 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
19329 llvm_unreachable(
"rcp operand must have a float representation");
// Reciprocal: build the constant 1.0 in the operand's type (splatted across
// all lanes when the operand is a vector) and divide it by the operand.
19330 llvm::Type *Ty = Op0->
getType();
19331 llvm::Type *EltTy = Ty->getScalarType();
19332 Constant *One = Ty->isVectorTy()
19333 ? ConstantVector::getSplat(
19334 ElementCount::getFixed(
19335 cast<FixedVectorType>(Ty)->getNumElements()),
19336 ConstantFP::get(EltTy, 1.0))
19337 : ConstantFP::get(EltTy, 1.0);
19338 return Builder.CreateFDiv(One, Op0,
"hlsl.rcp");
19340 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
19343 llvm_unreachable(
"rsqrt operand must have a float representation");
19344 return Builder.CreateIntrinsic(
19348 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
19351 "saturate operand must have a float representation");
19352 return Builder.CreateIntrinsic(
19355 nullptr,
"hlsl.saturate");
19357 case Builtin::BI__builtin_hlsl_select: {
19371 Builder.CreateSelect(OpCond, OpTrue, OpFalse,
"hlsl.select");
19378 case Builtin::BI__builtin_hlsl_step: {
19383 "step operands must have a float representation");
19384 return Builder.CreateIntrinsic(
19388 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
// The operand is asserted to be an i1.
19390 assert(Op->
getType()->isIntegerTy(1) &&
19391 "Intrinsic WaveActiveAllTrue operand must be a bool");
19395 Intrinsic::getOrInsertDeclaration(&
CGM.
getModule(), ID), {Op});
19397 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
// The operand is asserted to be an i1.
19399 assert(Op->
getType()->isIntegerTy(1) &&
19400 "Intrinsic WaveActiveAnyTrue operand must be a bool");
19404 Intrinsic::getOrInsertDeclaration(&
CGM.
getModule(), ID), {Op});
19406 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
19410 Intrinsic::getOrInsertDeclaration(&
CGM.
getModule(), ID),
19413 case Builtin::BI__builtin_hlsl_wave_active_sum: {
19416 llvm::FunctionType *FT = llvm::FunctionType::get(
19428 ArrayRef{OpExpr},
"hlsl.wave.active.sum");
19430 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
// The lowering differs per architecture; the SPIR-V arm refers to a function
// named "__hlsl_wave_get_lane_index" that takes no parameters and returns
// IntTy.
19435 case llvm::Triple::dxil:
19438 case llvm::Triple::spirv:
19440 llvm::FunctionType::get(
IntTy, {},
false),
19441 "__hlsl_wave_get_lane_index", {},
false,
true));
19444 "Intrinsic WaveGetLaneIndex not supported by target architecture");
19447 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
19450 Intrinsic::getOrInsertDeclaration(&
CGM.
getModule(), ID));
19452 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
19457 llvm::FunctionType *FT = llvm::FunctionType::get(
19468 ArrayRef{OpExpr, OpIndex},
"hlsl.wave.readlane");
19470 case Builtin::BI__builtin_hlsl_elementwise_sign: {
19471 auto *Arg0 =
E->getArg(0);
19473 llvm::Type *Xty = Op0->
getType();
// Result type is i32, widened to a fixed vector of i32 with the operand's
// element count when the operand is a vector.
19474 llvm::Type *retType = llvm::Type::getInt32Ty(this->
getLLVMContext());
19475 if (Xty->isVectorTy()) {
19477 retType = llvm::VectorType::get(
19478 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19482 "sign operand must have a float or int representation");
// One visible path is computed inline: 0 where the operand equals zero,
// otherwise 1. NOTE(review): the condition guarding this path is not visible
// in this excerpt.
19485 Value *Cmp =
Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
19486 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
19487 ConstantInt::get(retType, 1),
"hlsl.sign");
19490 return Builder.CreateIntrinsic(
19494 case Builtin::BI__builtin_hlsl_elementwise_radians: {
19497 "radians operand must have a float representation");
19498 return Builder.CreateIntrinsic(
19501 nullptr,
"hlsl.radians");
19503 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
19507 return Builder.CreateIntrinsic(
19512 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
19517 "asuint operands types mismatch");
19520 case Builtin::BI__builtin_hlsl_elementwise_clip:
19522 "clip operands types mismatch");
19524 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
19528 Intrinsic::getOrInsertDeclaration(&
CGM.
getModule(), ID));
// NOTE(review): the enclosing signature is not part of this excerpt.
// Every annotation recorded below is a pair whose first element is this tag.
19536 constexpr const char *
Tag =
"amdgpu-as";
19538 LLVMContext &Ctx = Inst->getContext();
// Call arguments from index 2 onwards are visited; earlier arguments are not
// touched by this loop.
19540 for (
unsigned K = 2; K <
E->getNumArgs(); ++K) {
// An argument that resolves to a constant string AS is recorded as the pair
// (Tag, AS).
19543 if (llvm::getConstantStringInfo(
V, AS)) {
19544 MMRAs.push_back({
Tag, AS});
19549 "expected an address space name as a string literal");
// Drop adjacent duplicate pairs, then attach the list to Inst as !mmra
// metadata. NOTE(review): whether the list is sorted before llvm::unique runs
// is not visible in this excerpt.
19553 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
19554 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
19559 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
19560 llvm::SyncScope::ID SSID;
19561 switch (BuiltinID) {
19562 case AMDGPU::BI__builtin_amdgcn_div_scale:
19563 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
19576 llvm::Value *Tmp =
Builder.CreateCall(Callee, {
X, Y, Z});
19579 llvm::Value *Flag =
Builder.CreateExtractValue(Tmp, 1);
19583 llvm::Value *FlagExt =
Builder.CreateZExt(Flag, RealFlagType);
19587 case AMDGPU::BI__builtin_amdgcn_div_fmas:
19588 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
19596 llvm::Value *Src3ToBool =
Builder.CreateIsNotNull(Src3);
19597 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
19600 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
19601 return emitBuiltinWithOneOverloadedType<2>(*
this,
E,
19602 Intrinsic::amdgcn_ds_swizzle);
19603 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
19604 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
19605 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
19609 unsigned ICEArguments = 0;
19614 unsigned Size = DataTy->getPrimitiveSizeInBits();
19615 llvm::Type *
IntTy =
19616 llvm::IntegerType::get(
Builder.getContext(), std::max(Size, 32u));
19619 ? Intrinsic::amdgcn_mov_dpp8
19620 : Intrinsic::amdgcn_update_dpp,
19622 assert(
E->getNumArgs() == 5 ||
E->getNumArgs() == 6 ||
19623 E->getNumArgs() == 2);
19624 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
19626 Args.push_back(llvm::PoisonValue::get(
IntTy));
19627 for (
unsigned I = 0; I !=
E->getNumArgs(); ++I) {
19629 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
19631 if (!DataTy->isIntegerTy())
19633 V, llvm::IntegerType::get(
Builder.getContext(), Size));
19636 llvm::Type *ExpTy =
19637 F->getFunctionType()->getFunctionParamType(I + InsertOld);
19638 Args.push_back(
Builder.CreateTruncOrBitCast(
V, ExpTy));
19641 if (Size < 32 && !DataTy->isIntegerTy())
19643 V, llvm::IntegerType::get(
Builder.getContext(), Size));
19644 return Builder.CreateTruncOrBitCast(
V, DataTy);
19646 case AMDGPU::BI__builtin_amdgcn_permlane16:
19647 case AMDGPU::BI__builtin_amdgcn_permlanex16:
19648 return emitBuiltinWithOneOverloadedType<6>(
19650 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
19651 ? Intrinsic::amdgcn_permlane16
19652 : Intrinsic::amdgcn_permlanex16);
19653 case AMDGPU::BI__builtin_amdgcn_permlane64:
19654 return emitBuiltinWithOneOverloadedType<1>(*
this,
E,
19655 Intrinsic::amdgcn_permlane64);
19656 case AMDGPU::BI__builtin_amdgcn_readlane:
19657 return emitBuiltinWithOneOverloadedType<2>(*
this,
E,
19658 Intrinsic::amdgcn_readlane);
19659 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
19660 return emitBuiltinWithOneOverloadedType<1>(*
this,
E,
19661 Intrinsic::amdgcn_readfirstlane);
19662 case AMDGPU::BI__builtin_amdgcn_div_fixup:
19663 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
19664 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
19665 return emitBuiltinWithOneOverloadedType<3>(*
this,
E,
19666 Intrinsic::amdgcn_div_fixup);
19667 case AMDGPU::BI__builtin_amdgcn_trig_preop:
19668 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
19670 case AMDGPU::BI__builtin_amdgcn_rcp:
19671 case AMDGPU::BI__builtin_amdgcn_rcpf:
19672 case AMDGPU::BI__builtin_amdgcn_rcph:
19673 return emitBuiltinWithOneOverloadedType<1>(*
this,
E, Intrinsic::amdgcn_rcp);
19674 case AMDGPU::BI__builtin_amdgcn_sqrt:
19675 case AMDGPU::BI__builtin_amdgcn_sqrtf:
19676 case AMDGPU::BI__builtin_amdgcn_sqrth:
19677 return emitBuiltinWithOneOverloadedType<1>(*
this,
E,
19678 Intrinsic::amdgcn_sqrt);
19679 case AMDGPU::BI__builtin_amdgcn_rsq:
19680 case AMDGPU::BI__builtin_amdgcn_rsqf:
19681 case AMDGPU::BI__builtin_amdgcn_rsqh:
19682 return emitBuiltinWithOneOverloadedType<1>(*
this,
E, Intrinsic::amdgcn_rsq);
19683 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
19684 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
19685 return emitBuiltinWithOneOverloadedType<1>(*
this,
E,
19686 Intrinsic::amdgcn_rsq_clamp);
19687 case AMDGPU::BI__builtin_amdgcn_sinf:
19688 case AMDGPU::BI__builtin_amdgcn_sinh:
19689 return emitBuiltinWithOneOverloadedType<1>(*
this,
E, Intrinsic::amdgcn_sin);
19690 case AMDGPU::BI__builtin_amdgcn_cosf:
19691 case AMDGPU::BI__builtin_amdgcn_cosh:
19692 return emitBuiltinWithOneOverloadedType<1>(*
this,
E, Intrinsic::amdgcn_cos);
19693 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
19694 return EmitAMDGPUDispatchPtr(*
this,
E);
19695 case AMDGPU::BI__builtin_amdgcn_logf:
19696 return emitBuiltinWithOneOverloadedType<1>(*
this,
E, Intrinsic::amdgcn_log);
19697 case AMDGPU::BI__builtin_amdgcn_exp2f:
19698 return emitBuiltinWithOneOverloadedType<1>(*
this,
E,
19699 Intrinsic::amdgcn_exp2);
19700 case AMDGPU::BI__builtin_amdgcn_log_clampf:
19701 return emitBuiltinWithOneOverloadedType<1>(*
this,
E,
19702 Intrinsic::amdgcn_log_clamp);
19703 case AMDGPU::BI__builtin_amdgcn_ldexp:
19704 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
19707 llvm::Function *F =
19708 CGM.
getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
19709 return Builder.CreateCall(F, {Src0, Src1});
19711 case AMDGPU::BI__builtin_amdgcn_ldexph: {
19716 llvm::Function *F =
19720 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
19721 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
19722 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
19723 return emitBuiltinWithOneOverloadedType<1>(*
this,
E,
19724 Intrinsic::amdgcn_frexp_mant);
19725 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
19726 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
19730 return Builder.CreateCall(F, Src0);
19732 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
19736 return Builder.CreateCall(F, Src0);
19738 case AMDGPU::BI__builtin_amdgcn_fract:
19739 case AMDGPU::BI__builtin_amdgcn_fractf:
19740 case AMDGPU::BI__builtin_amdgcn_fracth:
19741 return emitBuiltinWithOneOverloadedType<1>(*
this,
E,
19742 Intrinsic::amdgcn_fract);
19743 case AMDGPU::BI__builtin_amdgcn_lerp:
19744 return emitBuiltinWithOneOverloadedType<3>(*
this,
E,
19745 Intrinsic::amdgcn_lerp);
19746 case AMDGPU::BI__builtin_amdgcn_ubfe:
19747 return emitBuiltinWithOneOverloadedType<3>(*
this,
E,
19748 Intrinsic::amdgcn_ubfe);
19749 case AMDGPU::BI__builtin_amdgcn_sbfe:
19750 return emitBuiltinWithOneOverloadedType<3>(*
this,
E,
19751 Intrinsic::amdgcn_sbfe);
19752 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
19753 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
19757 return Builder.CreateCall(F, { Src });
19759 case AMDGPU::BI__builtin_amdgcn_uicmp:
19760 case AMDGPU::BI__builtin_amdgcn_uicmpl:
19761 case AMDGPU::BI__builtin_amdgcn_sicmp:
19762 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
19769 {
Builder.getInt64Ty(), Src0->getType() });
19770 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19772 case AMDGPU::BI__builtin_amdgcn_fcmp:
19773 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
19780 {
Builder.getInt64Ty(), Src0->getType() });
19781 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19783 case AMDGPU::BI__builtin_amdgcn_class:
19784 case AMDGPU::BI__builtin_amdgcn_classf:
19785 case AMDGPU::BI__builtin_amdgcn_classh:
19787 case AMDGPU::BI__builtin_amdgcn_fmed3f:
19788 case AMDGPU::BI__builtin_amdgcn_fmed3h:
19789 return emitBuiltinWithOneOverloadedType<3>(*
this,
E,
19790 Intrinsic::amdgcn_fmed3);
19791 case AMDGPU::BI__builtin_amdgcn_ds_append:
19792 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
19793 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
19794 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
19799 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19800 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19801 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19802 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19803 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19804 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19805 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19806 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19807 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19808 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19809 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19810 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19811 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19812 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
19814 switch (BuiltinID) {
19815 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19816 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19817 IID = Intrinsic::amdgcn_global_load_tr_b64;
19819 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19820 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19821 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19822 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19823 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19824 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19825 IID = Intrinsic::amdgcn_global_load_tr_b128;
19827 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19828 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
19830 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19831 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
19833 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19834 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
19836 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
19837 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19838 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19839 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
19845 return Builder.CreateCall(F, {Addr});
19847 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
19850 return Builder.CreateCall(F);
19852 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
19858 case AMDGPU::BI__builtin_amdgcn_read_exec:
19860 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
19862 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
19864 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
19865 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
19866 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
19867 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
19877 RayOrigin =
Builder.CreateShuffleVector(RayOrigin, RayOrigin,
19881 RayInverseDir =
Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
19885 {NodePtr->getType(), RayDir->getType()});
19886 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
19887 RayInverseDir, TextureDescr});
19890 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
19892 for (
int i = 0, e =
E->getNumArgs(); i != e; ++i)
19900 Value *I0 =
Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
19902 return Builder.CreateInsertElement(I0, A, 1);
19904 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
19905 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
19906 llvm::FixedVectorType *VT = FixedVectorType::get(
Builder.getInt32Ty(), 8);
19908 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19909 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19910 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
19914 for (
unsigned I = 0, N =
E->getNumArgs(); I != N; ++I)
19916 return Builder.CreateCall(F, Args);
19918 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19919 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19920 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19921 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19922 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
19923 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19924 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
19925 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19926 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
19927 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
19928 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19929 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19930 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19931 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19932 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19933 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19934 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19935 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19936 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
19937 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
19938 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
19939 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
19940 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19941 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19942 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19943 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19944 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19945 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19946 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19947 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19948 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19949 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19950 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19951 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19952 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19953 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19954 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19955 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19956 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19957 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19958 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19959 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19960 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19961 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19962 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19963 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19964 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19965 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19966 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19967 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19968 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19969 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19970 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19971 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19972 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19973 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19974 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19975 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19976 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19977 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
19990 bool AppendFalseForOpselArg =
false;
19991 unsigned BuiltinWMMAOp;
19993 switch (BuiltinID) {
19994 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19995 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19996 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19997 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19998 ArgsForMatchingMatrixTypes = {2, 0};
19999 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
20001 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20002 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20003 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20004 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20005 ArgsForMatchingMatrixTypes = {2, 0};
20006 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
20008 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20009 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20010 AppendFalseForOpselArg =
true;
20012 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20013 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20014 ArgsForMatchingMatrixTypes = {2, 0};
20015 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
20017 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20018 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20019 AppendFalseForOpselArg =
true;
20021 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20022 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20023 ArgsForMatchingMatrixTypes = {2, 0};
20024 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
20026 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20027 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20028 ArgsForMatchingMatrixTypes = {2, 0};
20029 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
20031 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20032 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20033 ArgsForMatchingMatrixTypes = {2, 0};
20034 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
20036 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20037 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20038 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20039 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20040 ArgsForMatchingMatrixTypes = {4, 1};
20041 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
20043 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20044 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20045 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20046 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20047 ArgsForMatchingMatrixTypes = {4, 1};
20048 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
20050 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20051 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20052 ArgsForMatchingMatrixTypes = {2, 0};
20053 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
20055 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20056 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20057 ArgsForMatchingMatrixTypes = {2, 0};
20058 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
20060 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20061 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20062 ArgsForMatchingMatrixTypes = {2, 0};
20063 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
20065 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20066 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20067 ArgsForMatchingMatrixTypes = {2, 0};
20068 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
20070 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20071 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20072 ArgsForMatchingMatrixTypes = {4, 1};
20073 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
20075 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20076 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20077 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
20078 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
20080 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20081 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20082 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
20083 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
20085 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20086 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20087 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
20088 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
20090 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20091 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20092 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
20093 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
20095 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20096 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20097 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
20098 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
20100 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20101 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20102 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
20103 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
20105 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20106 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20107 ArgsForMatchingMatrixTypes = {4, 1, 3, 5};
20108 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
20110 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20111 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20112 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
20113 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
20115 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20116 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20117 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
20118 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
20120 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20121 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20122 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
20123 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
20125 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20126 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
20127 ArgsForMatchingMatrixTypes = {2, 0, 1, 3};
20128 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
20133 for (
int i = 0, e =
E->getNumArgs(); i != e; ++i)
20135 if (AppendFalseForOpselArg)
20136 Args.push_back(
Builder.getFalse());
20139 for (
auto ArgIdx : ArgsForMatchingMatrixTypes)
20140 ArgTypes.push_back(Args[ArgIdx]->getType());
20143 return Builder.CreateCall(F, Args);
20147 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
20149 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
20151 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
20155 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
20156 return EmitAMDGPUWorkGroupSize(*
this, 0);
20157 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
20158 return EmitAMDGPUWorkGroupSize(*
this, 1);
20159 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
20160 return EmitAMDGPUWorkGroupSize(*
this, 2);
20163 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
20164 return EmitAMDGPUGridSize(*
this, 0);
20165 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
20166 return EmitAMDGPUGridSize(*
this, 1);
20167 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
20168 return EmitAMDGPUGridSize(*
this, 2);
20171 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
20172 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
20173 return emitBuiltinWithOneOverloadedType<1>(*
this,
E,
20174 Intrinsic::r600_recipsqrt_ieee);
20175 case AMDGPU::BI__builtin_r600_read_tidig_x:
20177 case AMDGPU::BI__builtin_r600_read_tidig_y:
20179 case AMDGPU::BI__builtin_r600_read_tidig_z:
20181 case AMDGPU::BI__builtin_amdgcn_alignbit: {
20186 return Builder.CreateCall(F, { Src0, Src1, Src2 });
20188 case AMDGPU::BI__builtin_amdgcn_fence: {
20191 FenceInst *Fence =
Builder.CreateFence(AO, SSID);
20192 if (
E->getNumArgs() > 2)
20196 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20197 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20198 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20199 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20200 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20201 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20202 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20203 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20204 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20205 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20206 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20207 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20208 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20209 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20210 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20211 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20212 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20213 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20214 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20215 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20216 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20217 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20218 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
20219 llvm::AtomicRMWInst::BinOp BinOp;
20220 switch (BuiltinID) {
20221 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20222 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20223 BinOp = llvm::AtomicRMWInst::UIncWrap;
20225 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20226 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20227 BinOp = llvm::AtomicRMWInst::UDecWrap;
20229 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20230 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20231 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20232 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20233 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20234 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20235 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20236 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20237 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20238 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20239 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20240 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20241 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20242 BinOp = llvm::AtomicRMWInst::FAdd;
20244 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20245 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20246 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20247 BinOp = llvm::AtomicRMWInst::FMin;
20249 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20250 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
20251 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20252 BinOp = llvm::AtomicRMWInst::FMax;
20258 llvm::Type *OrigTy = Val->
getType();
20263 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
20264 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
20265 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
20275 if (
E->getNumArgs() >= 4) {
20287 AO = AtomicOrdering::Monotonic;
20290 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
20291 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
20292 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
20293 llvm::Type *V2BF16Ty = FixedVectorType::get(
20294 llvm::Type::getBFloatTy(
Builder.getContext()), 2);
20295 Val =
Builder.CreateBitCast(Val, V2BF16Ty);
20299 llvm::AtomicRMWInst *RMW =
20302 RMW->setVolatile(
true);
20304 unsigned AddrSpace = Ptr.
getType()->getAddressSpace();
20305 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
20309 RMW->setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
20313 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->
getType()->isFloatTy())
20314 RMW->setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
20317 return Builder.CreateBitCast(RMW, OrigTy);
20319 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
20320 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
20326 return Builder.CreateCall(F, {Arg});
20328 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
20329 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
20337 CGM.
getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
20338 ? Intrinsic::amdgcn_permlane16_swap
20339 : Intrinsic::amdgcn_permlane32_swap);
20340 llvm::CallInst *
Call =
20341 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
20343 llvm::Value *Elt0 =
Builder.CreateExtractValue(
Call, 0);
20344 llvm::Value *Elt1 =
Builder.CreateExtractValue(
Call, 1);
20348 llvm::Value *Insert0 =
Builder.CreateInsertElement(
20349 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
20350 llvm::Value *AsVector =
20351 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
20354 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
20355 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
20356 return emitBuiltinWithOneOverloadedType<4>(*
this,
E,
20357 Intrinsic::amdgcn_bitop3);
20358 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
20359 return emitBuiltinWithOneOverloadedType<4>(
20360 *
this,
E, Intrinsic::amdgcn_make_buffer_rsrc);
20361 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
20362 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
20363 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
20364 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
20365 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
20366 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
20367 return emitBuiltinWithOneOverloadedType<5>(
20368 *
this,
E, Intrinsic::amdgcn_raw_ptr_buffer_store);
20369 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20370 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20371 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20372 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20373 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20374 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
20375 llvm::Type *RetTy =
nullptr;
20376 switch (BuiltinID) {
20377 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20380 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20383 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20386 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20387 RetTy = llvm::FixedVectorType::get(
Int32Ty, 2);
20389 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20390 RetTy = llvm::FixedVectorType::get(
Int32Ty, 3);
20392 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
20393 RetTy = llvm::FixedVectorType::get(
Int32Ty, 4);
20402 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
20403 return emitBuiltinWithOneOverloadedType<2>(
20404 *
this,
E, Intrinsic::amdgcn_s_prefetch_data);
20412 switch (BuiltinID) {
20413 case SPIRV::BI__builtin_spirv_distance: {
20418 "Distance operands must have a float representation");
20421 "Distance operands must be a vector");
20422 return Builder.CreateIntrinsic(
20423 X->getType()->getScalarType(), Intrinsic::spv_distance,
20426 case SPIRV::BI__builtin_spirv_length: {
20429 "length operand must have a float representation");
20431 "length operand must be a vector");
20432 return Builder.CreateIntrinsic(
20433 X->getType()->getScalarType(), Intrinsic::spv_length,
// The loop below visits every call argument except the last one.
// NOTE(review): presumably the final argument is where the condition code is
// written back - the lines that would show this are not visible; confirm.
20444 unsigned IntrinsicID,
20446 unsigned NumArgs =
E->getNumArgs() - 1;
20448 for (
unsigned I = 0; I < NumArgs; ++I)
// Lower SystemZ builtins. Each case (or group of element-width variants that
// share one body) builds the operands and emits the matching call.
20460 switch (BuiltinID) {
// tbegin: the control operand is the constant 0xff0c.
20461 case SystemZ::BI__builtin_tbegin: {
20463 Value *Control = llvm::ConstantInt::get(
Int32Ty, 0xff0c);
20465 return Builder.CreateCall(F, {TDB, Control});
// tbegin_nofloat: uses the same 0xff0c control constant as tbegin.
20467 case SystemZ::BI__builtin_tbegin_nofloat: {
20469 Value *Control = llvm::ConstantInt::get(
Int32Ty, 0xff0c);
20471 return Builder.CreateCall(F, {TDB, Control});
// tbeginc: the control operand is the constant 0xff08.
20473 case SystemZ::BI__builtin_tbeginc: {
20475 Value *Control = llvm::ConstantInt::get(
Int32Ty, 0xff08);
20477 return Builder.CreateCall(F, {TDB, Control});
20479 case SystemZ::BI__builtin_tabort: {
20484 case SystemZ::BI__builtin_non_tx_store: {
// vclz*: the call takes X plus an i1 constant. Despite the variable name
// "Undef", the value passed is a concrete `false`.
// NOTE(review): presumably this is the "zero input is poison" flag of the
// generic count-leading-zeros intrinsic - F's definition is not visible.
20496 case SystemZ::BI__builtin_s390_vclzb:
20497 case SystemZ::BI__builtin_s390_vclzh:
20498 case SystemZ::BI__builtin_s390_vclzf:
20499 case SystemZ::BI__builtin_s390_vclzg:
20500 case SystemZ::BI__builtin_s390_vclzq: {
20503 Value *Undef = ConstantInt::get(
Builder.getInt1Ty(),
false);
20505 return Builder.CreateCall(F, {
X, Undef});
// vctz*: same operand shape as vclz* above (X plus an i1 `false`).
20508 case SystemZ::BI__builtin_s390_vctzb:
20509 case SystemZ::BI__builtin_s390_vctzh:
20510 case SystemZ::BI__builtin_s390_vctzf:
20511 case SystemZ::BI__builtin_s390_vctzg:
20512 case SystemZ::BI__builtin_s390_vctzq: {
20515 Value *Undef = ConstantInt::get(
Builder.getInt1Ty(),
false);
20517 return Builder.CreateCall(F, {
X, Undef});
// verll*: the amount is cast (as unsigned) to the vector's element type,
// splatted across all NumElts lanes, and the call receives Src twice
// followed by the splatted amount.
20520 case SystemZ::BI__builtin_s390_verllb:
20521 case SystemZ::BI__builtin_s390_verllh:
20522 case SystemZ::BI__builtin_s390_verllf:
20523 case SystemZ::BI__builtin_s390_verllg: {
20528 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
20529 Amt =
Builder.CreateIntCast(Amt, ResultType->getScalarType(),
false);
20530 Amt =
Builder.CreateVectorSplat(NumElts, Amt);
20532 return Builder.CreateCall(F, { Src, Src, Amt });
// verllv*: the call again receives Src twice followed by Amt; no cast or
// splat of Amt appears in the visible lines of this case.
20535 case SystemZ::BI__builtin_s390_verllvb:
20536 case SystemZ::BI__builtin_s390_verllvh:
20537 case SystemZ::BI__builtin_s390_verllvf:
20538 case SystemZ::BI__builtin_s390_verllvg: {
20543 return Builder.CreateCall(F, { Src, Src, Amt });
// vfsq*: single operand X; a constrained-FP call is emitted when the builder
// is in constrained-FP mode.
20546 case SystemZ::BI__builtin_s390_vfsqsb:
20547 case SystemZ::BI__builtin_s390_vfsqdb: {
20550 if (
Builder.getIsFPConstrained()) {
20552 return Builder.CreateConstrainedFPCall(F, {
X });
// vfma*: operands (X, Y, Z) are passed unchanged on both the constrained and
// the regular path.
20558 case SystemZ::BI__builtin_s390_vfmasb:
20559 case SystemZ::BI__builtin_s390_vfmadb: {
20564 if (
Builder.getIsFPConstrained()) {
20566 return Builder.CreateConstrainedFPCall(F, {
X, Y, Z});
20569 return Builder.CreateCall(F, {
X, Y, Z});
// vfms*: in the constrained path shown here, Z is negated (FNeg named "neg")
// before being passed as the third operand.
20572 case SystemZ::BI__builtin_s390_vfmssb:
20573 case SystemZ::BI__builtin_s390_vfmsdb: {
20578 if (
Builder.getIsFPConstrained()) {
20580 return Builder.CreateConstrainedFPCall(F, {
X, Y,
Builder.CreateFNeg(Z,
"neg")});
// vfnma*: the result of the (X, Y, Z) call is negated, on both paths.
20586 case SystemZ::BI__builtin_s390_vfnmasb:
20587 case SystemZ::BI__builtin_s390_vfnmadb: {
20592 if (
Builder.getIsFPConstrained()) {
20594 return Builder.CreateFNeg(
Builder.CreateConstrainedFPCall(F, {X, Y, Z}),
"neg");
20597 return Builder.CreateFNeg(
Builder.CreateCall(F, {X, Y, Z}),
"neg");
// vfnms*: the constrained path calls with (X, Y, NegZ) and negates the
// result of that call.
20600 case SystemZ::BI__builtin_s390_vfnmssb:
20601 case SystemZ::BI__builtin_s390_vfnmsdb: {
20606 if (
Builder.getIsFPConstrained()) {
20609 return Builder.CreateFNeg(
Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
20616 case SystemZ::BI__builtin_s390_vflpsb:
20617 case SystemZ::BI__builtin_s390_vflpdb: {
20623 case SystemZ::BI__builtin_s390_vflnsb:
20624 case SystemZ::BI__builtin_s390_vflndb: {
// vfi*: the M4/M5 immediates select a generic rounding intrinsic (ID) and
// its constrained counterpart (CI) where a combination matches. If none
// matches, ID stays not_intrinsic and the target-specific s390_vfisb /
// s390_vfidb intrinsic is called with the M4 and M5 values passed explicitly.
20630 case SystemZ::BI__builtin_s390_vfisb:
20631 case SystemZ::BI__builtin_s390_vfidb: {
20639 Intrinsic::ID
ID = Intrinsic::not_intrinsic;
20641 switch (M4.getZExtValue()) {
// First M4 group: only M5 == 0 is mapped (rint).
20644 switch (M5.getZExtValue()) {
20646 case 0:
ID = Intrinsic::rint;
20647 CI = Intrinsic::experimental_constrained_rint;
break;
// Second M4 group: M5 selects nearbyint / round / trunc / ceil / floor.
20651 switch (M5.getZExtValue()) {
20653 case 0:
ID = Intrinsic::nearbyint;
20654 CI = Intrinsic::experimental_constrained_nearbyint;
break;
20655 case 1:
ID = Intrinsic::round;
20656 CI = Intrinsic::experimental_constrained_round;
break;
20657 case 5:
ID = Intrinsic::trunc;
20658 CI = Intrinsic::experimental_constrained_trunc;
break;
20659 case 6:
ID = Intrinsic::ceil;
20660 CI = Intrinsic::experimental_constrained_ceil;
break;
20661 case 7:
ID = Intrinsic::floor;
20662 CI = Intrinsic::experimental_constrained_floor;
break;
// A generic intrinsic was selected: it takes X only.
20666 if (ID != Intrinsic::not_intrinsic) {
20667 if (
Builder.getIsFPConstrained()) {
20669 return Builder.CreateConstrainedFPCall(F,
X);
// Fallback: pick the target intrinsic matching the builtin's element width.
20675 switch (BuiltinID) {
20676 case SystemZ::BI__builtin_s390_vfisb:
ID = Intrinsic::s390_vfisb;
break;
20677 case SystemZ::BI__builtin_s390_vfidb:
ID = Intrinsic::s390_vfidb;
break;
20678 default: llvm_unreachable(
"Unknown BuiltinID");
20683 return Builder.CreateCall(F, {
X, M4Value, M5Value});
// vfmax*: M4 == 4 maps to the generic maxnum intrinsic (constrained variant
// in CI), called with (X, Y). Any other M4 leaves ID as not_intrinsic and
// falls back to s390_vfmaxsb / s390_vfmaxdb with M4 passed as a third operand.
20685 case SystemZ::BI__builtin_s390_vfmaxsb:
20686 case SystemZ::BI__builtin_s390_vfmaxdb: {
20694 Intrinsic::ID
ID = Intrinsic::not_intrinsic;
20696 switch (M4.getZExtValue()) {
20698 case 4:
ID = Intrinsic::maxnum;
20699 CI = Intrinsic::experimental_constrained_maxnum;
break;
20701 if (ID != Intrinsic::not_intrinsic) {
20702 if (
Builder.getIsFPConstrained()) {
20704 return Builder.CreateConstrainedFPCall(F, {
X, Y});
20707 return Builder.CreateCall(F, {
X, Y});
20710 switch (BuiltinID) {
20711 case SystemZ::BI__builtin_s390_vfmaxsb:
ID = Intrinsic::s390_vfmaxsb;
break;
20712 case SystemZ::BI__builtin_s390_vfmaxdb:
ID = Intrinsic::s390_vfmaxdb;
break;
20713 default: llvm_unreachable(
"Unknown BuiltinID");
20717 return Builder.CreateCall(F, {
X, Y, M4Value});
// vfmin*: mirrors vfmax* above, using minnum and s390_vfminsb / s390_vfmindb.
20719 case SystemZ::BI__builtin_s390_vfminsb:
20720 case SystemZ::BI__builtin_s390_vfmindb: {
20728 Intrinsic::ID
ID = Intrinsic::not_intrinsic;
20730 switch (M4.getZExtValue()) {
20732 case 4:
ID = Intrinsic::minnum;
20733 CI = Intrinsic::experimental_constrained_minnum;
break;
20735 if (ID != Intrinsic::not_intrinsic) {
20736 if (
Builder.getIsFPConstrained()) {
20738 return Builder.CreateConstrainedFPCall(F, {
X, Y});
20741 return Builder.CreateCall(F, {
X, Y});
20744 switch (BuiltinID) {
20745 case SystemZ::BI__builtin_s390_vfminsb:
ID = Intrinsic::s390_vfminsb;
break;
20746 case SystemZ::BI__builtin_s390_vfmindb:
ID = Intrinsic::s390_vfmindb;
break;
20747 default: llvm_unreachable(
"Unknown BuiltinID");
20751 return Builder.CreateCall(F, {
X, Y, M4Value});
// vlbr*: the four element-width variants share a single case body.
20754 case SystemZ::BI__builtin_s390_vlbrh:
20755 case SystemZ::BI__builtin_s390_vlbrf:
20756 case SystemZ::BI__builtin_s390_vlbrg:
20757 case SystemZ::BI__builtin_s390_vlbrq: {
// INTRINSIC_WITH_CC(NAME) expands to a case label for BI__builtin_NAME that
// forwards to EmitSystemZIntrinsicWithCC with the same-named intrinsic.
20766#define INTRINSIC_WITH_CC(NAME) \
20767 case SystemZ::BI__builtin_##NAME: \
20768 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
20850#undef INTRINSIC_WITH_CC
// Per-builtin description of a WMMA load/store: an element count plus the
// intrinsic IDs produced by MMA_LDST below.
20859struct NVPTXMmaLdstInfo {
// Element count of the fragment; it is the first field set by MMA_LDST.
20860 unsigned NumResults;
// MMA_INTR names the nvvm_wmma_<geom_op_type>_<layout>_stride intrinsic.
20866#define MMA_INTR(geom_op_type, layout) \
20867 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
// MMA_LDST builds the initializer {n, <col intrinsic>, <row intrinsic>}.
20868#define MMA_LDST(n, geom_op_type) \
20869 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
// Maps a WMMA load/store builtin ID to its NVPTXMmaLdstInfo: the fragment's
// element count followed by the column-layout and row-layout intrinsic IDs
// (the order fixed by MMA_LDST). Any ID not listed reaches llvm_unreachable.
20871static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(
unsigned BuiltinID) {
20872 switch (BuiltinID) {
// hmma (f16 operands, f16/f32 accumulators) loads.
20874 case NVPTX::BI__hmma_m16n16k16_ld_a:
20875 return MMA_LDST(8, m16n16k16_load_a_f16);
20876 case NVPTX::BI__hmma_m16n16k16_ld_b:
20877 return MMA_LDST(8, m16n16k16_load_b_f16);
20878 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20879 return MMA_LDST(4, m16n16k16_load_c_f16);
20880 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20881 return MMA_LDST(8, m16n16k16_load_c_f32);
20882 case NVPTX::BI__hmma_m32n8k16_ld_a:
20883 return MMA_LDST(8, m32n8k16_load_a_f16);
20884 case NVPTX::BI__hmma_m32n8k16_ld_b:
20885 return MMA_LDST(8, m32n8k16_load_b_f16);
20886 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20887 return MMA_LDST(4, m32n8k16_load_c_f16);
20888 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20889 return MMA_LDST(8, m32n8k16_load_c_f32);
20890 case NVPTX::BI__hmma_m8n32k16_ld_a:
20891 return MMA_LDST(8, m8n32k16_load_a_f16);
20892 case NVPTX::BI__hmma_m8n32k16_ld_b:
20893 return MMA_LDST(8, m8n32k16_load_b_f16);
20894 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20895 return MMA_LDST(4, m8n32k16_load_c_f16);
20896 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20897 return MMA_LDST(8, m8n32k16_load_c_f32);
// imma (s8/u8 operands, s32 accumulator) loads.
20900 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20901 return MMA_LDST(2, m16n16k16_load_a_s8);
20902 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20903 return MMA_LDST(2, m16n16k16_load_a_u8);
20904 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20905 return MMA_LDST(2, m16n16k16_load_b_s8);
20906 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20907 return MMA_LDST(2, m16n16k16_load_b_u8);
20908 case NVPTX::BI__imma_m16n16k16_ld_c:
20909 return MMA_LDST(8, m16n16k16_load_c_s32);
20910 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20911 return MMA_LDST(4, m32n8k16_load_a_s8);
20912 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20913 return MMA_LDST(4, m32n8k16_load_a_u8);
20914 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20915 return MMA_LDST(1, m32n8k16_load_b_s8);
20916 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20917 return MMA_LDST(1, m32n8k16_load_b_u8);
20918 case NVPTX::BI__imma_m32n8k16_ld_c:
20919 return MMA_LDST(8, m32n8k16_load_c_s32);
20920 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20921 return MMA_LDST(1, m8n32k16_load_a_s8);
20922 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20923 return MMA_LDST(1, m8n32k16_load_a_u8);
20924 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20925 return MMA_LDST(4, m8n32k16_load_b_s8);
20926 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20927 return MMA_LDST(4, m8n32k16_load_b_u8);
20928 case NVPTX::BI__imma_m8n32k16_ld_c:
20929 return MMA_LDST(8, m8n32k16_load_c_s32);
// Sub-byte (s4/u4/b1) loads. These A/B entries are spelled out by hand:
// A fragments supply only the row intrinsic and B fragments only the col
// intrinsic, with 0 in the other slot. The C fragments use MMA_LDST.
20933 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20934 return {1, 0,
MMA_INTR(m8n8k32_load_a_s4, row)};
20935 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20936 return {1, 0,
MMA_INTR(m8n8k32_load_a_u4, row)};
20937 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20938 return {1,
MMA_INTR(m8n8k32_load_b_s4, col), 0};
20939 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20940 return {1,
MMA_INTR(m8n8k32_load_b_u4, col), 0};
20941 case NVPTX::BI__imma_m8n8k32_ld_c:
20942 return MMA_LDST(2, m8n8k32_load_c_s32);
20943 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20944 return {1, 0,
MMA_INTR(m8n8k128_load_a_b1, row)};
20945 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20946 return {1,
MMA_INTR(m8n8k128_load_b_b1, col), 0};
20947 case NVPTX::BI__bmma_m8n8k128_ld_c:
20948 return MMA_LDST(2, m8n8k128_load_c_s32);
// dmma (f64) loads.
20951 case NVPTX::BI__dmma_m8n8k4_ld_a:
20952 return MMA_LDST(1, m8n8k4_load_a_f64);
20953 case NVPTX::BI__dmma_m8n8k4_ld_b:
20954 return MMA_LDST(1, m8n8k4_load_b_f64);
20955 case NVPTX::BI__dmma_m8n8k4_ld_c:
20956 return MMA_LDST(2, m8n8k4_load_c_f64);
// bf16 and tf32 loads.
20959 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20960 return MMA_LDST(4, m16n16k16_load_a_bf16);
20961 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20962 return MMA_LDST(4, m16n16k16_load_b_bf16);
20963 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20964 return MMA_LDST(2, m8n32k16_load_a_bf16);
20965 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20966 return MMA_LDST(8, m8n32k16_load_b_bf16);
20967 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20968 return MMA_LDST(8, m32n8k16_load_a_bf16);
20969 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20970 return MMA_LDST(2, m32n8k16_load_b_bf16);
20971 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20972 return MMA_LDST(4, m16n16k8_load_a_tf32);
20973 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20974 return MMA_LDST(4, m16n16k8_load_b_tf32);
20975 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
20976 return MMA_LDST(8, m16n16k8_load_c_f32);
// Stores: the *_st_c_* builtins map to the store_d intrinsics.
20982 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20983 return MMA_LDST(4, m16n16k16_store_d_f16);
20984 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20985 return MMA_LDST(8, m16n16k16_store_d_f32);
20986 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20987 return MMA_LDST(4, m32n8k16_store_d_f16);
20988 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20989 return MMA_LDST(8, m32n8k16_store_d_f32);
20990 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20991 return MMA_LDST(4, m8n32k16_store_d_f16);
20992 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20993 return MMA_LDST(8, m8n32k16_store_d_f32);
20998 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20999 return MMA_LDST(8, m16n16k16_store_d_s32);
21000 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21001 return MMA_LDST(8, m32n8k16_store_d_s32);
21002 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21003 return MMA_LDST(8, m8n32k16_store_d_s32);
21004 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21005 return MMA_LDST(2, m8n8k32_store_d_s32);
21006 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21007 return MMA_LDST(2, m8n8k128_store_d_s32);
21010 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21011 return MMA_LDST(2, m8n8k4_store_d_f64);
21014 case NVPTX::BI__mma_m16n16k8_st_c_f32:
21015 return MMA_LDST(8, m16n16k8_store_d_f32);
// Callers are expected to pass only the builtin IDs handled above.
21018 llvm_unreachable(
"Unknown MMA builtin");
// Per-builtin description of a WMMA multiply-accumulate operation.
21025struct NVPTXMmaInfo {
// Intrinsic IDs, indexed by Layout + 4 * Satf (see getMMAIntrinsic).
21034 std::array<unsigned, 8> Variants;
// Looks up the intrinsic ID for a layout / satfinite combination.
21036 unsigned getMMAIntrinsic(
int Layout,
bool Satf) {
// Satf == true selects the upper half of Variants (indices 4..7).
21037 unsigned Index = Layout + 4 * Satf;
// Bounds check on Index before it is used to index Variants.
21038 if (Index >= Variants.size())
21040 return Variants[Index];
// Maps a WMMA multiply-accumulate builtin ID to its NVPTXMmaInfo. Any ID not
// handled by the switch reaches llvm_unreachable.
21046static NVPTXMmaInfo getNVPTXMmaInfo(
unsigned BuiltinID) {
// MMA_VARIANTS lists the four layout combinations in the order
// row_row, row_col, col_row, col_col.
21048#define MMA_VARIANTS(geom, type) \
21049 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
21050 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21051 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
21052 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
// MMA_SATF_VARIANTS appends the four _satfinite counterparts in the same
// layout order, giving eight entries in total.
21053#define MMA_SATF_VARIANTS(geom, type) \
21054 MMA_VARIANTS(geom, type), \
21055 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
21056 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21057 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
21058 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
// The I4 and B1 (xor_popc / and_popc) variant macros that follow name only
// row_col intrinsics in the lines visible here.
21060#define MMA_VARIANTS_I4(geom, type) \
21062 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21066 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21070#define MMA_VARIANTS_B1_XOR(geom, type) \
21072 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
21079#define MMA_VARIANTS_B1_AND(geom, type) \
21081 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
21089 switch (BuiltinID) {
// hmma (f16) multiply-accumulate builtins.
21093 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21095 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21097 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21099 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21101 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21103 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21105 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21107 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21109 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21111 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21113 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21115 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
// imma (s8/u8) multiply-accumulate builtins.
21119 case NVPTX::BI__imma_m16n16k16_mma_s8:
21121 case NVPTX::BI__imma_m16n16k16_mma_u8:
21123 case NVPTX::BI__imma_m32n8k16_mma_s8:
21125 case NVPTX::BI__imma_m32n8k16_mma_u8:
21127 case NVPTX::BI__imma_m8n32k16_mma_s8:
21129 case NVPTX::BI__imma_m8n32k16_mma_u8:
// Sub-byte (s4/u4/b1) multiply-accumulate builtins.
21133 case NVPTX::BI__imma_m8n8k32_mma_s4:
21135 case NVPTX::BI__imma_m8n8k32_mma_u4:
21137 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21139 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
// dmma (f64) multiply-accumulate builtin.
21143 case NVPTX::BI__dmma_m8n8k4_mma_f64:
// bf16 / tf32 builtins use MMA_VARIANTS only, so no _satfinite entries are
// listed for them.
// NOTE(review): the four leading integers presumably initialize NumEltsA,
// NumEltsB, NumEltsC and NumEltsD in that order - the struct's field
// declarations are not visible here; confirm.
21147 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21148 return {4, 4, 8, 8, {{
MMA_VARIANTS(m16n16k16, bf16)}}};
21149 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21150 return {2, 8, 8, 8, {{
MMA_VARIANTS(m8n32k16, bf16)}}};
21151 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21152 return {8, 2, 8, 8, {{
MMA_VARIANTS(m32n8k16, bf16)}}};
21153 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
21154 return {4, 4, 8, 8, {{
MMA_VARIANTS(m16n16k8, tf32)}}};
// Callers are expected to pass only the builtin IDs handled above.
21156 llvm_unreachable(
"Unexpected builtin ID.");
// Remove the helper macros defined at the top of this function.
21159#undef MMA_SATF_VARIANTS
21160#undef MMA_VARIANTS_I4
21161#undef MMA_VARIANTS_B1_AND
21162#undef MMA_VARIANTS_B1_XOR
// Tail of a helper: the emitted call takes the pointer and the value of
// Align.getQuantity() materialized as an i32 constant.
21171 return CGF.
Builder.CreateCall(
21173 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
// Tag the load LD with an empty !invariant.load metadata node.
21185 MDNode *MD = MDNode::get(CGF.
Builder.getContext(), {});
21186 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
// Tail of another helper: the emitted call takes the pointer and the
// builtin's second argument (index 1).
21194 llvm::Type *ElemTy =
21196 return CGF.
Builder.CreateCall(
21198 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
// Emits the call for a cp.async-style builtin. A builtin call with exactly
// three arguments forwards all three; otherwise only the first two are
// forwarded.
// NOTE(review): presumably IntrinsicIDS is the variant used for the
// three-argument form - the lines selecting the callee are not visible here.
21201static Value *MakeCpAsync(
unsigned IntrinsicID,
unsigned IntrinsicIDS,
21204 return E->getNumArgs() == 3
21206 {CGF.EmitScalarExpr(E->getArg(0)),
21207 CGF.EmitScalarExpr(E->getArg(1)),
21208 CGF.EmitScalarExpr(E->getArg(2))})
21210 {CGF.EmitScalarExpr(E->getArg(0)),
21211 CGF.EmitScalarExpr(E->getArg(1))});
// Emits an NVPTX builtin that operates on half-precision values.
//
// The builtin is accepted when the language has NativeHalfType or the target
// does not use FP16 conversion intrinsics; otherwise the branch carrying the
// " requires native half type support." message is taken. ldg_h / ldg_h2 are
// routed to MakeLdg and nvvm_ldu_global_f to MakeLdu. Everything else becomes
// a direct call to F, with each argument bitcast to the corresponding
// parameter type of F when the types differ.
21214static Value *MakeHalfType(
unsigned IntrinsicID,
unsigned BuiltinID,
21217 if (!(
C.getLangOpts().NativeHalfType ||
21218 !
C.getTargetInfo().useFP16ConversionIntrinsics())) {
21220 " requires native half type support.");
21224 if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
21225 return MakeLdg(CGF,
E);
21227 if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
21228 return MakeLdu(IntrinsicID, CGF,
E);
21232 auto *FTy = F->getFunctionType();
// Bitmask filled in by GetBuiltinType; bit i is checked per argument below.
21233 unsigned ICEArguments = 0;
21235 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
21237 for (
unsigned i = 0, e =
E->getNumArgs(); i != e; ++i) {
// No argument of these builtins may have its ICEArguments bit set.
21238 assert((ICEArguments & (1 << i)) == 0);
21240 auto *PTy = FTy->getParamType(i);
// Reconcile the emitted argument's type with the intrinsic's signature.
21241 if (PTy != ArgValue->
getType())
21242 ArgValue = CGF.
Builder.CreateBitCast(ArgValue, PTy);
21243 Args.push_back(ArgValue);
21246 return CGF.
Builder.CreateCall(F, Args);
21252 switch (BuiltinID) {
21253 case NVPTX::BI__nvvm_atom_add_gen_i:
21254 case NVPTX::BI__nvvm_atom_add_gen_l:
21255 case NVPTX::BI__nvvm_atom_add_gen_ll:
21258 case NVPTX::BI__nvvm_atom_sub_gen_i:
21259 case NVPTX::BI__nvvm_atom_sub_gen_l:
21260 case NVPTX::BI__nvvm_atom_sub_gen_ll:
21263 case NVPTX::BI__nvvm_atom_and_gen_i:
21264 case NVPTX::BI__nvvm_atom_and_gen_l:
21265 case NVPTX::BI__nvvm_atom_and_gen_ll:
21268 case NVPTX::BI__nvvm_atom_or_gen_i:
21269 case NVPTX::BI__nvvm_atom_or_gen_l:
21270 case NVPTX::BI__nvvm_atom_or_gen_ll:
21273 case NVPTX::BI__nvvm_atom_xor_gen_i:
21274 case NVPTX::BI__nvvm_atom_xor_gen_l:
21275 case NVPTX::BI__nvvm_atom_xor_gen_ll:
21278 case NVPTX::BI__nvvm_atom_xchg_gen_i:
21279 case NVPTX::BI__nvvm_atom_xchg_gen_l:
21280 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
21283 case NVPTX::BI__nvvm_atom_max_gen_i:
21284 case NVPTX::BI__nvvm_atom_max_gen_l:
21285 case NVPTX::BI__nvvm_atom_max_gen_ll:
21288 case NVPTX::BI__nvvm_atom_max_gen_ui:
21289 case NVPTX::BI__nvvm_atom_max_gen_ul:
21290 case NVPTX::BI__nvvm_atom_max_gen_ull:
21293 case NVPTX::BI__nvvm_atom_min_gen_i:
21294 case NVPTX::BI__nvvm_atom_min_gen_l:
21295 case NVPTX::BI__nvvm_atom_min_gen_ll:
21298 case NVPTX::BI__nvvm_atom_min_gen_ui:
21299 case NVPTX::BI__nvvm_atom_min_gen_ul:
21300 case NVPTX::BI__nvvm_atom_min_gen_ull:
21303 case NVPTX::BI__nvvm_atom_cas_gen_us:
21304 case NVPTX::BI__nvvm_atom_cas_gen_i:
21305 case NVPTX::BI__nvvm_atom_cas_gen_l:
21306 case NVPTX::BI__nvvm_atom_cas_gen_ll:
21311 case NVPTX::BI__nvvm_atom_add_gen_f:
21312 case NVPTX::BI__nvvm_atom_add_gen_d: {
21317 AtomicOrdering::SequentiallyConsistent);
21320 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
21325 return Builder.CreateCall(FnALI32, {Ptr, Val});
21328 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
21333 return Builder.CreateCall(FnALD32, {Ptr, Val});
21336 case NVPTX::BI__nvvm_ldg_c:
21337 case NVPTX::BI__nvvm_ldg_sc:
21338 case NVPTX::BI__nvvm_ldg_c2:
21339 case NVPTX::BI__nvvm_ldg_sc2:
21340 case NVPTX::BI__nvvm_ldg_c4:
21341 case NVPTX::BI__nvvm_ldg_sc4:
21342 case NVPTX::BI__nvvm_ldg_s:
21343 case NVPTX::BI__nvvm_ldg_s2:
21344 case NVPTX::BI__nvvm_ldg_s4:
21345 case NVPTX::BI__nvvm_ldg_i:
21346 case NVPTX::BI__nvvm_ldg_i2:
21347 case NVPTX::BI__nvvm_ldg_i4:
21348 case NVPTX::BI__nvvm_ldg_l:
21349 case NVPTX::BI__nvvm_ldg_l2:
21350 case NVPTX::BI__nvvm_ldg_ll:
21351 case NVPTX::BI__nvvm_ldg_ll2:
21352 case NVPTX::BI__nvvm_ldg_uc:
21353 case NVPTX::BI__nvvm_ldg_uc2:
21354 case NVPTX::BI__nvvm_ldg_uc4:
21355 case NVPTX::BI__nvvm_ldg_us:
21356 case NVPTX::BI__nvvm_ldg_us2:
21357 case NVPTX::BI__nvvm_ldg_us4:
21358 case NVPTX::BI__nvvm_ldg_ui:
21359 case NVPTX::BI__nvvm_ldg_ui2:
21360 case NVPTX::BI__nvvm_ldg_ui4:
21361 case NVPTX::BI__nvvm_ldg_ul:
21362 case NVPTX::BI__nvvm_ldg_ul2:
21363 case NVPTX::BI__nvvm_ldg_ull:
21364 case NVPTX::BI__nvvm_ldg_ull2:
21365 case NVPTX::BI__nvvm_ldg_f:
21366 case NVPTX::BI__nvvm_ldg_f2:
21367 case NVPTX::BI__nvvm_ldg_f4:
21368 case NVPTX::BI__nvvm_ldg_d:
21369 case NVPTX::BI__nvvm_ldg_d2:
21373 return MakeLdg(*
this,
E);
21375 case NVPTX::BI__nvvm_ldu_c:
21376 case NVPTX::BI__nvvm_ldu_sc:
21377 case NVPTX::BI__nvvm_ldu_c2:
21378 case NVPTX::BI__nvvm_ldu_sc2:
21379 case NVPTX::BI__nvvm_ldu_c4:
21380 case NVPTX::BI__nvvm_ldu_sc4:
21381 case NVPTX::BI__nvvm_ldu_s:
21382 case NVPTX::BI__nvvm_ldu_s2:
21383 case NVPTX::BI__nvvm_ldu_s4:
21384 case NVPTX::BI__nvvm_ldu_i:
21385 case NVPTX::BI__nvvm_ldu_i2:
21386 case NVPTX::BI__nvvm_ldu_i4:
21387 case NVPTX::BI__nvvm_ldu_l:
21388 case NVPTX::BI__nvvm_ldu_l2:
21389 case NVPTX::BI__nvvm_ldu_ll:
21390 case NVPTX::BI__nvvm_ldu_ll2:
21391 case NVPTX::BI__nvvm_ldu_uc:
21392 case NVPTX::BI__nvvm_ldu_uc2:
21393 case NVPTX::BI__nvvm_ldu_uc4:
21394 case NVPTX::BI__nvvm_ldu_us:
21395 case NVPTX::BI__nvvm_ldu_us2:
21396 case NVPTX::BI__nvvm_ldu_us4:
21397 case NVPTX::BI__nvvm_ldu_ui:
21398 case NVPTX::BI__nvvm_ldu_ui2:
21399 case NVPTX::BI__nvvm_ldu_ui4:
21400 case NVPTX::BI__nvvm_ldu_ul:
21401 case NVPTX::BI__nvvm_ldu_ul2:
21402 case NVPTX::BI__nvvm_ldu_ull:
21403 case NVPTX::BI__nvvm_ldu_ull2:
21404 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *
this,
E);
21405 case NVPTX::BI__nvvm_ldu_f:
21406 case NVPTX::BI__nvvm_ldu_f2:
21407 case NVPTX::BI__nvvm_ldu_f4:
21408 case NVPTX::BI__nvvm_ldu_d:
21409 case NVPTX::BI__nvvm_ldu_d2:
21410 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *
this,
E);
21412 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
21413 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
21414 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
21415 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *
this,
E);
21416 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
21417 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
21418 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
21419 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *
this,
E);
21420 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
21421 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
21422 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *
this,
E);
21423 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
21424 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
21425 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *
this,
E);
21426 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
21427 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
21428 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
21429 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *
this,
E);
21430 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
21431 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
21432 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
21433 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *
this,
E);
21434 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
21435 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
21436 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
21437 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
21438 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
21439 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
21440 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *
this,
E);
21441 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
21442 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
21443 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
21444 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
21445 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
21446 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
21447 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *
this,
E);
21448 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
21449 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
21450 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
21451 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
21452 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
21453 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
21454 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *
this,
E);
21455 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
21456 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
21457 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
21458 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
21459 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
21460 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
21461 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *
this,
E);
21462 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
21463 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *
this,
E);
21464 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
21465 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *
this,
E);
21466 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
21467 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *
this,
E);
21468 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
21469 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *
this,
E);
21470 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
21471 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
21472 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
21473 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *
this,
E);
21474 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
21475 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
21476 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
21477 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *
this,
E);
21478 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
21479 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
21480 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
21481 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *
this,
E);
21482 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
21483 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
21484 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
21485 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *
this,
E);
21486 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
21487 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
21488 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
21489 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *
this,
E);
21490 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
21491 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
21492 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
21493 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *
this,
E);
21494 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
21495 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
21496 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
21497 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
21499 llvm::Type *ElemTy =
21503 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
21504 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21506 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
21507 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
21508 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
21509 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
21511 llvm::Type *ElemTy =
21515 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
21516 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21518 case NVPTX::BI__nvvm_match_all_sync_i32p:
21519 case NVPTX::BI__nvvm_match_all_sync_i64p: {
21525 ? Intrinsic::nvvm_match_all_sync_i32p
21526 : Intrinsic::nvvm_match_all_sync_i64p),
21531 return Builder.CreateExtractValue(ResultPair, 0);
21535 case NVPTX::BI__hmma_m16n16k16_ld_a:
21536 case NVPTX::BI__hmma_m16n16k16_ld_b:
21537 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21538 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21539 case NVPTX::BI__hmma_m32n8k16_ld_a:
21540 case NVPTX::BI__hmma_m32n8k16_ld_b:
21541 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21542 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21543 case NVPTX::BI__hmma_m8n32k16_ld_a:
21544 case NVPTX::BI__hmma_m8n32k16_ld_b:
21545 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21546 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21548 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21549 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21550 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21551 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21552 case NVPTX::BI__imma_m16n16k16_ld_c:
21553 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21554 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21555 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21556 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21557 case NVPTX::BI__imma_m32n8k16_ld_c:
21558 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21559 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21560 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21561 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21562 case NVPTX::BI__imma_m8n32k16_ld_c:
21564 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21565 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21566 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21567 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21568 case NVPTX::BI__imma_m8n8k32_ld_c:
21569 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21570 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21571 case NVPTX::BI__bmma_m8n8k128_ld_c:
21573 case NVPTX::BI__dmma_m8n8k4_ld_a:
21574 case NVPTX::BI__dmma_m8n8k4_ld_b:
21575 case NVPTX::BI__dmma_m8n8k4_ld_c:
21577 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21578 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21579 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21580 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21581 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21582 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21583 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21584 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21585 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
21589 std::optional<llvm::APSInt> isColMajorArg =
21591 if (!isColMajorArg)
21593 bool isColMajor = isColMajorArg->getSExtValue();
21594 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21595 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21603 assert(II.NumResults);
21604 if (II.NumResults == 1) {
21608 for (
unsigned i = 0; i < II.NumResults; ++i) {
21613 llvm::ConstantInt::get(
IntTy, i)),
21620 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21621 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21622 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21623 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21624 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21625 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21626 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21627 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21628 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21629 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21630 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21631 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21632 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
21636 std::optional<llvm::APSInt> isColMajorArg =
21638 if (!isColMajorArg)
21640 bool isColMajor = isColMajorArg->getSExtValue();
21641 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21642 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21647 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
21649 for (
unsigned i = 0; i < II.NumResults; ++i) {
21653 llvm::ConstantInt::get(
IntTy, i)),
21655 Values.push_back(
Builder.CreateBitCast(
V, ParamType));
21657 Values.push_back(Ldm);
21664 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21665 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21666 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21667 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21668 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21669 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21670 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21671 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21672 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21673 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21674 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21675 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21676 case NVPTX::BI__imma_m16n16k16_mma_s8:
21677 case NVPTX::BI__imma_m16n16k16_mma_u8:
21678 case NVPTX::BI__imma_m32n8k16_mma_s8:
21679 case NVPTX::BI__imma_m32n8k16_mma_u8:
21680 case NVPTX::BI__imma_m8n32k16_mma_s8:
21681 case NVPTX::BI__imma_m8n32k16_mma_u8:
21682 case NVPTX::BI__imma_m8n8k32_mma_s4:
21683 case NVPTX::BI__imma_m8n8k32_mma_u4:
21684 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21685 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21686 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21687 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21688 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21689 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21690 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
21695 std::optional<llvm::APSInt> LayoutArg =
21699 int Layout = LayoutArg->getSExtValue();
21700 if (Layout < 0 || Layout > 3)
21702 llvm::APSInt SatfArg;
21703 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
21704 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
21706 else if (std::optional<llvm::APSInt> OptSatfArg =
21708 SatfArg = *OptSatfArg;
21711 bool Satf = SatfArg.getSExtValue();
21712 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
21713 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
21719 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
21721 for (
unsigned i = 0; i < MI.NumEltsA; ++i) {
21725 llvm::ConstantInt::get(
IntTy, i)),
21727 Values.push_back(
Builder.CreateBitCast(
V, AType));
21730 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
21731 for (
unsigned i = 0; i < MI.NumEltsB; ++i) {
21735 llvm::ConstantInt::get(
IntTy, i)),
21737 Values.push_back(
Builder.CreateBitCast(
V, BType));
21740 llvm::Type *CType =
21741 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
21742 for (
unsigned i = 0; i < MI.NumEltsC; ++i) {
21746 llvm::ConstantInt::get(
IntTy, i)),
21748 Values.push_back(
Builder.CreateBitCast(
V, CType));
21752 for (
unsigned i = 0; i < MI.NumEltsD; ++i)
21756 llvm::ConstantInt::get(
IntTy, i)),
21761 case NVPTX::BI__nvvm_ex2_approx_f16:
21762 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID,
E, *
this);
21763 case NVPTX::BI__nvvm_ex2_approx_f16x2:
21764 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID,
E, *
this);
21765 case NVPTX::BI__nvvm_ff2f16x2_rn:
21766 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID,
E, *
this);
21767 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
21768 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID,
E, *
this);
21769 case NVPTX::BI__nvvm_ff2f16x2_rz:
21770 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID,
E, *
this);
21771 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
21772 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID,
E, *
this);
21773 case NVPTX::BI__nvvm_fma_rn_f16:
21774 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID,
E, *
this);
21775 case NVPTX::BI__nvvm_fma_rn_f16x2:
21776 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID,
E, *
this);
21777 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
21778 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID,
E, *
this);
21779 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
21780 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID,
E, *
this);
21781 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
21782 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID,
E,
21784 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
21785 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID,
E,
21787 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
21788 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID,
E,
21790 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
21791 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID,
E,
21793 case NVPTX::BI__nvvm_fma_rn_relu_f16:
21794 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID,
E, *
this);
21795 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
21796 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID,
E, *
this);
21797 case NVPTX::BI__nvvm_fma_rn_sat_f16:
21798 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID,
E, *
this);
21799 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
21800 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID,
E, *
this);
21801 case NVPTX::BI__nvvm_fmax_f16:
21802 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID,
E, *
this);
21803 case NVPTX::BI__nvvm_fmax_f16x2:
21804 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID,
E, *
this);
21805 case NVPTX::BI__nvvm_fmax_ftz_f16:
21806 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID,
E, *
this);
21807 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
21808 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID,
E, *
this);
21809 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
21810 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID,
E, *
this);
21811 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
21812 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID,
E,
21814 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
21815 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
21817 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
21818 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
21819 BuiltinID,
E, *
this);
21820 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
21821 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID,
E,
21823 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
21824 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
21826 case NVPTX::BI__nvvm_fmax_nan_f16:
21827 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID,
E, *
this);
21828 case NVPTX::BI__nvvm_fmax_nan_f16x2:
21829 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID,
E, *
this);
21830 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
21831 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID,
E,
21833 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
21834 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
21836 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
21837 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID,
E,
21839 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
21840 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID,
E,
21842 case NVPTX::BI__nvvm_fmin_f16:
21843 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID,
E, *
this);
21844 case NVPTX::BI__nvvm_fmin_f16x2:
21845 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID,
E, *
this);
21846 case NVPTX::BI__nvvm_fmin_ftz_f16:
21847 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID,
E, *
this);
21848 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
21849 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID,
E, *
this);
21850 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
21851 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID,
E, *
this);
21852 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
21853 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID,
E,
21855 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
21856 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
21858 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
21859 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
21860 BuiltinID,
E, *
this);
21861 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
21862 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID,
E,
21864 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
21865 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
21867 case NVPTX::BI__nvvm_fmin_nan_f16:
21868 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID,
E, *
this);
21869 case NVPTX::BI__nvvm_fmin_nan_f16x2:
21870 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID,
E, *
this);
21871 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
21872 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID,
E,
21874 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
21875 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
21877 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
21878 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID,
E,
21880 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
21881 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID,
E,
21883 case NVPTX::BI__nvvm_ldg_h:
21884 case NVPTX::BI__nvvm_ldg_h2:
21885 return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID,
E, *
this);
21886 case NVPTX::BI__nvvm_ldu_h:
21887 case NVPTX::BI__nvvm_ldu_h2:
21888 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID,
E, *
this);
21889 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
21890 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
21891 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *
this,
E,
21893 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
21894 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
21895 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *
this,
E,
21897 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
21898 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
21899 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *
this,
E,
21901 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
21902 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
21903 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *
this,
E,
21905 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
21908 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
21911 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
21914 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
21917 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
21920 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
21923 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
21926 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
21929 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
21932 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
21935 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
21938 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
21941 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
21944 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
21947 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
21950 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
21953 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
21956 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
21959 case NVPTX::BI__nvvm_is_explicit_cluster:
21962 case NVPTX::BI__nvvm_isspacep_shared_cluster:
21966 case NVPTX::BI__nvvm_mapa:
21969 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21970 case NVPTX::BI__nvvm_mapa_shared_cluster:
21973 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21974 case NVPTX::BI__nvvm_getctarank:
21978 case NVPTX::BI__nvvm_getctarank_shared_cluster:
21982 case NVPTX::BI__nvvm_barrier_cluster_arrive:
21985 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
21988 case NVPTX::BI__nvvm_barrier_cluster_wait:
21991 case NVPTX::BI__nvvm_fence_sc_cluster:
// Bundle of values shared by the alignment builtin emitters: the source
// operand, its LLVM type, the requested alignment, the derived low-bit mask,
// and the integer type the arithmetic is carried out in.
// NOTE(review): the constructor header and the closing braces are not visible
// in this excerpt; the statements after the field list appear to be its body.
22000struct BuiltinAlignArgs {
22001 llvm::Value *Src =
nullptr;
22002 llvm::Type *SrcType =
nullptr;
22003 llvm::Value *Alignment =
nullptr;
22004 llvm::Value *Mask =
nullptr;
22005 llvm::IntegerType *IntType =
nullptr;
// The source may be a pointer or an integer. A pointer gets a separately
// constructed integer type (the arguments of that IntegerType::get call are
// not visible here); an integer source supplies IntType directly.
22013 SrcType = Src->getType();
22014 if (SrcType->isPointerTy()) {
22015 IntType = IntegerType::get(
22019 assert(SrcType->isIntegerTy());
22020 IntType = cast<llvm::IntegerType>(SrcType);
// Bring the alignment operand to IntType so it can be combined with the
// source value, widening with zero-extension or truncating as needed.
22023 Alignment = CGF.
Builder.CreateZExtOrTrunc(Alignment, IntType,
"alignment");
// Mask = Alignment - 1.
// NOTE(review): this selects exactly the low bits only if Alignment is a
// power of two; nothing visible here enforces that -- confirm at the caller.
22024 auto *One = llvm::ConstantInt::get(IntType, 1);
22025 Mask = CGF.
Builder.CreateSub(Alignment, One,
"mask");
// Fragment of an alignment-check emitter; the function header and some
// statements are not visible in this excerpt.
// Args carries the source value, its type, the mask and the integer type.
22032 BuiltinAlignArgs Args(
E, *
this);
22033 llvm::Value *SrcAddress = Args.Src;
// A pointer source is cast to Args.IntType ("src_addr") so it can be masked.
// NOTE(review): the assignment target of this cast is on a line that is not
// visible here; presumably SrcAddress -- confirm.
22034 if (Args.SrcType->isPointerTy())
22036 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType,
"src_addr");
// The bits of the address selected by Args.Mask ("set_bits") are paired with
// a zero constant of Args.IntType under the value name "is_aligned".
// NOTE(review): the enclosing call that consumes these operands is not
// visible; presumably an equality compare against zero -- confirm.
22038 Builder.CreateAnd(SrcAddress, Args.Mask,
"set_bits"),
22039 llvm::Constant::getNullValue(Args.IntType),
"is_aligned"));
// Fragment of an emitter that rounds a value to an alignment boundary; the
// function header and several statements are not visible in this excerpt.
22046 BuiltinAlignArgs Args(
E, *
this);
22047 llvm::Value *SrcForMask = Args.Src;
// NOTE(review): the body of this pointer-type branch and any outer condition
// guarding the addition below are not visible here.
22053 if (Args.Src->getType()->isPointerTy()) {
// Adding the mask ("over_boundary") before the low bits are cleared makes the
// masking step round upwards.
// NOTE(review): presumably this runs only on the align-up path -- confirm.
22063 SrcForMask =
Builder.CreateAdd(SrcForMask, Args.Mask,
"over_boundary");
// ~Mask keeps every bit except the low bits that Mask selects.
22067 llvm::Value *InvertedMask =
Builder.CreateNot(Args.Mask,
"inverted_mask");
22068 llvm::Value *
Result =
nullptr;
// Pointer sources are masked through the ptrmask intrinsic, overloaded on
// the source type and the integer type; other sources use a plain AND.
22069 if (Args.Src->getType()->isPointerTy()) {
22071 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
22072 {SrcForMask, InvertedMask},
nullptr,
"aligned_result");
22074 Result =
Builder.CreateAnd(SrcForMask, InvertedMask,
"aligned_result");
// Either path must produce a value of the same type as the source operand.
22076 assert(
Result->getType() == Args.SrcType);
// Fragment of a switch over BuiltinID handling WebAssembly:: builtin IDs.
// The function header and most case bodies are not visible in this excerpt.
// In the visible lines, most cases end by returning Builder.CreateCall on a
// Callee whose definition is not shown, passing the listed operands.
22082 switch (BuiltinID) {
22083 case WebAssembly::BI__builtin_wasm_memory_size: {
22088 return Builder.CreateCall(Callee, I);
22090 case WebAssembly::BI__builtin_wasm_memory_grow: {
22096 return Builder.CreateCall(Callee, Args);
22098 case WebAssembly::BI__builtin_wasm_tls_size: {
22101 return Builder.CreateCall(Callee);
22103 case WebAssembly::BI__builtin_wasm_tls_align: {
22106 return Builder.CreateCall(Callee);
22108 case WebAssembly::BI__builtin_wasm_tls_base: {
22110 return Builder.CreateCall(Callee);
22112 case WebAssembly::BI__builtin_wasm_throw: {
22116 return Builder.CreateCall(Callee, {
Tag, Obj});
22118 case WebAssembly::BI__builtin_wasm_rethrow: {
22120 return Builder.CreateCall(Callee);
22122 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
22129 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
22136 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
22140 return Builder.CreateCall(Callee, {Addr, Count});
// Float-to-integer truncations: each group shares one case body that emits a
// single-operand call on Src.
22142 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
22143 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
22144 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
22145 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
22150 return Builder.CreateCall(Callee, {Src});
22152 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
22153 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
22154 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
22155 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
22160 return Builder.CreateCall(Callee, {Src});
22162 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
22163 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
22164 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
22165 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
22166 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
22167 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
22172 return Builder.CreateCall(Callee, {Src});
22174 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
22175 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
22176 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
22177 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
22178 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
22179 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
22184 return Builder.CreateCall(Callee, {Src});
// Binary floating-point min/max/pmin/pmax: two-operand calls on LHS and RHS.
22186 case WebAssembly::BI__builtin_wasm_min_f32:
22187 case WebAssembly::BI__builtin_wasm_min_f64:
22188 case WebAssembly::BI__builtin_wasm_min_f16x8:
22189 case WebAssembly::BI__builtin_wasm_min_f32x4:
22190 case WebAssembly::BI__builtin_wasm_min_f64x2: {
22195 return Builder.CreateCall(Callee, {LHS, RHS});
22197 case WebAssembly::BI__builtin_wasm_max_f32:
22198 case WebAssembly::BI__builtin_wasm_max_f64:
22199 case WebAssembly::BI__builtin_wasm_max_f16x8:
22200 case WebAssembly::BI__builtin_wasm_max_f32x4:
22201 case WebAssembly::BI__builtin_wasm_max_f64x2: {
22206 return Builder.CreateCall(Callee, {LHS, RHS});
22208 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
22209 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
22210 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
22215 return Builder.CreateCall(Callee, {LHS, RHS});
22217 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
22218 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
22219 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
22224 return Builder.CreateCall(Callee, {LHS, RHS});
// Vector rounding builtins: the inner switch selects the generic LLVM
// intrinsic by operation, regardless of lane type (ceil, floor, trunc, and
// nearbyint for the "nearest" builtins).
22226 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22227 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22228 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22229 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22230 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22231 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22232 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22233 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22234 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22235 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22236 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22237 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
22239 switch (BuiltinID) {
22240 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22241 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22242 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22243 IntNo = Intrinsic::ceil;
22245 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22246 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22247 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22248 IntNo = Intrinsic::floor;
22250 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22251 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22252 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22253 IntNo = Intrinsic::trunc;
22255 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22256 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22257 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
22258 IntNo = Intrinsic::nearbyint;
22261 llvm_unreachable(
"unexpected builtin ID");
22267 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
22269 return Builder.CreateCall(Callee);
22271 case WebAssembly::BI__builtin_wasm_ref_null_func: {
22273 return Builder.CreateCall(Callee);
22275 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
22279 return Builder.CreateCall(Callee, {Src, Indices});
// Integer vector abs is emitted without an intrinsic: lanes that compare
// signed-less-than zero take Neg, all others keep Vec.
// NOTE(review): the definitions of Vec and Neg are not visible here.
22281 case WebAssembly::BI__builtin_wasm_abs_i8x16:
22282 case WebAssembly::BI__builtin_wasm_abs_i16x8:
22283 case WebAssembly::BI__builtin_wasm_abs_i32x4:
22284 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
22287 Constant *
Zero = llvm::Constant::getNullValue(Vec->
getType());
22288 Value *ICmp =
Builder.CreateICmpSLT(Vec, Zero,
"abscond");
22289 return Builder.CreateSelect(ICmp, Neg, Vec,
"abs");
22291 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
22292 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
22297 return Builder.CreateCall(Callee, {LHS, RHS});
22299 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
22303 return Builder.CreateCall(Callee, {LHS, RHS});
// Pairwise extending add: the inner switch selects the signed or unsigned
// intrinsic; the lane width is not part of that choice.
22305 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22306 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22307 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22308 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
22311 switch (BuiltinID) {
22312 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22313 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22314 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
22316 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22317 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
22318 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
22321 llvm_unreachable(
"unexpected builtin ID");
22325 return Builder.CreateCall(Callee, Vec);
22327 case WebAssembly::BI__builtin_wasm_bitselect: {
22333 return Builder.CreateCall(Callee, {V1, V2,
C});
22335 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
22339 return Builder.CreateCall(Callee, {LHS, RHS});
// Boolean reductions: any_true maps to wasm_anytrue, and every all_true
// lane variant maps to wasm_alltrue.
22341 case WebAssembly::BI__builtin_wasm_any_true_v128:
22342 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22343 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22344 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22345 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
22347 switch (BuiltinID) {
22348 case WebAssembly::BI__builtin_wasm_any_true_v128:
22349 IntNo = Intrinsic::wasm_anytrue;
22351 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22352 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22353 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22354 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
22355 IntNo = Intrinsic::wasm_alltrue;
22358 llvm_unreachable(
"unexpected builtin ID");
22362 return Builder.CreateCall(Callee, {Vec});
22364 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
22365 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
22366 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
22367 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
22371 return Builder.CreateCall(Callee, {Vec});
22373 case WebAssembly::BI__builtin_wasm_abs_f16x8:
22374 case WebAssembly::BI__builtin_wasm_abs_f32x4:
22375 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
22378 return Builder.CreateCall(Callee, {Vec});
22380 case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
22381 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
22382 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
22385 return Builder.CreateCall(Callee, {Vec});
// Narrowing: signed or unsigned intrinsic, called with the Low and High
// input vectors.
22387 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22388 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22389 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22390 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
22394 switch (BuiltinID) {
22395 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22396 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22397 IntNo = Intrinsic::wasm_narrow_signed;
22399 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22400 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
22401 IntNo = Intrinsic::wasm_narrow_unsigned;
22404 llvm_unreachable(
"unexpected builtin ID");
22408 return Builder.CreateCall(Callee, {Low, High});
// Saturating truncation with zeroed lanes: uses the generic fptosi_sat or
// fptoui_sat intrinsic. TruncT is the source type with its element type
// replaced by i32, and Splat is the all-zero value of that type.
// NOTE(review): how Splat is combined with the truncated value is not
// visible; presumably it supplies the zero lanes of the result -- confirm.
22410 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22411 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
22414 switch (BuiltinID) {
22415 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22416 IntNo = Intrinsic::fptosi_sat;
22418 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
22419 IntNo = Intrinsic::fptoui_sat;
22422 llvm_unreachable(
"unexpected builtin ID");
22424 llvm::Type *SrcT = Vec->
getType();
22425 llvm::Type *TruncT = SrcT->getWithNewType(
Builder.getInt32Ty());
22428 Value *Splat = Constant::getNullValue(TruncT);
// Shuffle: operands are filled in until OpIdx reaches 18. Each lane operand
// handled in the loop must be a compile-time constant (asserted) and is
// materialized as a ConstantInt.
22431 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
22436 while (OpIdx < 18) {
22437 std::optional<llvm::APSInt> LaneConst =
22439 assert(LaneConst &&
"Constant arg isn't actually constant?");
22440 Ops[OpIdx++] = llvm::ConstantInt::get(
getLLVMContext(), *LaneConst);
22443 return Builder.CreateCall(Callee, Ops);
// Relaxed fused multiply-add: madd or nmadd intrinsic chosen by operation,
// shared across the f16x8, f32x4 and f64x2 variants.
22445 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22446 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22447 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22448 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22449 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22450 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
22455 switch (BuiltinID) {
22456 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22457 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22458 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22459 IntNo = Intrinsic::wasm_relaxed_madd;
22461 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22462 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22463 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
22464 IntNo = Intrinsic::wasm_relaxed_nmadd;
22467 llvm_unreachable(
"unexpected builtin ID");
22470 return Builder.CreateCall(Callee, {A, B,
C});
22472 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
22473 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
22474 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
22475 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
22481 return Builder.CreateCall(Callee, {A, B,
C});
22483 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
22487 return Builder.CreateCall(Callee, {Src, Indices});
// Relaxed min/max: intrinsic chosen by operation, shared by f32x4 and f64x2.
22489 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22490 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22491 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22492 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
22496 switch (BuiltinID) {
22497 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22498 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22499 IntNo = Intrinsic::wasm_relaxed_min;
22501 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22502 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
22503 IntNo = Intrinsic::wasm_relaxed_max;
22506 llvm_unreachable(
"unexpected builtin ID");
22509 return Builder.CreateCall(Callee, {LHS, RHS});
// Relaxed truncation: each of the four builtins has its own intrinsic
// (signed/unsigned, with or without the "_zero" suffix).
22511 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22512 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22513 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22514 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
22517 switch (BuiltinID) {
22518 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22519 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
22521 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22522 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
22524 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22525 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
22527 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
22528 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
22531 llvm_unreachable(
"unexpected builtin ID");
22534 return Builder.CreateCall(Callee, {Vec});
22536 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
22540 return Builder.CreateCall(Callee, {LHS, RHS});
22542 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
22547 return Builder.CreateCall(Callee, {LHS, RHS});
22549 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
22554 CGM.
getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
22555 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22557 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
22563 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22565 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
22568 return Builder.CreateCall(Callee, {Addr});
22570 case WebAssembly::BI__builtin_wasm_storef16_f32: {
22574 return Builder.CreateCall(Callee, {Val, Addr});
22576 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
22579 return Builder.CreateCall(Callee, {Val});
22581 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
22587 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
// Table builtins.
// NOTE(review): the "Unexpected reference type ..." string literals below
// presumably belong to unreachable/diagnostic calls on lines that are not
// visible in this excerpt -- confirm.
22594 case WebAssembly::BI__builtin_wasm_table_get: {
22605 "Unexpected reference type for __builtin_wasm_table_get");
22606 return Builder.CreateCall(Callee, {Table, Index});
22608 case WebAssembly::BI__builtin_wasm_table_set: {
22620 "Unexpected reference type for __builtin_wasm_table_set");
22621 return Builder.CreateCall(Callee, {Table, Index, Val});
22623 case WebAssembly::BI__builtin_wasm_table_size: {
22629 case WebAssembly::BI__builtin_wasm_table_grow: {
22642 "Unexpected reference type for __builtin_wasm_table_grow");
22644 return Builder.CreateCall(Callee, {Table, Val, NElems});
22646 case WebAssembly::BI__builtin_wasm_table_fill: {
22660 "Unexpected reference type for __builtin_wasm_table_fill");
22662 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
22664 case WebAssembly::BI__builtin_wasm_table_copy: {
22674 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
// Fragment of a file-local lookup helper that maps a Hexagon builtin ID to a
// pair of (intrinsic ID, the table entry's VecLen value). It returns
// {Intrinsic::not_intrinsic, 0} when the builtin ID has no table entry.
// NOTE(review): the line carrying the function name and parameter list, and
// the declaration of the VecLen member, are not visible in this excerpt.
22681static std::pair<Intrinsic::ID, unsigned>
22684 unsigned BuiltinID;
22685 Intrinsic::ID IntrinsicID;
// The table is filled by expanding CUSTOM_BUILTIN_MAPPING once for every
// entry of the included .def file; each expansion produces one initializer.
22688 static Info Infos[] = {
22689#define CUSTOM_BUILTIN_MAPPING(x,s) \
22690 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
22722#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
22723#undef CUSTOM_BUILTIN_MAPPING
// Entries are ordered by builtin ID only.
22726 auto CmpInfo = [] (Info A, Info B) {
return A.BuiltinID < B.BuiltinID; };
// The comma expression sorts Infos as a side effect of initializing a static
// flag, so the sort is tied to that one-time initialization.
22727 static const bool SortOnce = (llvm::sort(Infos, CmpInfo),
true);
// Binary search of the sorted table; only BuiltinID of the probe entry is
// meaningful because CmpInfo compares nothing else.
22730 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
// lower_bound returns the first entry not less than the probe, so an exact
// match still has to be checked.
22731 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
22732 return {Intrinsic::not_intrinsic, 0};
22734 return {F->IntrinsicID, F->VecLen};
22743 auto MakeCircOp = [
this,
E](
unsigned IntID,
bool IsLoad) {
22757 for (
unsigned i = 1, e =
E->getNumArgs(); i != e; ++i)
22763 llvm::Value *NewBase = IsLoad ?
Builder.CreateExtractValue(
Result, 1)
22767 llvm::Value *RetVal =
22777 auto MakeBrevLd = [
this,
E](
unsigned IntID, llvm::Type *DestTy) {
22788 llvm::Value *DestAddress = DestAddr.