// Mapping entries from NEON builtin names to ARM LLVM intrinsic IDs.
// NEONMAP1 pairs a builtin with one intrinsic; NEONMAP2 supplies an
// alternate intrinsic as well. The trailing 0 is the type-modifier mask
// (no extra modifiers for these entries).
// NOTE(review): the enclosing array declaration is outside this view, and
// the entries below are a sampled subset (original line numbers embedded
// at the left of each entry) — confirm against the full table.
// --- BFloat16 <-> f32 conversion (A32 form) ---
541 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
// --- Absolute value ---
549 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
// --- AES (crypto extension) ---
553 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
554 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
555 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
556 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
// --- BFloat16 dot product / widening MLA / matrix multiply ---
557 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
558 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
559 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
560 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
561 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
// --- Absolute compares. vcale/vcalt map to the same acge/acgt
//     intrinsics as vcage/vcagt; the emitter swaps the two operands for
//     the "le"/"lt" forms (see the vcale/vcalt cases in the builtin
//     handler below). ---
574 NEONMAP1(vcage_v, arm_neon_vacge, 0),
575 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
576 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
577 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
578 NEONMAP1(vcale_v, arm_neon_vacge, 0),
579 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
580 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
581 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
// --- Half-precision <-> single-precision conversions ---
598 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
601 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
// --- Fixed-point conversions (vcvt_n_*). For the NEONMAP2 _v entries
//     the first intrinsic is selected for unsigned element types and the
//     second for signed (see the Usgn select in the emitter). ---
603 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
604 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
605 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
606 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
607 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
608 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
609 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
610 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
611 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
// --- Rounding-variant float->int conversions, vcvta* family
//     (signed: vcvtas, unsigned: vcvtau) ---
618 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
619 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
620 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
621 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
622 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
623 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
624 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
625 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
626 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
627 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
628 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
629 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
// --- Scalar f32 -> bf16 conversion ---
630 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
// --- vcvtm* family (vcvtms / vcvtmu) ---
631 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
632 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
633 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
634 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
635 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
636 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
637 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
638 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
639 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
640 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
641 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
642 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
// --- vcvtn* family (vcvtns / vcvtnu) ---
643 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
644 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
645 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
646 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
647 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
648 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
649 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
650 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
651 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
652 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
653 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
654 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
// --- vcvtp* family (vcvtps / vcvtpu) ---
655 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
656 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
657 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
658 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
659 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
660 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
661 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
662 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
663 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
664 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
665 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
666 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
// --- Quad-register fixed-point conversions (vcvtq_n_*) ---
670 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
671 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
672 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
673 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
674 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
675 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
676 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
677 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
678 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
// --- Dot products (signed / unsigned) ---
685 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
686 NEONMAP1(vdot_u32, arm_neon_udot, 0),
687 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
688 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
// --- Structure/multi-register loads (vld1..vld4, dup/lane/xN variants) ---
699 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
700 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
701 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
703 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
704 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
705 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
706 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
707 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
708 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
710 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
711 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
712 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
713 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
714 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
716 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
717 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
718 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
719 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
720 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
722 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
723 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
724 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
// --- Integer matrix multiply-accumulate ---
733 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
734 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
// --- Saturating doubling multiply long + saturating add/sub
//     (second intrinsic is the saturating accumulate step) ---
752 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
753 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
// --- Saturating shift-left-unsigned by immediate ---
777 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
778 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
// --- Reciprocal / reciprocal-sqrt estimates. Both NEONMAP2 slots name
//     the same intrinsic; the emitter selects between LLVMIntrinsic and
//     AltLLVMIntrinsic based on FP-vs-integer element type, so the two
//     slots must both be populated. ---
782 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
783 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
806 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
807 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
// --- SHA-1 / SHA-256 (crypto extension) ---
811 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
812 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
813 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
814 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
815 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
816 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
// --- Structure/multi-register stores (vst1..vst4, lane/xN variants) ---
826 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
827 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
828 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
829 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
830 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
831 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
832 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
833 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
835 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
836 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
837 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
839 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
840 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
841 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
843 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
844 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
// --- Mixed-sign (unsigned x signed) dot product / matrix multiply ---
850 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
851 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
852 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
1178 unsigned BuiltinID,
unsigned LLVMIntrinsic,
unsigned AltLLVMIntrinsic,
1179 const char *NameHint,
unsigned Modifier,
const CallExpr *E,
1181 llvm::Triple::ArchType
Arch) {
1187 std::optional<llvm::APSInt> NeonTypeConst =
1194 const bool Usgn =
Type.isUnsigned();
1195 const bool Quad =
Type.isQuad();
1196 const bool Floating =
Type.isFloatingPoint();
1198 const bool AllowBFloatArgsAndRet =
1201 llvm::FixedVectorType *VTy =
1202 GetNeonType(
this,
Type, HasFastHalfType,
false, AllowBFloatArgsAndRet);
1203 llvm::Type *Ty = VTy;
1207 auto getAlignmentValue32 = [&](
Address addr) ->
Value* {
1208 return Builder.getInt32(addr.getAlignment().getQuantity());
1211 unsigned Int = LLVMIntrinsic;
1213 Int = AltLLVMIntrinsic;
1215 switch (BuiltinID) {
1217 case NEON::BI__builtin_neon_splat_lane_v:
1218 case NEON::BI__builtin_neon_splat_laneq_v:
1219 case NEON::BI__builtin_neon_splatq_lane_v:
1220 case NEON::BI__builtin_neon_splatq_laneq_v: {
1221 auto NumElements = VTy->getElementCount();
1222 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1223 NumElements = NumElements * 2;
1224 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1225 NumElements = NumElements.divideCoefficientBy(2);
1227 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
1230 case NEON::BI__builtin_neon_vpadd_v:
1231 case NEON::BI__builtin_neon_vpaddq_v:
1233 if (VTy->getElementType()->isFloatingPointTy() &&
1234 Int == Intrinsic::aarch64_neon_addp)
1235 Int = Intrinsic::aarch64_neon_faddp;
1237 case NEON::BI__builtin_neon_vabs_v:
1238 case NEON::BI__builtin_neon_vabsq_v:
1239 if (VTy->getElementType()->isFloatingPointTy())
1240 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops,
"vabs");
1241 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops,
"vabs");
1242 case NEON::BI__builtin_neon_vadd_v:
1243 case NEON::BI__builtin_neon_vaddq_v: {
1244 llvm::Type *VTy = llvm::FixedVectorType::get(
Int8Ty, Quad ? 16 : 8);
1245 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
1246 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
1247 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
1248 return Builder.CreateBitCast(Ops[0], Ty);
1250 case NEON::BI__builtin_neon_vaddhn_v: {
1251 llvm::FixedVectorType *SrcTy =
1252 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1255 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1256 Ops[1] =
Builder.CreateBitCast(Ops[1], SrcTy);
1257 Ops[0] =
Builder.CreateAdd(Ops[0], Ops[1],
"vaddhn");
1260 Constant *ShiftAmt =
1261 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1262 Ops[0] =
Builder.CreateLShr(Ops[0], ShiftAmt,
"vaddhn");
1265 return Builder.CreateTrunc(Ops[0], VTy,
"vaddhn");
1267 case NEON::BI__builtin_neon_vcale_v:
1268 case NEON::BI__builtin_neon_vcaleq_v:
1269 case NEON::BI__builtin_neon_vcalt_v:
1270 case NEON::BI__builtin_neon_vcaltq_v:
1271 std::swap(Ops[0], Ops[1]);
1273 case NEON::BI__builtin_neon_vcage_v:
1274 case NEON::BI__builtin_neon_vcageq_v:
1275 case NEON::BI__builtin_neon_vcagt_v:
1276 case NEON::BI__builtin_neon_vcagtq_v: {
1278 switch (VTy->getScalarSizeInBits()) {
1279 default: llvm_unreachable(
"unexpected type");
1290 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1291 llvm::Type *Tys[] = { VTy, VecFlt };
1292 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1295 case NEON::BI__builtin_neon_vceqz_v:
1296 case NEON::BI__builtin_neon_vceqzq_v:
1298 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ,
"vceqz");
1299 case NEON::BI__builtin_neon_vcgez_v:
1300 case NEON::BI__builtin_neon_vcgezq_v:
1302 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1304 case NEON::BI__builtin_neon_vclez_v:
1305 case NEON::BI__builtin_neon_vclezq_v:
1307 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1309 case NEON::BI__builtin_neon_vcgtz_v:
1310 case NEON::BI__builtin_neon_vcgtzq_v:
1312 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1314 case NEON::BI__builtin_neon_vcltz_v:
1315 case NEON::BI__builtin_neon_vcltzq_v:
1317 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1319 case NEON::BI__builtin_neon_vclz_v:
1320 case NEON::BI__builtin_neon_vclzq_v:
1325 case NEON::BI__builtin_neon_vcvt_f32_v:
1326 case NEON::BI__builtin_neon_vcvtq_f32_v:
1327 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1330 return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
1331 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
1332 case NEON::BI__builtin_neon_vcvt_f16_s16:
1333 case NEON::BI__builtin_neon_vcvt_f16_u16:
1334 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1335 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1336 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1339 return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
1340 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
1341 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1342 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1343 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1344 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1349 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1350 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1351 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1352 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1354 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1358 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1359 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1360 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1361 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1362 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1363 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1364 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1365 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1366 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1367 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1368 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1369 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1371 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1374 case NEON::BI__builtin_neon_vcvt_s32_v:
1375 case NEON::BI__builtin_neon_vcvt_u32_v:
1376 case NEON::BI__builtin_neon_vcvt_s64_v:
1377 case NEON::BI__builtin_neon_vcvt_u64_v:
1378 case NEON::BI__builtin_neon_vcvt_s16_f16:
1379 case NEON::BI__builtin_neon_vcvt_u16_f16:
1380 case NEON::BI__builtin_neon_vcvtq_s32_v:
1381 case NEON::BI__builtin_neon_vcvtq_u32_v:
1382 case NEON::BI__builtin_neon_vcvtq_s64_v:
1383 case NEON::BI__builtin_neon_vcvtq_u64_v:
1384 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1385 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1387 return Usgn ?
Builder.CreateFPToUI(Ops[0], Ty,
"vcvt")
1388 :
Builder.CreateFPToSI(Ops[0], Ty,
"vcvt");
1390 case NEON::BI__builtin_neon_vcvta_s16_f16:
1391 case NEON::BI__builtin_neon_vcvta_s32_v:
1392 case NEON::BI__builtin_neon_vcvta_s64_v:
1393 case NEON::BI__builtin_neon_vcvta_u16_f16:
1394 case NEON::BI__builtin_neon_vcvta_u32_v:
1395 case NEON::BI__builtin_neon_vcvta_u64_v:
1396 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1397 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1398 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1399 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1400 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1401 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1402 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1403 case NEON::BI__builtin_neon_vcvtn_s32_v:
1404 case NEON::BI__builtin_neon_vcvtn_s64_v:
1405 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1406 case NEON::BI__builtin_neon_vcvtn_u32_v:
1407 case NEON::BI__builtin_neon_vcvtn_u64_v:
1408 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1409 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1410 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1411 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1412 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1413 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1414 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1415 case NEON::BI__builtin_neon_vcvtp_s32_v:
1416 case NEON::BI__builtin_neon_vcvtp_s64_v:
1417 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1418 case NEON::BI__builtin_neon_vcvtp_u32_v:
1419 case NEON::BI__builtin_neon_vcvtp_u64_v:
1420 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1421 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1422 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1423 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1424 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1425 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1426 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1427 case NEON::BI__builtin_neon_vcvtm_s32_v:
1428 case NEON::BI__builtin_neon_vcvtm_s64_v:
1429 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1430 case NEON::BI__builtin_neon_vcvtm_u32_v:
1431 case NEON::BI__builtin_neon_vcvtm_u64_v:
1432 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1433 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1434 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1435 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1436 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1437 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1439 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1441 case NEON::BI__builtin_neon_vcvtx_f32_v: {
1442 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
1443 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1446 case NEON::BI__builtin_neon_vext_v:
1447 case NEON::BI__builtin_neon_vextq_v: {
1450 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1451 Indices.push_back(i+CV);
1453 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1454 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1455 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices,
"vext");
1457 case NEON::BI__builtin_neon_vfma_v:
1458 case NEON::BI__builtin_neon_vfmaq_v: {
1459 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1460 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1461 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1465 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
1466 {Ops[1], Ops[2], Ops[0]});
1468 case NEON::BI__builtin_neon_vld1_v:
1469 case NEON::BI__builtin_neon_vld1q_v: {
1471 Ops.push_back(getAlignmentValue32(PtrOp0));
1472 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
"vld1");
1474 case NEON::BI__builtin_neon_vld1_x2_v:
1475 case NEON::BI__builtin_neon_vld1q_x2_v:
1476 case NEON::BI__builtin_neon_vld1_x3_v:
1477 case NEON::BI__builtin_neon_vld1q_x3_v:
1478 case NEON::BI__builtin_neon_vld1_x4_v:
1479 case NEON::BI__builtin_neon_vld1q_x4_v: {
1481 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1482 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld1xN");
1483 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1485 case NEON::BI__builtin_neon_vld2_v:
1486 case NEON::BI__builtin_neon_vld2q_v:
1487 case NEON::BI__builtin_neon_vld3_v:
1488 case NEON::BI__builtin_neon_vld3q_v:
1489 case NEON::BI__builtin_neon_vld4_v:
1490 case NEON::BI__builtin_neon_vld4q_v:
1491 case NEON::BI__builtin_neon_vld2_dup_v:
1492 case NEON::BI__builtin_neon_vld2q_dup_v:
1493 case NEON::BI__builtin_neon_vld3_dup_v:
1494 case NEON::BI__builtin_neon_vld3q_dup_v:
1495 case NEON::BI__builtin_neon_vld4_dup_v:
1496 case NEON::BI__builtin_neon_vld4q_dup_v: {
1498 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1499 Value *Align = getAlignmentValue32(PtrOp1);
1500 Ops[1] =
Builder.CreateCall(F, {Ops[1], Align}, NameHint);
1501 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1503 case NEON::BI__builtin_neon_vld1_dup_v:
1504 case NEON::BI__builtin_neon_vld1q_dup_v: {
1505 Value *
V = PoisonValue::get(Ty);
1507 LoadInst *Ld =
Builder.CreateLoad(PtrOp0);
1508 llvm::Constant *CI = ConstantInt::get(
SizeTy, 0);
1509 Ops[0] =
Builder.CreateInsertElement(
V, Ld, CI);
1512 case NEON::BI__builtin_neon_vld2_lane_v:
1513 case NEON::BI__builtin_neon_vld2q_lane_v:
1514 case NEON::BI__builtin_neon_vld3_lane_v:
1515 case NEON::BI__builtin_neon_vld3q_lane_v:
1516 case NEON::BI__builtin_neon_vld4_lane_v:
1517 case NEON::BI__builtin_neon_vld4q_lane_v: {
1519 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1520 for (
unsigned I = 2; I < Ops.size() - 1; ++I)
1521 Ops[I] =
Builder.CreateBitCast(Ops[I], Ty);
1522 Ops.push_back(getAlignmentValue32(PtrOp1));
1524 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1526 case NEON::BI__builtin_neon_vmovl_v: {
1527 llvm::FixedVectorType *DTy =
1528 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1529 Ops[0] =
Builder.CreateBitCast(Ops[0], DTy);
1531 return Builder.CreateZExt(Ops[0], Ty,
"vmovl");
1532 return Builder.CreateSExt(Ops[0], Ty,
"vmovl");
1534 case NEON::BI__builtin_neon_vmovn_v: {
1535 llvm::FixedVectorType *QTy =
1536 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1537 Ops[0] =
Builder.CreateBitCast(Ops[0], QTy);
1538 return Builder.CreateTrunc(Ops[0], Ty,
"vmovn");
1540 case NEON::BI__builtin_neon_vmull_v:
1546 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1547 Int =
Type.isPoly() ? (
unsigned)Intrinsic::arm_neon_vmullp : Int;
1549 case NEON::BI__builtin_neon_vpadal_v:
1550 case NEON::BI__builtin_neon_vpadalq_v: {
1552 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1556 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1557 llvm::Type *Tys[2] = { Ty, NarrowTy };
1560 case NEON::BI__builtin_neon_vpaddl_v:
1561 case NEON::BI__builtin_neon_vpaddlq_v: {
1563 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1564 llvm::Type *EltTy = llvm::IntegerType::get(
getLLVMContext(), EltBits / 2);
1566 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1567 llvm::Type *Tys[2] = { Ty, NarrowTy };
1570 case NEON::BI__builtin_neon_vqdmlal_v:
1571 case NEON::BI__builtin_neon_vqdmlsl_v: {
1576 return EmitNeonCall(
CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
1578 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
1579 case NEON::BI__builtin_neon_vqdmulh_lane_v:
1580 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
1581 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
1583 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
1584 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
1585 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
1586 RTy->getNumElements() * 2);
1587 llvm::Type *Tys[2] = {
1592 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
1593 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
1594 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
1595 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
1596 llvm::Type *Tys[2] = {
1601 case NEON::BI__builtin_neon_vqshl_n_v:
1602 case NEON::BI__builtin_neon_vqshlq_n_v:
1605 case NEON::BI__builtin_neon_vqshlu_n_v:
1606 case NEON::BI__builtin_neon_vqshluq_n_v:
1609 case NEON::BI__builtin_neon_vrecpe_v:
1610 case NEON::BI__builtin_neon_vrecpeq_v:
1611 case NEON::BI__builtin_neon_vrsqrte_v:
1612 case NEON::BI__builtin_neon_vrsqrteq_v:
1613 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
1615 case NEON::BI__builtin_neon_vrndi_v:
1616 case NEON::BI__builtin_neon_vrndiq_v:
1617 Int =
Builder.getIsFPConstrained()
1618 ? Intrinsic::experimental_constrained_nearbyint
1619 : Intrinsic::nearbyint;
1621 case NEON::BI__builtin_neon_vrshr_n_v:
1622 case NEON::BI__builtin_neon_vrshrq_n_v:
1625 case NEON::BI__builtin_neon_vsha512hq_u64:
1626 case NEON::BI__builtin_neon_vsha512h2q_u64:
1627 case NEON::BI__builtin_neon_vsha512su0q_u64:
1628 case NEON::BI__builtin_neon_vsha512su1q_u64: {
1632 case NEON::BI__builtin_neon_vshl_n_v:
1633 case NEON::BI__builtin_neon_vshlq_n_v:
1635 return Builder.CreateShl(
Builder.CreateBitCast(Ops[0],Ty), Ops[1],
1637 case NEON::BI__builtin_neon_vshll_n_v: {
1638 llvm::FixedVectorType *SrcTy =
1639 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1640 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1642 Ops[0] =
Builder.CreateZExt(Ops[0], VTy);
1644 Ops[0] =
Builder.CreateSExt(Ops[0], VTy);
1646 return Builder.CreateShl(Ops[0], Ops[1],
"vshll_n");
1648 case NEON::BI__builtin_neon_vshrn_n_v: {
1649 llvm::FixedVectorType *SrcTy =
1650 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1651 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1654 Ops[0] =
Builder.CreateLShr(Ops[0], Ops[1]);
1656 Ops[0] =
Builder.CreateAShr(Ops[0], Ops[1]);
1657 return Builder.CreateTrunc(Ops[0], Ty,
"vshrn_n");
1659 case NEON::BI__builtin_neon_vshr_n_v:
1660 case NEON::BI__builtin_neon_vshrq_n_v:
1662 case NEON::BI__builtin_neon_vst1_v:
1663 case NEON::BI__builtin_neon_vst1q_v:
1664 case NEON::BI__builtin_neon_vst2_v:
1665 case NEON::BI__builtin_neon_vst2q_v:
1666 case NEON::BI__builtin_neon_vst3_v:
1667 case NEON::BI__builtin_neon_vst3q_v:
1668 case NEON::BI__builtin_neon_vst4_v:
1669 case NEON::BI__builtin_neon_vst4q_v:
1670 case NEON::BI__builtin_neon_vst2_lane_v:
1671 case NEON::BI__builtin_neon_vst2q_lane_v:
1672 case NEON::BI__builtin_neon_vst3_lane_v:
1673 case NEON::BI__builtin_neon_vst3q_lane_v:
1674 case NEON::BI__builtin_neon_vst4_lane_v:
1675 case NEON::BI__builtin_neon_vst4q_lane_v: {
1677 Ops.push_back(getAlignmentValue32(PtrOp0));
1680 case NEON::BI__builtin_neon_vsm3partw1q_u32:
1681 case NEON::BI__builtin_neon_vsm3partw2q_u32:
1682 case NEON::BI__builtin_neon_vsm3ss1q_u32:
1683 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
1684 case NEON::BI__builtin_neon_vsm4eq_u32: {
1688 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
1689 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
1690 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
1691 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
1696 case NEON::BI__builtin_neon_vst1_x2_v:
1697 case NEON::BI__builtin_neon_vst1q_x2_v:
1698 case NEON::BI__builtin_neon_vst1_x3_v:
1699 case NEON::BI__builtin_neon_vst1q_x3_v:
1700 case NEON::BI__builtin_neon_vst1_x4_v:
1701 case NEON::BI__builtin_neon_vst1q_x4_v: {
1704 if (
Arch == llvm::Triple::aarch64 ||
Arch == llvm::Triple::aarch64_be ||
1705 Arch == llvm::Triple::aarch64_32) {
1707 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
1713 case NEON::BI__builtin_neon_vsubhn_v: {
1714 llvm::FixedVectorType *SrcTy =
1715 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1718 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1719 Ops[1] =
Builder.CreateBitCast(Ops[1], SrcTy);
1720 Ops[0] =
Builder.CreateSub(Ops[0], Ops[1],
"vsubhn");
1723 Constant *ShiftAmt =
1724 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1725 Ops[0] =
Builder.CreateLShr(Ops[0], ShiftAmt,
"vsubhn");
1728 return Builder.CreateTrunc(Ops[0], VTy,
"vsubhn");
1730 case NEON::BI__builtin_neon_vtrn_v:
1731 case NEON::BI__builtin_neon_vtrnq_v: {
1732 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1733 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1734 Value *SV =
nullptr;
1736 for (
unsigned vi = 0; vi != 2; ++vi) {
1738 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1739 Indices.push_back(i+vi);
1740 Indices.push_back(i+e+vi);
1743 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vtrn");
1748 case NEON::BI__builtin_neon_vtst_v:
1749 case NEON::BI__builtin_neon_vtstq_v: {
1750 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1751 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1752 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
1753 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1754 ConstantAggregateZero::get(Ty));
1755 return Builder.CreateSExt(Ops[0], Ty,
"vtst");
1757 case NEON::BI__builtin_neon_vuzp_v:
1758 case NEON::BI__builtin_neon_vuzpq_v: {
1759 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1760 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1761 Value *SV =
nullptr;
1763 for (
unsigned vi = 0; vi != 2; ++vi) {
1765 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1766 Indices.push_back(2*i+vi);
1769 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vuzp");
1774 case NEON::BI__builtin_neon_vxarq_u64: {
1779 case NEON::BI__builtin_neon_vzip_v:
1780 case NEON::BI__builtin_neon_vzipq_v: {
1781 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1782 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1783 Value *SV =
nullptr;
1785 for (
unsigned vi = 0; vi != 2; ++vi) {
1787 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1788 Indices.push_back((i + vi*e) >> 1);
1789 Indices.push_back(((i + vi*e) >> 1)+e);
1792 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vzip");
1797 case NEON::BI__builtin_neon_vdot_s32:
1798 case NEON::BI__builtin_neon_vdot_u32:
1799 case NEON::BI__builtin_neon_vdotq_s32:
1800 case NEON::BI__builtin_neon_vdotq_u32: {
1802 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1803 llvm::Type *Tys[2] = { Ty, InputTy };
1806 case NEON::BI__builtin_neon_vfmlal_low_f16:
1807 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
1809 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1810 llvm::Type *Tys[2] = { Ty, InputTy };
1813 case NEON::BI__builtin_neon_vfmlsl_low_f16:
1814 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
1816 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1817 llvm::Type *Tys[2] = { Ty, InputTy };
1820 case NEON::BI__builtin_neon_vfmlal_high_f16:
1821 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
1823 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1824 llvm::Type *Tys[2] = { Ty, InputTy };
1827 case NEON::BI__builtin_neon_vfmlsl_high_f16:
1828 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
1830 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1831 llvm::Type *Tys[2] = { Ty, InputTy };
1834 case NEON::BI__builtin_neon_vmmlaq_s32:
1835 case NEON::BI__builtin_neon_vmmlaq_u32: {
1837 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1838 llvm::Type *Tys[2] = { Ty, InputTy };
1839 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
"vmmla");
1841 case NEON::BI__builtin_neon_vusmmlaq_s32: {
1843 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1844 llvm::Type *Tys[2] = { Ty, InputTy };
1847 case NEON::BI__builtin_neon_vusdot_s32:
1848 case NEON::BI__builtin_neon_vusdotq_s32: {
1850 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1851 llvm::Type *Tys[2] = { Ty, InputTy };
1854 case NEON::BI__builtin_neon_vbfdot_f32:
1855 case NEON::BI__builtin_neon_vbfdotq_f32: {
1856 llvm::Type *InputTy =
1857 llvm::FixedVectorType::get(
BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
1858 llvm::Type *Tys[2] = { Ty, InputTy };
1861 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
1862 llvm::Type *Tys[1] = { Ty };
1869 assert(Int &&
"Expected valid intrinsic number");
4488 llvm::Triple::ArchType
Arch) {
4497 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4498 return EmitAArch64CpuSupports(E);
4500 unsigned HintID =
static_cast<unsigned>(-1);
4501 switch (BuiltinID) {
4503 case clang::AArch64::BI__builtin_arm_nop:
4506 case clang::AArch64::BI__builtin_arm_yield:
4507 case clang::AArch64::BI__yield:
4510 case clang::AArch64::BI__builtin_arm_wfe:
4511 case clang::AArch64::BI__wfe:
4514 case clang::AArch64::BI__builtin_arm_wfi:
4515 case clang::AArch64::BI__wfi:
4518 case clang::AArch64::BI__builtin_arm_sev:
4519 case clang::AArch64::BI__sev:
4522 case clang::AArch64::BI__builtin_arm_sevl:
4523 case clang::AArch64::BI__sevl:
4528 if (HintID !=
static_cast<unsigned>(-1)) {
4529 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_hint);
4530 return Builder.CreateCall(F, llvm::ConstantInt::get(
Int32Ty, HintID));
4533 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
4534 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_break);
4539 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
4542 llvm::FunctionType::get(StructType::get(
CGM.Int64Ty,
CGM.Int64Ty), {},
4544 "__arm_sme_state"));
4546 "aarch64_pstate_sm_compatible");
4547 CI->setAttributes(Attrs);
4550 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
4557 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
4559 "rbit of unusual size!");
4562 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
4564 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
4566 "rbit of unusual size!");
4569 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
4572 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
4573 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
4575 Function *F =
CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
4577 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
4582 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
4584 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
4587 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
4589 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
4593 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
4594 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
4596 llvm::Type *Ty = Arg->getType();
4597 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
4601 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
4602 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
4604 llvm::Type *Ty = Arg->getType();
4605 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
4609 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
4610 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
4612 llvm::Type *Ty = Arg->getType();
4613 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
4617 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
4618 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
4620 llvm::Type *Ty = Arg->getType();
4621 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
4625 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
4627 "__jcvt of unusual size!");
4630 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
4633 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
4634 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
4635 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
4636 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
4640 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
4643 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
4644 llvm::Value *Val =
Builder.CreateCall(F, MemAddr);
4646 for (
size_t i = 0; i < 8; i++) {
4647 llvm::Value *ValOffsetPtr =
4659 Args.push_back(MemAddr);
4660 for (
size_t i = 0; i < 8; i++) {
4661 llvm::Value *ValOffsetPtr =
4667 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
4668 ? Intrinsic::aarch64_st64b
4669 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
4670 ? Intrinsic::aarch64_st64bv
4671 : Intrinsic::aarch64_st64bv0);
4673 return Builder.CreateCall(F, Args);
4676 if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
4687 auto *SizeC = llvm::ConstantInt::get(
Int32Ty, SizeBits);
4692 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store,
4698 F, {StoreAddr, StoreValue64,
4699 ConstantInt::get(
Int32Ty, OrderC->getZExtValue()),
4700 ConstantInt::get(
Int32Ty, PolicyC->getZExtValue()), SizeC});
4703 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
4704 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
4706 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
4707 ? Intrinsic::aarch64_rndr
4708 : Intrinsic::aarch64_rndrrs);
4710 llvm::Value *Val =
Builder.CreateCall(F);
4711 Value *RandomValue =
Builder.CreateExtractValue(Val, 0);
4715 Builder.CreateStore(RandomValue, MemAddress);
4720 if (BuiltinID == clang::AArch64::BI__clear_cache) {
4721 assert(E->
getNumArgs() == 2 &&
"__clear_cache takes 2 arguments");
4724 for (
unsigned i = 0; i < 2; i++)
4726 llvm::Type *Ty =
CGM.getTypes().ConvertType(FD->
getType());
4728 StringRef Name = FD->
getName();
4732 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4733 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
4736 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4737 ? Intrinsic::aarch64_ldaxp
4738 : Intrinsic::aarch64_ldxp);
4745 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
4746 Val0 =
Builder.CreateZExt(Val0, Int128Ty);
4747 Val1 =
Builder.CreateZExt(Val1, Int128Ty);
4749 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4750 Val =
Builder.CreateShl(Val0, ShiftCst,
"shl",
true );
4751 Val =
Builder.CreateOr(Val, Val1);
4753 }
else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4754 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
4763 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4764 ? Intrinsic::aarch64_ldaxr
4765 : Intrinsic::aarch64_ldxr,
4767 CallInst *Val =
Builder.CreateCall(F, LoadAddr,
"ldxr");
4771 if (RealResTy->isPointerTy())
4772 return Builder.CreateIntToPtr(Val, RealResTy);
4774 llvm::Type *IntResTy = llvm::IntegerType::get(
4776 return Builder.CreateBitCast(
Builder.CreateTruncOrBitCast(Val, IntResTy),
4780 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4781 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
4784 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4785 ? Intrinsic::aarch64_stlxp
4786 : Intrinsic::aarch64_stxp);
4793 llvm::Value *Val =
Builder.CreateLoad(Tmp);
4798 return Builder.CreateCall(F, {Arg0, Arg1, StPtr},
"stxp");
4801 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4802 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
4807 llvm::Type *StoreTy =
4810 if (StoreVal->
getType()->isPointerTy())
4813 llvm::Type *
IntTy = llvm::IntegerType::get(
4815 CGM.getDataLayout().getTypeSizeInBits(StoreVal->
getType()));
4821 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4822 ? Intrinsic::aarch64_stlxr
4823 : Intrinsic::aarch64_stxr,
4825 CallInst *CI =
Builder.CreateCall(F, {StoreVal, StoreAddr},
"stxr");
4827 1, Attribute::get(
getLLVMContext(), Attribute::ElementType, StoreTy));
4831 if (BuiltinID == clang::AArch64::BI__getReg) {
4834 llvm_unreachable(
"Sema will ensure that the parameter is constant");
4837 LLVMContext &Context =
CGM.getLLVMContext();
4840 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4841 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4842 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4845 CGM.getIntrinsic(Intrinsic::read_register, {
Int64Ty});
4846 return Builder.CreateCall(F, Metadata);
4849 if (BuiltinID == clang::AArch64::BI__break) {
4852 llvm_unreachable(
"Sema will ensure that the parameter is constant");
4854 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_break);
4858 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
4859 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4863 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
4864 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
4865 llvm::SyncScope::SingleThread);
4868 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4869 switch (BuiltinID) {
4870 case clang::AArch64::BI__builtin_arm_crc32b:
4871 CRCIntrinsicID = Intrinsic::aarch64_crc32b;
break;
4872 case clang::AArch64::BI__builtin_arm_crc32cb:
4873 CRCIntrinsicID = Intrinsic::aarch64_crc32cb;
break;
4874 case clang::AArch64::BI__builtin_arm_crc32h:
4875 CRCIntrinsicID = Intrinsic::aarch64_crc32h;
break;
4876 case clang::AArch64::BI__builtin_arm_crc32ch:
4877 CRCIntrinsicID = Intrinsic::aarch64_crc32ch;
break;
4878 case clang::AArch64::BI__builtin_arm_crc32w:
4879 CRCIntrinsicID = Intrinsic::aarch64_crc32w;
break;
4880 case clang::AArch64::BI__builtin_arm_crc32cw:
4881 CRCIntrinsicID = Intrinsic::aarch64_crc32cw;
break;
4882 case clang::AArch64::BI__builtin_arm_crc32d:
4883 CRCIntrinsicID = Intrinsic::aarch64_crc32x;
break;
4884 case clang::AArch64::BI__builtin_arm_crc32cd:
4885 CRCIntrinsicID = Intrinsic::aarch64_crc32cx;
break;
4888 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4893 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4894 Arg1 =
Builder.CreateZExtOrBitCast(Arg1, DataTy);
4896 return Builder.CreateCall(F, {Arg0, Arg1});
4900 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
4907 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
4910 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
4911 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
4915 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
4916 switch (BuiltinID) {
4917 case clang::AArch64::BI__builtin_arm_irg:
4918 MTEIntrinsicID = Intrinsic::aarch64_irg;
break;
4919 case clang::AArch64::BI__builtin_arm_addg:
4920 MTEIntrinsicID = Intrinsic::aarch64_addg;
break;
4921 case clang::AArch64::BI__builtin_arm_gmi:
4922 MTEIntrinsicID = Intrinsic::aarch64_gmi;
break;
4923 case clang::AArch64::BI__builtin_arm_ldg:
4924 MTEIntrinsicID = Intrinsic::aarch64_ldg;
break;
4925 case clang::AArch64::BI__builtin_arm_stg:
4926 MTEIntrinsicID = Intrinsic::aarch64_stg;
break;
4927 case clang::AArch64::BI__builtin_arm_subp:
4928 MTEIntrinsicID = Intrinsic::aarch64_subp;
break;
4931 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
4932 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
4937 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4940 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
4945 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4946 {Pointer, TagOffset});
4948 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
4954 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
4959 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
4961 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4962 {TagAddress, TagAddress});
4967 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
4969 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4970 {TagAddress, TagAddress});
4972 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
4976 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
4980 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4981 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4982 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4983 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4984 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
4985 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
4986 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
4987 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
4990 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4991 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4992 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4993 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
4996 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4997 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
4999 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5000 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5002 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5003 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5005 llvm::Type *ValueType;
5009 }
else if (Is128Bit) {
5010 llvm::Type *Int128Ty =
5011 llvm::IntegerType::getInt128Ty(
CGM.getLLVMContext());
5012 ValueType = Int128Ty;
5014 }
else if (IsPointerBuiltin) {
5024 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5025 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
5026 LLVMContext &Context =
CGM.getLLVMContext();
5031 std::string SysRegStr;
5032 llvm::raw_string_ostream(SysRegStr)
5033 << (0b10 | SysReg >> 14) <<
":" << ((SysReg >> 11) & 7) <<
":"
5034 << ((SysReg >> 7) & 15) <<
":" << ((SysReg >> 3) & 15) <<
":"
5037 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5038 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5039 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5044 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5045 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::read_register, Types);
5047 return Builder.CreateCall(F, Metadata);
5050 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::write_register, Types);
5052 llvm::Value *
Result =
Builder.CreateCall(F, {Metadata, ArgValue});
5057 if (BuiltinID == clang::AArch64::BI__sys) {
5060 const unsigned Op1 = SysReg >> 11;
5061 const unsigned CRn = (SysReg >> 7) & 0xf;
5062 const unsigned CRm = (SysReg >> 3) & 0xf;
5063 const unsigned Op2 = SysReg & 0x7;
5065 Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_sys),
5066 {Builder.getInt32(Op1), Builder.getInt32(CRn),
5067 Builder.getInt32(CRm), Builder.getInt32(Op2),
5068 EmitScalarExpr(E->getArg(1))});
5072 return ConstantInt::get(
Builder.getInt32Ty(), 0);
5075 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5081 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5086 if (BuiltinID == clang::AArch64::BI__mulh ||
5087 BuiltinID == clang::AArch64::BI__umulh) {
5089 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
5091 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5097 Value *MulResult, *HigherBits;
5099 MulResult =
Builder.CreateNSWMul(LHS, RHS);
5100 HigherBits =
Builder.CreateAShr(MulResult, 64);
5102 MulResult =
Builder.CreateNUWMul(LHS, RHS);
5103 HigherBits =
Builder.CreateLShr(MulResult, 64);
5105 HigherBits =
Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5110 if (BuiltinID == AArch64::BI__writex18byte ||
5111 BuiltinID == AArch64::BI__writex18word ||
5112 BuiltinID == AArch64::BI__writex18dword ||
5113 BuiltinID == AArch64::BI__writex18qword) {
5129 if (BuiltinID == AArch64::BI__readx18byte ||
5130 BuiltinID == AArch64::BI__readx18word ||
5131 BuiltinID == AArch64::BI__readx18dword ||
5132 BuiltinID == AArch64::BI__readx18qword) {
5147 if (BuiltinID == AArch64::BI__addx18byte ||
5148 BuiltinID == AArch64::BI__addx18word ||
5149 BuiltinID == AArch64::BI__addx18dword ||
5150 BuiltinID == AArch64::BI__addx18qword ||
5151 BuiltinID == AArch64::BI__incx18byte ||
5152 BuiltinID == AArch64::BI__incx18word ||
5153 BuiltinID == AArch64::BI__incx18dword ||
5154 BuiltinID == AArch64::BI__incx18qword) {
5157 switch (BuiltinID) {
5158 case AArch64::BI__incx18byte:
5162 case AArch64::BI__incx18word:
5166 case AArch64::BI__incx18dword:
5170 case AArch64::BI__incx18qword:
5176 isIncrement =
false;
5201 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5202 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5203 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5204 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5207 return Builder.CreateBitCast(Arg, RetTy);
5210 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5211 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5212 BuiltinID == AArch64::BI_CountLeadingZeros ||
5213 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5217 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5218 BuiltinID == AArch64::BI_CountLeadingOnes64)
5219 Arg =
Builder.CreateXor(Arg, Constant::getAllOnesValue(
ArgType));
5224 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5225 BuiltinID == AArch64::BI_CountLeadingZeros64)
5230 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5231 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5234 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5235 ?
CGM.getIntrinsic(Intrinsic::aarch64_cls)
5236 :
CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5239 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5244 if (BuiltinID == AArch64::BI_CountOneBits ||
5245 BuiltinID == AArch64::BI_CountOneBits64) {
5251 if (BuiltinID == AArch64::BI_CountOneBits64)
5256 if (BuiltinID == AArch64::BI__prefetch) {
5265 if (BuiltinID == AArch64::BI__hlt) {
5266 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5271 return ConstantInt::get(
Builder.getInt32Ty(), 0);
5274 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5282 if (std::optional<MSVCIntrin> MsvcIntId =
5288 return P.first == BuiltinID;
5291 BuiltinID = It->second;
5297 bool IsSISD = (
Builtin !=
nullptr);
5301 unsigned ICEArguments = 0;
5312 unsigned NumArgs = E->
getNumArgs() - (HasExtraArg ? 1 : 0);
5313 for (
unsigned i = 0, e = NumArgs; i != e; i++) {
5315 switch (BuiltinID) {
5316 case NEON::BI__builtin_neon_vld1_v:
5317 case NEON::BI__builtin_neon_vld1q_v:
5318 case NEON::BI__builtin_neon_vld1_dup_v:
5319 case NEON::BI__builtin_neon_vld1q_dup_v:
5320 case NEON::BI__builtin_neon_vld1_lane_v:
5321 case NEON::BI__builtin_neon_vld1q_lane_v:
5322 case NEON::BI__builtin_neon_vst1_v:
5323 case NEON::BI__builtin_neon_vst1q_v:
5324 case NEON::BI__builtin_neon_vst1_lane_v:
5325 case NEON::BI__builtin_neon_vst1q_lane_v:
5326 case NEON::BI__builtin_neon_vldap1_lane_s64:
5327 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5328 case NEON::BI__builtin_neon_vstl1_lane_s64:
5329 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5342 assert(
Result &&
"SISD intrinsic should have been handled");
5348 if (std::optional<llvm::APSInt>
Result =
5353 bool usgn =
Type.isUnsigned();
5354 bool quad =
Type.isQuad();
5372 switch (BuiltinID) {
5374 case NEON::BI__builtin_neon_vabsh_f16:
5376 case NEON::BI__builtin_neon_vaddq_p128: {
5378 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
5379 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
5380 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
5381 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
5382 return Builder.CreateBitCast(Ops[0], Int128Ty);
5384 case NEON::BI__builtin_neon_vldrq_p128: {
5385 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
5386 return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
5389 case NEON::BI__builtin_neon_vstrq_p128: {
5390 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5392 case NEON::BI__builtin_neon_vcvts_f32_u32:
5393 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5396 case NEON::BI__builtin_neon_vcvts_f32_s32:
5397 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5398 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5401 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
5403 return Builder.CreateUIToFP(Ops[0], FTy);
5404 return Builder.CreateSIToFP(Ops[0], FTy);
5406 case NEON::BI__builtin_neon_vcvth_f16_u16:
5407 case NEON::BI__builtin_neon_vcvth_f16_u32:
5408 case NEON::BI__builtin_neon_vcvth_f16_u64:
5411 case NEON::BI__builtin_neon_vcvth_f16_s16:
5412 case NEON::BI__builtin_neon_vcvth_f16_s32:
5413 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5414 llvm::Type *FTy =
HalfTy;
5416 if (Ops[0]->
getType()->getPrimitiveSizeInBits() == 64)
5418 else if (Ops[0]->
getType()->getPrimitiveSizeInBits() == 32)
5422 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
5424 return Builder.CreateUIToFP(Ops[0], FTy);
5425 return Builder.CreateSIToFP(Ops[0], FTy);
5427 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5428 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5429 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5430 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5431 case NEON::BI__builtin_neon_vcvth_u16_f16:
5432 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5433 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5434 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5435 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5436 case NEON::BI__builtin_neon_vcvth_s16_f16: {
5438 llvm::Type* FTy =
HalfTy;
5439 llvm::Type *Tys[2] = {InTy, FTy};
5440 switch (BuiltinID) {
5441 default: llvm_unreachable(
"missing builtin ID in switch!");
5442 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5443 Int = Intrinsic::aarch64_neon_fcvtau;
break;
5444 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5445 Int = Intrinsic::aarch64_neon_fcvtmu;
break;
5446 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5447 Int = Intrinsic::aarch64_neon_fcvtnu;
break;
5448 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5449 Int = Intrinsic::aarch64_neon_fcvtpu;
break;
5450 case NEON::BI__builtin_neon_vcvth_u16_f16:
5451 Int = Intrinsic::aarch64_neon_fcvtzu;
break;
5452 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5453 Int = Intrinsic::aarch64_neon_fcvtas;
break;
5454 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5455 Int = Intrinsic::aarch64_neon_fcvtms;
break;
5456 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5457 Int = Intrinsic::aarch64_neon_fcvtns;
break;
5458 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5459 Int = Intrinsic::aarch64_neon_fcvtps;
break;
5460 case NEON::BI__builtin_neon_vcvth_s16_f16:
5461 Int = Intrinsic::aarch64_neon_fcvtzs;
break;
5465 case NEON::BI__builtin_neon_vcaleh_f16:
5466 case NEON::BI__builtin_neon_vcalth_f16:
5467 case NEON::BI__builtin_neon_vcageh_f16:
5468 case NEON::BI__builtin_neon_vcagth_f16: {
5470 llvm::Type* FTy =
HalfTy;
5471 llvm::Type *Tys[2] = {InTy, FTy};
5472 switch (BuiltinID) {
5473 default: llvm_unreachable(
"missing builtin ID in switch!");
5474 case NEON::BI__builtin_neon_vcageh_f16:
5475 Int = Intrinsic::aarch64_neon_facge;
break;
5476 case NEON::BI__builtin_neon_vcagth_f16:
5477 Int = Intrinsic::aarch64_neon_facgt;
break;
5478 case NEON::BI__builtin_neon_vcaleh_f16:
5479 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]);
break;
5480 case NEON::BI__builtin_neon_vcalth_f16:
5481 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]);
break;
5486 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5487 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5489 llvm::Type* FTy =
HalfTy;
5490 llvm::Type *Tys[2] = {InTy, FTy};
5491 switch (BuiltinID) {
5492 default: llvm_unreachable(
"missing builtin ID in switch!");
5493 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5494 Int = Intrinsic::aarch64_neon_vcvtfp2fxs;
break;
5495 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5496 Int = Intrinsic::aarch64_neon_vcvtfp2fxu;
break;
5501 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5502 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5503 llvm::Type* FTy =
HalfTy;
5505 llvm::Type *Tys[2] = {FTy, InTy};
5506 switch (BuiltinID) {
5507 default: llvm_unreachable(
"missing builtin ID in switch!");
5508 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5509 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5510 Ops[0] =
Builder.CreateSExt(Ops[0], InTy,
"sext");
5512 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5513 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5514 Ops[0] =
Builder.CreateZExt(Ops[0], InTy);
5519 case NEON::BI__builtin_neon_vpaddd_s64: {
5522 auto *Ty = llvm::FixedVectorType::get(
Int64Ty, 2);
5524 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2i64");
5525 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5526 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5527 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
5528 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
5530 return Builder.CreateAdd(Op0, Op1,
"vpaddd");
5532 case NEON::BI__builtin_neon_vpaddd_f64: {
5533 auto *Ty = llvm::FixedVectorType::get(
DoubleTy, 2);
5535 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2f64");
5536 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5537 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5538 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
5539 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
5541 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
5543 case NEON::BI__builtin_neon_vpadds_f32: {
5544 auto *Ty = llvm::FixedVectorType::get(
FloatTy, 2);
5546 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2f32");
5547 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5548 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5549 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
5550 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
5552 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
5554 case NEON::BI__builtin_neon_vceqzd_s64:
5557 ICmpInst::ICMP_EQ,
"vceqz");
5558 case NEON::BI__builtin_neon_vceqzd_f64:
5559 case NEON::BI__builtin_neon_vceqzs_f32:
5560 case NEON::BI__builtin_neon_vceqzh_f16:
5563 ICmpInst::FCMP_OEQ,
"vceqz");
5564 case NEON::BI__builtin_neon_vcgezd_s64:
5567 ICmpInst::ICMP_SGE,
"vcgez");
5568 case NEON::BI__builtin_neon_vcgezd_f64:
5569 case NEON::BI__builtin_neon_vcgezs_f32:
5570 case NEON::BI__builtin_neon_vcgezh_f16:
5573 ICmpInst::FCMP_OGE,
"vcgez");
5574 case NEON::BI__builtin_neon_vclezd_s64:
5577 ICmpInst::ICMP_SLE,
"vclez");
5578 case NEON::BI__builtin_neon_vclezd_f64:
5579 case NEON::BI__builtin_neon_vclezs_f32:
5580 case NEON::BI__builtin_neon_vclezh_f16:
5583 ICmpInst::FCMP_OLE,
"vclez");
5584 case NEON::BI__builtin_neon_vcgtzd_s64:
5587 ICmpInst::ICMP_SGT,
"vcgtz");
5588 case NEON::BI__builtin_neon_vcgtzd_f64:
5589 case NEON::BI__builtin_neon_vcgtzs_f32:
5590 case NEON::BI__builtin_neon_vcgtzh_f16:
5593 ICmpInst::FCMP_OGT,
"vcgtz");
5594 case NEON::BI__builtin_neon_vcltzd_s64:
5597 ICmpInst::ICMP_SLT,
"vcltz");
5599 case NEON::BI__builtin_neon_vcltzd_f64:
5600 case NEON::BI__builtin_neon_vcltzs_f32:
5601 case NEON::BI__builtin_neon_vcltzh_f16:
5604 ICmpInst::FCMP_OLT,
"vcltz");
5606 case NEON::BI__builtin_neon_vceqzd_u64: {
5609 ICmpInst::ICMP_EQ,
"vceqzd");
5611 case NEON::BI__builtin_neon_vceqd_f64:
5612 case NEON::BI__builtin_neon_vcled_f64:
5613 case NEON::BI__builtin_neon_vcltd_f64:
5614 case NEON::BI__builtin_neon_vcged_f64:
5615 case NEON::BI__builtin_neon_vcgtd_f64: {
5616 llvm::CmpInst::Predicate P;
5617 switch (BuiltinID) {
5618 default: llvm_unreachable(
"missing builtin ID in switch!");
5619 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ;
break;
5620 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE;
break;
5621 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT;
break;
5622 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE;
break;
5623 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT;
break;
5627 if (P == llvm::FCmpInst::FCMP_OEQ)
5628 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
5630 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5633 case NEON::BI__builtin_neon_vceqs_f32:
5634 case NEON::BI__builtin_neon_vcles_f32:
5635 case NEON::BI__builtin_neon_vclts_f32:
5636 case NEON::BI__builtin_neon_vcges_f32:
5637 case NEON::BI__builtin_neon_vcgts_f32: {
5638 llvm::CmpInst::Predicate P;
5639 switch (BuiltinID) {
5640 default: llvm_unreachable(
"missing builtin ID in switch!");
5641 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ;
break;
5642 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE;
break;
5643 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT;
break;
5644 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE;
break;
5645 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT;
break;
5649 if (P == llvm::FCmpInst::FCMP_OEQ)
5650 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
5652 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5655 case NEON::BI__builtin_neon_vceqh_f16:
5656 case NEON::BI__builtin_neon_vcleh_f16:
5657 case NEON::BI__builtin_neon_vclth_f16:
5658 case NEON::BI__builtin_neon_vcgeh_f16:
5659 case NEON::BI__builtin_neon_vcgth_f16: {
5660 llvm::CmpInst::Predicate P;
5661 switch (BuiltinID) {
5662 default: llvm_unreachable(
"missing builtin ID in switch!");
5663 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ;
break;
5664 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE;
break;
5665 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT;
break;
5666 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE;
break;
5667 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT;
break;
5671 if (P == llvm::FCmpInst::FCMP_OEQ)
5672 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
5674 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5677 case NEON::BI__builtin_neon_vceqd_s64:
5678 case NEON::BI__builtin_neon_vceqd_u64:
5679 case NEON::BI__builtin_neon_vcgtd_s64:
5680 case NEON::BI__builtin_neon_vcgtd_u64:
5681 case NEON::BI__builtin_neon_vcltd_s64:
5682 case NEON::BI__builtin_neon_vcltd_u64:
5683 case NEON::BI__builtin_neon_vcged_u64:
5684 case NEON::BI__builtin_neon_vcged_s64:
5685 case NEON::BI__builtin_neon_vcled_u64:
5686 case NEON::BI__builtin_neon_vcled_s64: {
5687 llvm::CmpInst::Predicate P;
5688 switch (BuiltinID) {
5689 default: llvm_unreachable(
"missing builtin ID in switch!");
5690 case NEON::BI__builtin_neon_vceqd_s64:
5691 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;
break;
5692 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;
break;
5693 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;
break;
5694 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;
break;
5695 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;
break;
5696 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;
break;
5697 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;
break;
5698 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;
break;
5699 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;
break;
5703 Ops[0] =
Builder.CreateICmp(P, Ops[0], Ops[1]);
5706 case NEON::BI__builtin_neon_vnegd_s64:
5707 return Builder.CreateNeg(Ops[0],
"vnegd");
5708 case NEON::BI__builtin_neon_vnegh_f16:
5709 return Builder.CreateFNeg(Ops[0],
"vnegh");
5710 case NEON::BI__builtin_neon_vtstd_s64:
5711 case NEON::BI__builtin_neon_vtstd_u64: {
5714 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
5715 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5716 llvm::Constant::getNullValue(
Int64Ty));
5719 case NEON::BI__builtin_neon_vset_lane_i8:
5720 case NEON::BI__builtin_neon_vset_lane_i16:
5721 case NEON::BI__builtin_neon_vset_lane_i32:
5722 case NEON::BI__builtin_neon_vset_lane_i64:
5723 case NEON::BI__builtin_neon_vset_lane_bf16:
5724 case NEON::BI__builtin_neon_vset_lane_f32:
5725 case NEON::BI__builtin_neon_vsetq_lane_i8:
5726 case NEON::BI__builtin_neon_vsetq_lane_i16:
5727 case NEON::BI__builtin_neon_vsetq_lane_i32:
5728 case NEON::BI__builtin_neon_vsetq_lane_i64:
5729 case NEON::BI__builtin_neon_vsetq_lane_bf16:
5730 case NEON::BI__builtin_neon_vsetq_lane_f32:
5731 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5732 case NEON::BI__builtin_neon_vset_lane_f64:
5735 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 1));
5736 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5737 case NEON::BI__builtin_neon_vset_lane_mf8:
5738 case NEON::BI__builtin_neon_vsetq_lane_mf8:
5742 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5743 case NEON::BI__builtin_neon_vsetq_lane_f64:
5746 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 2));
5747 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5749 case NEON::BI__builtin_neon_vget_lane_i8:
5750 case NEON::BI__builtin_neon_vdupb_lane_i8:
5752 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int8Ty, 8));
5753 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5754 case NEON::BI__builtin_neon_vgetq_lane_i8:
5755 case NEON::BI__builtin_neon_vdupb_laneq_i8:
5757 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int8Ty, 16));
5758 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5759 case NEON::BI__builtin_neon_vget_lane_mf8:
5760 case NEON::BI__builtin_neon_vdupb_lane_mf8:
5761 case NEON::BI__builtin_neon_vgetq_lane_mf8:
5762 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
5763 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5764 case NEON::BI__builtin_neon_vget_lane_i16:
5765 case NEON::BI__builtin_neon_vduph_lane_i16:
5767 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int16Ty, 4));
5768 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5769 case NEON::BI__builtin_neon_vgetq_lane_i16:
5770 case NEON::BI__builtin_neon_vduph_laneq_i16:
5772 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int16Ty, 8));
5773 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5774 case NEON::BI__builtin_neon_vget_lane_i32:
5775 case NEON::BI__builtin_neon_vdups_lane_i32:
5777 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int32Ty, 2));
5778 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5779 case NEON::BI__builtin_neon_vdups_lane_f32:
5781 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 2));
5782 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vdups_lane");
5783 case NEON::BI__builtin_neon_vgetq_lane_i32:
5784 case NEON::BI__builtin_neon_vdups_laneq_i32:
5786 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int32Ty, 4));
5787 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5788 case NEON::BI__builtin_neon_vget_lane_i64:
5789 case NEON::BI__builtin_neon_vdupd_lane_i64:
5791 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int64Ty, 1));
5792 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5793 case NEON::BI__builtin_neon_vdupd_lane_f64:
5795 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 1));
5796 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vdupd_lane");
5797 case NEON::BI__builtin_neon_vgetq_lane_i64:
5798 case NEON::BI__builtin_neon_vdupd_laneq_i64:
5800 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int64Ty, 2));
5801 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5802 case NEON::BI__builtin_neon_vget_lane_f32:
5804 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 2));
5805 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5806 case NEON::BI__builtin_neon_vget_lane_f64:
5808 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 1));
5809 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5810 case NEON::BI__builtin_neon_vgetq_lane_f32:
5811 case NEON::BI__builtin_neon_vdups_laneq_f32:
5813 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 4));
5814 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5815 case NEON::BI__builtin_neon_vgetq_lane_f64:
5816 case NEON::BI__builtin_neon_vdupd_laneq_f64:
5818 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 2));
5819 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5820 case NEON::BI__builtin_neon_vaddh_f16:
5821 return Builder.CreateFAdd(Ops[0], Ops[1],
"vaddh");
5822 case NEON::BI__builtin_neon_vsubh_f16:
5823 return Builder.CreateFSub(Ops[0], Ops[1],
"vsubh");
5824 case NEON::BI__builtin_neon_vmulh_f16:
5825 return Builder.CreateFMul(Ops[0], Ops[1],
"vmulh");
5826 case NEON::BI__builtin_neon_vdivh_f16:
5827 return Builder.CreateFDiv(Ops[0], Ops[1],
"vdivh");
5828 case NEON::BI__builtin_neon_vfmah_f16:
5831 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
5832 {Ops[1], Ops[2], Ops[0]});
5833 case NEON::BI__builtin_neon_vfmsh_f16: {
5838 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
5839 {Neg, Ops[2], Ops[0]});
5841 case NEON::BI__builtin_neon_vaddd_s64:
5842 case NEON::BI__builtin_neon_vaddd_u64:
5843 return Builder.CreateAdd(Ops[0], Ops[1],
"vaddd");
5844 case NEON::BI__builtin_neon_vsubd_s64:
5845 case NEON::BI__builtin_neon_vsubd_u64:
5846 return Builder.CreateSub(Ops[0], Ops[1],
"vsubd");
5847 case NEON::BI__builtin_neon_vqdmlalh_s16:
5848 case NEON::BI__builtin_neon_vqdmlslh_s16: {
5852 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
5853 Ops[1] =
EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5854 ProductOps,
"vqdmlXl");
5855 Constant *CI = ConstantInt::get(
SizeTy, 0);
5856 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
5858 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5859 ? Intrinsic::aarch64_neon_sqadd
5860 : Intrinsic::aarch64_neon_sqsub;
5865 case NEON::BI__builtin_neon_vqshlud_n_s64: {
5870 case NEON::BI__builtin_neon_vqshld_n_u64:
5871 case NEON::BI__builtin_neon_vqshld_n_s64: {
5872 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5873 ? Intrinsic::aarch64_neon_uqshl
5874 : Intrinsic::aarch64_neon_sqshl;
5878 case NEON::BI__builtin_neon_vrshrd_n_u64:
5879 case NEON::BI__builtin_neon_vrshrd_n_s64: {
5880 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5881 ? Intrinsic::aarch64_neon_urshl
5882 : Intrinsic::aarch64_neon_srshl;
5884 Ops[1] = ConstantInt::get(
Int64Ty, -SV);
5887 case NEON::BI__builtin_neon_vrsrad_n_u64:
5888 case NEON::BI__builtin_neon_vrsrad_n_s64: {
5889 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5890 ? Intrinsic::aarch64_neon_urshl
5891 : Intrinsic::aarch64_neon_srshl;
5893 Ops[2] =
Builder.CreateNeg(Ops[2]);
5895 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5898 case NEON::BI__builtin_neon_vshld_n_s64:
5899 case NEON::BI__builtin_neon_vshld_n_u64: {
5902 Ops[0], ConstantInt::get(
Int64Ty, Amt->getZExtValue()),
"shld_n");
5904 case NEON::BI__builtin_neon_vshrd_n_s64: {
5907 Ops[0], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t
>(63),
5908 Amt->getZExtValue())),
5911 case NEON::BI__builtin_neon_vshrd_n_u64: {
5913 uint64_t ShiftAmt = Amt->getZExtValue();
5916 return ConstantInt::get(
Int64Ty, 0);
5917 return Builder.CreateLShr(Ops[0], ConstantInt::get(
Int64Ty, ShiftAmt),
5920 case NEON::BI__builtin_neon_vsrad_n_s64: {
5923 Ops[1], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t
>(63),
5924 Amt->getZExtValue())),
5926 return Builder.CreateAdd(Ops[0], Ops[1]);
5928 case NEON::BI__builtin_neon_vsrad_n_u64: {
5930 uint64_t ShiftAmt = Amt->getZExtValue();
5935 Ops[1] =
Builder.CreateLShr(Ops[1], ConstantInt::get(
Int64Ty, ShiftAmt),
5937 return Builder.CreateAdd(Ops[0], Ops[1]);
5939 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5940 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5941 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5942 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5943 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"lane");
5947 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
5948 Ops[1] =
EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5949 ProductOps,
"vqdmlXl");
5950 Constant *CI = ConstantInt::get(
SizeTy, 0);
5951 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
5956 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5957 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5958 ? Intrinsic::aarch64_neon_sqadd
5959 : Intrinsic::aarch64_neon_sqsub;
5962 case NEON::BI__builtin_neon_vqdmlals_s32:
5963 case NEON::BI__builtin_neon_vqdmlsls_s32: {
5965 ProductOps.push_back(Ops[1]);
5966 ProductOps.push_back(Ops[2]);
5968 EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5969 ProductOps,
"vqdmlXl");
5971 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5972 ? Intrinsic::aarch64_neon_sqadd
5973 : Intrinsic::aarch64_neon_sqsub;
5978 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5979 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5980 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5981 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5982 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"lane");
5984 ProductOps.push_back(Ops[1]);
5985 ProductOps.push_back(Ops[2]);
5987 EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5988 ProductOps,
"vqdmlXl");
5993 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5994 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5995 ? Intrinsic::aarch64_neon_sqadd
5996 : Intrinsic::aarch64_neon_sqsub;
5999 case NEON::BI__builtin_neon_vget_lane_bf16:
6000 case NEON::BI__builtin_neon_vduph_lane_bf16:
6001 case NEON::BI__builtin_neon_vduph_lane_f16: {
6002 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
6004 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6005 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6006 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6007 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
6009 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6010 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6011 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
6012 return Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6014 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6016 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6017 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6018 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
6019 llvm::Value *Trunc =
6020 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6021 return Builder.CreateShuffleVector(
6022 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6024 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6026 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6028 std::iota(LoMask.begin(), LoMask.end(), 0);
6029 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6030 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
6031 llvm::Type *V8BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 8);
6032 llvm::Value *Inactive =
Builder.CreateShuffleVector(
6033 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6034 llvm::Value *Trunc =
6035 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6036 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6039 case clang::AArch64::BI_InterlockedAdd:
6040 case clang::AArch64::BI_InterlockedAdd_acq:
6041 case clang::AArch64::BI_InterlockedAdd_rel:
6042 case clang::AArch64::BI_InterlockedAdd_nf:
6043 case clang::AArch64::BI_InterlockedAdd64:
6044 case clang::AArch64::BI_InterlockedAdd64_acq:
6045 case clang::AArch64::BI_InterlockedAdd64_rel:
6046 case clang::AArch64::BI_InterlockedAdd64_nf: {
6048 Value *Val = Ops[1];
6049 llvm::AtomicOrdering Ordering;
6050 switch (BuiltinID) {
6051 case clang::AArch64::BI_InterlockedAdd:
6052 case clang::AArch64::BI_InterlockedAdd64:
6053 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6055 case clang::AArch64::BI_InterlockedAdd_acq:
6056 case clang::AArch64::BI_InterlockedAdd64_acq:
6057 Ordering = llvm::AtomicOrdering::Acquire;
6059 case clang::AArch64::BI_InterlockedAdd_rel:
6060 case clang::AArch64::BI_InterlockedAdd64_rel:
6061 Ordering = llvm::AtomicOrdering::Release;
6063 case clang::AArch64::BI_InterlockedAdd_nf:
6064 case clang::AArch64::BI_InterlockedAdd64_nf:
6065 Ordering = llvm::AtomicOrdering::Monotonic;
6068 llvm_unreachable(
"missing builtin ID in switch!");
6070 AtomicRMWInst *RMWI =
6071 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6072 return Builder.CreateAdd(RMWI, Val);
6077 llvm::Type *Ty = VTy;
6081 bool ExtractLow =
false;
6082 bool ExtendLaneArg =
false;
6083 switch (BuiltinID) {
6084 default:
return nullptr;
6085 case NEON::BI__builtin_neon_vbsl_v:
6086 case NEON::BI__builtin_neon_vbslq_v: {
6087 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6088 Ops[0] =
Builder.CreateBitCast(Ops[0], BitTy,
"vbsl");
6089 Ops[1] =
Builder.CreateBitCast(Ops[1], BitTy,
"vbsl");
6090 Ops[2] =
Builder.CreateBitCast(Ops[2], BitTy,
"vbsl");
6092 Ops[1] =
Builder.CreateAnd(Ops[0], Ops[1],
"vbsl");
6093 Ops[2] =
Builder.CreateAnd(
Builder.CreateNot(Ops[0]), Ops[2],
"vbsl");
6094 Ops[0] =
Builder.CreateOr(Ops[1], Ops[2],
"vbsl");
6095 return Builder.CreateBitCast(Ops[0], Ty);
6097 case NEON::BI__builtin_neon_vfma_lane_v:
6098 case NEON::BI__builtin_neon_vfmaq_lane_v: {
6101 Value *Addend = Ops[0];
6102 Value *Multiplicand = Ops[1];
6103 Value *LaneSource = Ops[2];
6104 Ops[0] = Multiplicand;
6105 Ops[1] = LaneSource;
6109 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6110 ? llvm::FixedVectorType::get(VTy->getElementType(),
6111 VTy->getNumElements() / 2)
6114 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6115 Ops[1] =
Builder.CreateBitCast(Ops[1], SourceTy);
6116 Ops[1] =
Builder.CreateShuffleVector(Ops[1], Ops[1], SV,
"lane");
6119 Int =
Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6123 case NEON::BI__builtin_neon_vfma_laneq_v: {
6126 if (VTy && VTy->getElementType() ==
DoubleTy) {
6129 llvm::FixedVectorType *VTy =
6131 Ops[2] =
Builder.CreateBitCast(Ops[2], VTy);
6132 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
6135 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6136 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6139 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6140 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6142 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6143 VTy->getNumElements() * 2);
6144 Ops[2] =
Builder.CreateBitCast(Ops[2], STy);
6145 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6147 Ops[2] =
Builder.CreateShuffleVector(Ops[2], Ops[2], SV,
"lane");
6150 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6151 {Ops[2], Ops[1], Ops[0]});
6153 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6154 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6155 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6157 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6160 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6161 {Ops[2], Ops[1], Ops[0]});
6163 case NEON::BI__builtin_neon_vfmah_lane_f16:
6164 case NEON::BI__builtin_neon_vfmas_lane_f32:
6165 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6166 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6167 case NEON::BI__builtin_neon_vfmad_lane_f64:
6168 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6170 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
6172 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6173 {Ops[1], Ops[2], Ops[0]});
6175 case NEON::BI__builtin_neon_vmull_v:
6177 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6178 if (
Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6180 case NEON::BI__builtin_neon_vmax_v:
6181 case NEON::BI__builtin_neon_vmaxq_v:
6183 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6184 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6186 case NEON::BI__builtin_neon_vmaxh_f16: {
6187 Int = Intrinsic::aarch64_neon_fmax;
6190 case NEON::BI__builtin_neon_vmin_v:
6191 case NEON::BI__builtin_neon_vminq_v:
6193 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6194 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6196 case NEON::BI__builtin_neon_vminh_f16: {
6197 Int = Intrinsic::aarch64_neon_fmin;
6200 case NEON::BI__builtin_neon_vabd_v:
6201 case NEON::BI__builtin_neon_vabdq_v:
6203 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6204 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6206 case NEON::BI__builtin_neon_vpadal_v:
6207 case NEON::BI__builtin_neon_vpadalq_v: {
6208 unsigned ArgElts = VTy->getNumElements();
6210 unsigned BitWidth = EltTy->getBitWidth();
6211 auto *ArgTy = llvm::FixedVectorType::get(
6212 llvm::IntegerType::get(
getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6213 llvm::Type* Tys[2] = { VTy, ArgTy };
6214 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6216 TmpOps.push_back(Ops[1]);
6219 llvm::Value *addend =
Builder.CreateBitCast(Ops[0], tmp->getType());
6220 return Builder.CreateAdd(tmp, addend);
6222 case NEON::BI__builtin_neon_vpmin_v:
6223 case NEON::BI__builtin_neon_vpminq_v:
6225 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6226 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6228 case NEON::BI__builtin_neon_vpmax_v:
6229 case NEON::BI__builtin_neon_vpmaxq_v:
6231 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6232 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6234 case NEON::BI__builtin_neon_vminnm_v:
6235 case NEON::BI__builtin_neon_vminnmq_v:
6236 Int = Intrinsic::aarch64_neon_fminnm;
6238 case NEON::BI__builtin_neon_vminnmh_f16:
6239 Int = Intrinsic::aarch64_neon_fminnm;
6241 case NEON::BI__builtin_neon_vmaxnm_v:
6242 case NEON::BI__builtin_neon_vmaxnmq_v:
6243 Int = Intrinsic::aarch64_neon_fmaxnm;
6245 case NEON::BI__builtin_neon_vmaxnmh_f16:
6246 Int = Intrinsic::aarch64_neon_fmaxnm;
6248 case NEON::BI__builtin_neon_vrecpss_f32: {
6252 case NEON::BI__builtin_neon_vrecpsd_f64:
6255 case NEON::BI__builtin_neon_vrecpsh_f16:
6258 case NEON::BI__builtin_neon_vqshrun_n_v:
6259 Int = Intrinsic::aarch64_neon_sqshrun;
6261 case NEON::BI__builtin_neon_vqrshrun_n_v:
6262 Int = Intrinsic::aarch64_neon_sqrshrun;
6264 case NEON::BI__builtin_neon_vqshrn_n_v:
6265 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6267 case NEON::BI__builtin_neon_vrshrn_n_v:
6268 Int = Intrinsic::aarch64_neon_rshrn;
6270 case NEON::BI__builtin_neon_vqrshrn_n_v:
6271 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6273 case NEON::BI__builtin_neon_vrndah_f16: {
6274 Int =
Builder.getIsFPConstrained()
6275 ? Intrinsic::experimental_constrained_round
6279 case NEON::BI__builtin_neon_vrnda_v:
6280 case NEON::BI__builtin_neon_vrndaq_v: {
6281 Int =
Builder.getIsFPConstrained()
6282 ? Intrinsic::experimental_constrained_round
6286 case NEON::BI__builtin_neon_vrndih_f16: {
6287 Int =
Builder.getIsFPConstrained()
6288 ? Intrinsic::experimental_constrained_nearbyint
6289 : Intrinsic::nearbyint;
6292 case NEON::BI__builtin_neon_vrndmh_f16: {
6293 Int =
Builder.getIsFPConstrained()
6294 ? Intrinsic::experimental_constrained_floor
6298 case NEON::BI__builtin_neon_vrndm_v:
6299 case NEON::BI__builtin_neon_vrndmq_v: {
6300 Int =
Builder.getIsFPConstrained()
6301 ? Intrinsic::experimental_constrained_floor
6305 case NEON::BI__builtin_neon_vrndnh_f16: {
6306 Int =
Builder.getIsFPConstrained()
6307 ? Intrinsic::experimental_constrained_roundeven
6308 : Intrinsic::roundeven;
6311 case NEON::BI__builtin_neon_vrndn_v:
6312 case NEON::BI__builtin_neon_vrndnq_v: {
6313 Int =
Builder.getIsFPConstrained()
6314 ? Intrinsic::experimental_constrained_roundeven
6315 : Intrinsic::roundeven;
6318 case NEON::BI__builtin_neon_vrndns_f32: {
6319 Int =
Builder.getIsFPConstrained()
6320 ? Intrinsic::experimental_constrained_roundeven
6321 : Intrinsic::roundeven;
6324 case NEON::BI__builtin_neon_vrndph_f16: {
6325 Int =
Builder.getIsFPConstrained()
6326 ? Intrinsic::experimental_constrained_ceil
6330 case NEON::BI__builtin_neon_vrndp_v:
6331 case NEON::BI__builtin_neon_vrndpq_v: {
6332 Int =
Builder.getIsFPConstrained()
6333 ? Intrinsic::experimental_constrained_ceil
6337 case NEON::BI__builtin_neon_vrndxh_f16: {
6338 Int =
Builder.getIsFPConstrained()
6339 ? Intrinsic::experimental_constrained_rint
6343 case NEON::BI__builtin_neon_vrndx_v:
6344 case NEON::BI__builtin_neon_vrndxq_v: {
6345 Int =
Builder.getIsFPConstrained()
6346 ? Intrinsic::experimental_constrained_rint
6350 case NEON::BI__builtin_neon_vrndh_f16: {
6351 Int =
Builder.getIsFPConstrained()
6352 ? Intrinsic::experimental_constrained_trunc
6356 case NEON::BI__builtin_neon_vrnd32x_f32:
6357 case NEON::BI__builtin_neon_vrnd32xq_f32:
6358 case NEON::BI__builtin_neon_vrnd32x_f64:
6359 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6360 Int = Intrinsic::aarch64_neon_frint32x;
6363 case NEON::BI__builtin_neon_vrnd32z_f32:
6364 case NEON::BI__builtin_neon_vrnd32zq_f32:
6365 case NEON::BI__builtin_neon_vrnd32z_f64:
6366 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6367 Int = Intrinsic::aarch64_neon_frint32z;
6370 case NEON::BI__builtin_neon_vrnd64x_f32:
6371 case NEON::BI__builtin_neon_vrnd64xq_f32:
6372 case NEON::BI__builtin_neon_vrnd64x_f64:
6373 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6374 Int = Intrinsic::aarch64_neon_frint64x;
6377 case NEON::BI__builtin_neon_vrnd64z_f32:
6378 case NEON::BI__builtin_neon_vrnd64zq_f32:
6379 case NEON::BI__builtin_neon_vrnd64z_f64:
6380 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6381 Int = Intrinsic::aarch64_neon_frint64z;
6384 case NEON::BI__builtin_neon_vrnd_v:
6385 case NEON::BI__builtin_neon_vrndq_v: {
6386 Int =
Builder.getIsFPConstrained()
6387 ? Intrinsic::experimental_constrained_trunc
6391 case NEON::BI__builtin_neon_vcvt_f64_v:
6392 case NEON::BI__builtin_neon_vcvtq_f64_v:
6393 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6395 return usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
6396 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
6397 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6399 "unexpected vcvt_f64_f32 builtin");
6403 return Builder.CreateFPExt(Ops[0], Ty,
"vcvt");
6405 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6407 "unexpected vcvt_f32_f64 builtin");
6411 return Builder.CreateFPTrunc(Ops[0], Ty,
"vcvt");
6413 case NEON::BI__builtin_neon_vcvt_s32_v:
6414 case NEON::BI__builtin_neon_vcvt_u32_v:
6415 case NEON::BI__builtin_neon_vcvt_s64_v:
6416 case NEON::BI__builtin_neon_vcvt_u64_v:
6417 case NEON::BI__builtin_neon_vcvt_s16_f16:
6418 case NEON::BI__builtin_neon_vcvt_u16_f16:
6419 case NEON::BI__builtin_neon_vcvtq_s32_v:
6420 case NEON::BI__builtin_neon_vcvtq_u32_v:
6421 case NEON::BI__builtin_neon_vcvtq_s64_v:
6422 case NEON::BI__builtin_neon_vcvtq_u64_v:
6423 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6424 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6426 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
6430 case NEON::BI__builtin_neon_vcvta_s16_f16:
6431 case NEON::BI__builtin_neon_vcvta_u16_f16:
6432 case NEON::BI__builtin_neon_vcvta_s32_v:
6433 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6434 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6435 case NEON::BI__builtin_neon_vcvta_u32_v:
6436 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6437 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6438 case NEON::BI__builtin_neon_vcvta_s64_v:
6439 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6440 case NEON::BI__builtin_neon_vcvta_u64_v:
6441 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6442 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6446 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6447 case NEON::BI__builtin_neon_vcvtm_s32_v:
6448 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6449 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6450 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6451 case NEON::BI__builtin_neon_vcvtm_u32_v:
6452 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6453 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6454 case NEON::BI__builtin_neon_vcvtm_s64_v:
6455 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6456 case NEON::BI__builtin_neon_vcvtm_u64_v:
6457 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6458 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6462 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6463 case NEON::BI__builtin_neon_vcvtn_s32_v:
6464 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6465 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6466 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6467 case NEON::BI__builtin_neon_vcvtn_u32_v:
6468 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6469 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6470 case NEON::BI__builtin_neon_vcvtn_s64_v:
6471 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6472 case NEON::BI__builtin_neon_vcvtn_u64_v:
6473 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6474 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6478 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6479 case NEON::BI__builtin_neon_vcvtp_s32_v:
6480 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6481 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6482 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6483 case NEON::BI__builtin_neon_vcvtp_u32_v:
6484 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6485 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6486 case NEON::BI__builtin_neon_vcvtp_s64_v:
6487 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6488 case NEON::BI__builtin_neon_vcvtp_u64_v:
6489 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6490 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6494 case NEON::BI__builtin_neon_vmulx_v:
6495 case NEON::BI__builtin_neon_vmulxq_v: {
6496 Int = Intrinsic::aarch64_neon_fmulx;
6499 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6500 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6503 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2],
"extract");
6505 Int = Intrinsic::aarch64_neon_fmulx;
6508 case NEON::BI__builtin_neon_vmul_lane_v:
6509 case NEON::BI__builtin_neon_vmul_laneq_v: {
6512 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6515 llvm::FixedVectorType *VTy =
6517 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
6518 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2],
"extract");
6522 case NEON::BI__builtin_neon_vpmaxnm_v:
6523 case NEON::BI__builtin_neon_vpmaxnmq_v: {
6524 Int = Intrinsic::aarch64_neon_fmaxnmp;
6527 case NEON::BI__builtin_neon_vpminnm_v:
6528 case NEON::BI__builtin_neon_vpminnmq_v: {
6529 Int = Intrinsic::aarch64_neon_fminnmp;
6532 case NEON::BI__builtin_neon_vsqrth_f16: {
6533 Int =
Builder.getIsFPConstrained()
6534 ? Intrinsic::experimental_constrained_sqrt
6538 case NEON::BI__builtin_neon_vsqrt_v:
6539 case NEON::BI__builtin_neon_vsqrtq_v: {
6540 Int =
Builder.getIsFPConstrained()
6541 ? Intrinsic::experimental_constrained_sqrt
6543 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6546 case NEON::BI__builtin_neon_vrbit_v:
6547 case NEON::BI__builtin_neon_vrbitq_v: {
6548 Int = Intrinsic::bitreverse;
6551 case NEON::BI__builtin_neon_vmaxv_f16: {
6552 Int = Intrinsic::aarch64_neon_fmaxv;
6554 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6555 llvm::Type *Tys[2] = {Ty, VTy};
6559 case NEON::BI__builtin_neon_vmaxvq_f16: {
6560 Int = Intrinsic::aarch64_neon_fmaxv;
6562 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6563 llvm::Type *Tys[2] = {Ty, VTy};
6567 case NEON::BI__builtin_neon_vminv_f16: {
6568 Int = Intrinsic::aarch64_neon_fminv;
6570 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6571 llvm::Type *Tys[2] = {Ty, VTy};
6575 case NEON::BI__builtin_neon_vminvq_f16: {
6576 Int = Intrinsic::aarch64_neon_fminv;
6578 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6579 llvm::Type *Tys[2] = {Ty, VTy};
6583 case NEON::BI__builtin_neon_vmaxnmv_f16: {
6584 Int = Intrinsic::aarch64_neon_fmaxnmv;
6586 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6587 llvm::Type *Tys[2] = {Ty, VTy};
6591 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
6592 Int = Intrinsic::aarch64_neon_fmaxnmv;
6594 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6595 llvm::Type *Tys[2] = {Ty, VTy};
6599 case NEON::BI__builtin_neon_vminnmv_f16: {
6600 Int = Intrinsic::aarch64_neon_fminnmv;
6602 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6603 llvm::Type *Tys[2] = {Ty, VTy};
6607 case NEON::BI__builtin_neon_vminnmvq_f16: {
6608 Int = Intrinsic::aarch64_neon_fminnmv;
6610 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6611 llvm::Type *Tys[2] = {Ty, VTy};
6615 case NEON::BI__builtin_neon_vmul_n_f64: {
6618 return Builder.CreateFMul(Ops[0], RHS);
6620 case NEON::BI__builtin_neon_vaddlv_u8: {
6621 Int = Intrinsic::aarch64_neon_uaddlv;
6623 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
6624 llvm::Type *Tys[2] = {Ty, VTy};
6628 case NEON::BI__builtin_neon_vaddlv_u16: {
6629 Int = Intrinsic::aarch64_neon_uaddlv;
6631 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
6632 llvm::Type *Tys[2] = {Ty, VTy};
6635 case NEON::BI__builtin_neon_vaddlvq_u8: {
6636 Int = Intrinsic::aarch64_neon_uaddlv;
6638 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
6639 llvm::Type *Tys[2] = {Ty, VTy};
6643 case NEON::BI__builtin_neon_vaddlvq_u16: {
6644 Int = Intrinsic::aarch64_neon_uaddlv;
6646 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
6647 llvm::Type *Tys[2] = {Ty, VTy};
6650 case NEON::BI__builtin_neon_vaddlv_s8: {
6651 Int = Intrinsic::aarch64_neon_saddlv;
6653 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
6654 llvm::Type *Tys[2] = {Ty, VTy};
6658 case NEON::BI__builtin_neon_vaddlv_s16: {
6659 Int = Intrinsic::aarch64_neon_saddlv;
6661 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
6662 llvm::Type *Tys[2] = {Ty, VTy};
6665 case NEON::BI__builtin_neon_vaddlvq_s8: {
6666 Int = Intrinsic::aarch64_neon_saddlv;
6668 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
6669 llvm::Type *Tys[2] = {Ty, VTy};
6673 case NEON::BI__builtin_neon_vaddlvq_s16: {
6674 Int = Intrinsic::aarch64_neon_saddlv;
6676 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
6677 llvm::Type *Tys[2] = {Ty, VTy};
6680 case NEON::BI__builtin_neon_vsri_n_v:
6681 case NEON::BI__builtin_neon_vsriq_n_v: {
6682 Int = Intrinsic::aarch64_neon_vsri;
6683 llvm::Function *Intrin =
CGM.getIntrinsic(Int, Ty);
6686 case NEON::BI__builtin_neon_vsli_n_v:
6687 case NEON::BI__builtin_neon_vsliq_n_v: {
6688 Int = Intrinsic::aarch64_neon_vsli;
6689 llvm::Function *Intrin =
CGM.getIntrinsic(Int, Ty);
6692 case NEON::BI__builtin_neon_vsra_n_v:
6693 case NEON::BI__builtin_neon_vsraq_n_v:
6694 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6696 return Builder.CreateAdd(Ops[0], Ops[1]);
6697 case NEON::BI__builtin_neon_vrsra_n_v:
6698 case NEON::BI__builtin_neon_vrsraq_n_v: {
6699 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6701 TmpOps.push_back(Ops[1]);
6702 TmpOps.push_back(Ops[2]);
6704 llvm::Value *tmp =
EmitNeonCall(F, TmpOps,
"vrshr_n", 1,
true);
6705 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
6706 return Builder.CreateAdd(Ops[0], tmp);
6708 case NEON::BI__builtin_neon_vld1_v:
6709 case NEON::BI__builtin_neon_vld1q_v: {
6712 case NEON::BI__builtin_neon_vst1_v:
6713 case NEON::BI__builtin_neon_vst1q_v:
6714 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
6716 case NEON::BI__builtin_neon_vld1_lane_v:
6717 case NEON::BI__builtin_neon_vld1q_lane_v: {
6718 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6719 Ops[0] =
Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6721 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vld1_lane");
6723 case NEON::BI__builtin_neon_vldap1_lane_s64:
6724 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
6725 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6726 llvm::LoadInst *LI =
Builder.CreateAlignedLoad(
6728 LI->setAtomic(llvm::AtomicOrdering::Acquire);
6730 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vldap1_lane");
6732 case NEON::BI__builtin_neon_vld1_dup_v:
6733 case NEON::BI__builtin_neon_vld1q_dup_v: {
6734 Value *
V = PoisonValue::get(Ty);
6735 Ops[0] =
Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6737 llvm::Constant *CI = ConstantInt::get(
Int32Ty, 0);
6738 Ops[0] =
Builder.CreateInsertElement(
V, Ops[0], CI);
6741 case NEON::BI__builtin_neon_vst1_lane_v:
6742 case NEON::BI__builtin_neon_vst1q_lane_v:
6743 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6744 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
6746 case NEON::BI__builtin_neon_vstl1_lane_s64:
6747 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
6748 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6749 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
6750 llvm::StoreInst *SI =
6752 SI->setAtomic(llvm::AtomicOrdering::Release);
6755 case NEON::BI__builtin_neon_vld2_v:
6756 case NEON::BI__builtin_neon_vld2q_v: {
6758 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6759 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
6760 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6762 case NEON::BI__builtin_neon_vld3_v:
6763 case NEON::BI__builtin_neon_vld3q_v: {
6765 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6766 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
6767 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6769 case NEON::BI__builtin_neon_vld4_v:
6770 case NEON::BI__builtin_neon_vld4q_v: {
6772 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6773 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
6774 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6776 case NEON::BI__builtin_neon_vld2_dup_v:
6777 case NEON::BI__builtin_neon_vld2q_dup_v: {
6779 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6780 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
6781 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6783 case NEON::BI__builtin_neon_vld3_dup_v:
6784 case NEON::BI__builtin_neon_vld3q_dup_v: {
6786 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6787 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
6788 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6790 case NEON::BI__builtin_neon_vld4_dup_v:
6791 case NEON::BI__builtin_neon_vld4q_dup_v: {
6793 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6794 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
6795 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6797 case NEON::BI__builtin_neon_vld2_lane_v:
6798 case NEON::BI__builtin_neon_vld2q_lane_v: {
6799 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6800 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6801 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6802 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6803 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6806 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6808 case NEON::BI__builtin_neon_vld3_lane_v:
6809 case NEON::BI__builtin_neon_vld3q_lane_v: {
6810 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6811 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6812 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6813 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6814 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6815 Ops[3] =
Builder.CreateBitCast(Ops[3], Ty);
6818 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6820 case NEON::BI__builtin_neon_vld4_lane_v:
6821 case NEON::BI__builtin_neon_vld4q_lane_v: {
6822 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6823 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6824 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6825 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6826 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6827 Ops[3] =
Builder.CreateBitCast(Ops[3], Ty);
6828 Ops[4] =
Builder.CreateBitCast(Ops[4], Ty);
6831 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6833 case NEON::BI__builtin_neon_vst2_v:
6834 case NEON::BI__builtin_neon_vst2q_v: {
6835 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6836 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6837 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6840 case NEON::BI__builtin_neon_vst2_lane_v:
6841 case NEON::BI__builtin_neon_vst2q_lane_v: {
6842 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6844 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6845 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6848 case NEON::BI__builtin_neon_vst3_v:
6849 case NEON::BI__builtin_neon_vst3q_v: {
6850 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6851 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6852 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6855 case NEON::BI__builtin_neon_vst3_lane_v:
6856 case NEON::BI__builtin_neon_vst3q_lane_v: {
6857 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6859 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6860 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6863 case NEON::BI__builtin_neon_vst4_v:
6864 case NEON::BI__builtin_neon_vst4q_v: {
6865 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6866 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6867 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6870 case NEON::BI__builtin_neon_vst4_lane_v:
6871 case NEON::BI__builtin_neon_vst4q_lane_v: {
6872 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6874 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6875 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6878 case NEON::BI__builtin_neon_vtrn_v:
6879 case NEON::BI__builtin_neon_vtrnq_v: {
6880 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6881 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6882 Value *SV =
nullptr;
6884 for (
unsigned vi = 0; vi != 2; ++vi) {
6886 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6887 Indices.push_back(i+vi);
6888 Indices.push_back(i+e+vi);
6891 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vtrn");
6896 case NEON::BI__builtin_neon_vuzp_v:
6897 case NEON::BI__builtin_neon_vuzpq_v: {
6898 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6899 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6900 Value *SV =
nullptr;
6902 for (
unsigned vi = 0; vi != 2; ++vi) {
6904 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6905 Indices.push_back(2*i+vi);
6908 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vuzp");
6913 case NEON::BI__builtin_neon_vzip_v:
6914 case NEON::BI__builtin_neon_vzipq_v: {
6915 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6916 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6917 Value *SV =
nullptr;
6919 for (
unsigned vi = 0; vi != 2; ++vi) {
6921 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6922 Indices.push_back((i + vi*e) >> 1);
6923 Indices.push_back(((i + vi*e) >> 1)+e);
6926 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vzip");
6931 case NEON::BI__builtin_neon_vqtbl1q_v: {
6932 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6935 case NEON::BI__builtin_neon_vqtbl2q_v: {
6936 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6939 case NEON::BI__builtin_neon_vqtbl3q_v: {
6940 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6943 case NEON::BI__builtin_neon_vqtbl4q_v: {
6944 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6947 case NEON::BI__builtin_neon_vqtbx1q_v: {
6948 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6951 case NEON::BI__builtin_neon_vqtbx2q_v: {
6952 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6955 case NEON::BI__builtin_neon_vqtbx3q_v: {
6956 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6959 case NEON::BI__builtin_neon_vqtbx4q_v: {
6960 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6963 case NEON::BI__builtin_neon_vsqadd_v:
6964 case NEON::BI__builtin_neon_vsqaddq_v: {
6965 Int = Intrinsic::aarch64_neon_usqadd;
6968 case NEON::BI__builtin_neon_vuqadd_v:
6969 case NEON::BI__builtin_neon_vuqaddq_v: {
6970 Int = Intrinsic::aarch64_neon_suqadd;
6974 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
6975 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
6976 case NEON::BI__builtin_neon_vluti2_laneq_f16:
6977 case NEON::BI__builtin_neon_vluti2_laneq_p16:
6978 case NEON::BI__builtin_neon_vluti2_laneq_p8:
6979 case NEON::BI__builtin_neon_vluti2_laneq_s16:
6980 case NEON::BI__builtin_neon_vluti2_laneq_s8:
6981 case NEON::BI__builtin_neon_vluti2_laneq_u16:
6982 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
6983 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6990 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
6991 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
6992 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
6993 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
6994 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
6995 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
6996 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
6997 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
6998 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
6999 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7006 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7007 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7008 case NEON::BI__builtin_neon_vluti2_lane_f16:
7009 case NEON::BI__builtin_neon_vluti2_lane_p16:
7010 case NEON::BI__builtin_neon_vluti2_lane_p8:
7011 case NEON::BI__builtin_neon_vluti2_lane_s16:
7012 case NEON::BI__builtin_neon_vluti2_lane_s8:
7013 case NEON::BI__builtin_neon_vluti2_lane_u16:
7014 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7015 Int = Intrinsic::aarch64_neon_vluti2_lane;
7022 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7023 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7024 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7025 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7026 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7027 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7028 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7029 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7030 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7031 Int = Intrinsic::aarch64_neon_vluti2_lane;
7038 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7039 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7040 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7041 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7042 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7045 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7046 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7047 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7048 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7049 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7052 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7053 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7054 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7055 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7056 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7057 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7058 return EmitNeonCall(
CGM.getIntrinsic(Int, Ty), Ops,
"vluti4q_lane_x2");
7060 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7061 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7062 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7063 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7064 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7065 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7066 return EmitNeonCall(
CGM.getIntrinsic(Int, Ty), Ops,
"vluti4q_laneq_x2");
7068 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
7070 {llvm::FixedVectorType::get(
HalfTy, 8),
7071 llvm::FixedVectorType::get(
Int8Ty, 16)},
7073 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
7075 {llvm::FixedVectorType::get(
FloatTy, 4),
7076 llvm::FixedVectorType::get(
Int8Ty, 16)},
7078 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7081 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7082 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7084 llvm::FixedVectorType::get(
BFloatTy, 8),
7085 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt1");
7086 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7089 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7090 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7092 llvm::FixedVectorType::get(
BFloatTy, 8),
7093 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt2");
7094 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7097 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7098 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7100 llvm::FixedVectorType::get(
HalfTy, 8),
7101 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt1");
7102 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7105 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7106 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7108 llvm::FixedVectorType::get(
HalfTy, 8),
7109 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt2");
7110 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7112 llvm::FixedVectorType::get(
Int8Ty, 8),
7113 Ops[0]->
getType(),
false, Ops, E,
"vfcvtn");
7114 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7116 llvm::FixedVectorType::get(
Int8Ty, 8),
7117 llvm::FixedVectorType::get(
HalfTy, 4),
false, Ops,
7119 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7121 llvm::FixedVectorType::get(
Int8Ty, 16),
7122 llvm::FixedVectorType::get(
HalfTy, 8),
false, Ops,
7124 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7125 llvm::Type *Ty = llvm::FixedVectorType::get(
Int8Ty, 16);
7126 Ops[0] =
Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7129 Ops[1]->
getType(),
false, Ops, E,
"vfcvtn2");
7132 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7133 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7136 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7137 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7138 ExtendLaneArg =
true;
7140 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7141 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7143 ExtendLaneArg,
HalfTy, Ops, E,
"fdot2_lane");
7144 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7145 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7148 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7149 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7150 ExtendLaneArg =
true;
7152 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7153 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7155 ExtendLaneArg,
FloatTy, Ops, E,
"fdot4_lane");
7157 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7159 {llvm::FixedVectorType::get(
HalfTy, 8)}, Ops, E,
7161 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7163 {llvm::FixedVectorType::get(
HalfTy, 8)}, Ops, E,
7165 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7167 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7169 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7171 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7173 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7175 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7177 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7179 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7181 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7182 ExtendLaneArg =
true;
7184 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7186 ExtendLaneArg,
HalfTy, Ops, E,
"vmlal_lane");
7187 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7188 ExtendLaneArg =
true;
7190 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7192 ExtendLaneArg,
HalfTy, Ops, E,
"vmlal_lane");
7193 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7194 ExtendLaneArg =
true;
7196 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7198 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7199 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7200 ExtendLaneArg =
true;
7202 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7204 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7205 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7206 ExtendLaneArg =
true;
7208 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7210 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7211 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7212 ExtendLaneArg =
true;
7214 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7216 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7217 case NEON::BI__builtin_neon_vamin_f16:
7218 case NEON::BI__builtin_neon_vaminq_f16:
7219 case NEON::BI__builtin_neon_vamin_f32:
7220 case NEON::BI__builtin_neon_vaminq_f32:
7221 case NEON::BI__builtin_neon_vaminq_f64: {
7222 Int = Intrinsic::aarch64_neon_famin;
7225 case NEON::BI__builtin_neon_vamax_f16:
7226 case NEON::BI__builtin_neon_vamaxq_f16:
7227 case NEON::BI__builtin_neon_vamax_f32:
7228 case NEON::BI__builtin_neon_vamaxq_f32:
7229 case NEON::BI__builtin_neon_vamaxq_f64: {
7230 Int = Intrinsic::aarch64_neon_famax;
7233 case NEON::BI__builtin_neon_vscale_f16:
7234 case NEON::BI__builtin_neon_vscaleq_f16:
7235 case NEON::BI__builtin_neon_vscale_f32:
7236 case NEON::BI__builtin_neon_vscaleq_f32:
7237 case NEON::BI__builtin_neon_vscaleq_f64: {
7238 Int = Intrinsic::aarch64_neon_fp8_fscale;