541 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
549 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
553 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
554 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
555 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
556 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
557 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
558 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
559 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
560 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
561 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
574 NEONMAP1(vcage_v, arm_neon_vacge, 0),
575 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
576 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
577 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
578 NEONMAP1(vcale_v, arm_neon_vacge, 0),
579 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
580 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
581 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
598 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
601 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
603 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
604 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
605 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
606 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
607 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
608 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
609 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
610 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
611 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
618 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
619 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
620 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
621 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
622 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
623 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
624 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
625 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
626 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
627 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
628 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
629 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
630 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
631 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
632 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
633 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
634 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
635 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
636 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
637 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
638 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
639 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
640 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
641 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
642 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
643 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
644 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
645 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
646 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
647 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
648 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
649 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
650 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
651 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
652 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
653 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
654 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
655 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
656 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
657 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
658 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
659 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
660 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
661 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
662 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
663 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
664 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
665 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
666 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
670 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
671 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
672 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
673 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
674 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
675 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
676 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
677 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
678 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
685 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
686 NEONMAP1(vdot_u32, arm_neon_udot, 0),
687 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
688 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
699 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
700 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
701 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
703 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
704 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
705 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
706 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
707 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
708 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
710 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
711 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
712 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
713 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
714 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
716 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
717 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
718 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
719 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
720 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
722 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
723 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
724 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
733 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
734 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
752 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
753 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
777 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
778 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
782 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
783 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
806 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
807 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
811 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
812 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
813 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
814 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
815 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
816 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
826 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
827 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
828 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
829 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
830 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
831 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
832 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
833 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
835 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
836 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
837 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
839 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
840 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
841 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
843 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
844 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
850 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
851 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
852 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
// NOTE(review): this listing is a fragment. The function's name/return type and
// many interior lines (declarations, conditions, breaks, closing braces) are
// not visible, so the comments below describe only what the shown lines do.
//
// Visible parameters:
//   BuiltinID         - the NEON builtin being emitted; drives the switch below.
//   LLVMIntrinsic     - primary intrinsic ID for the builtin.
//   AltLLVMIntrinsic  - alternate intrinsic ID, chosen by some cases below.
//   NameHint          - passed as the name for several emitted calls.
//   Modifier, E       - not referenced on any line visible in this fragment.
//   Arch              - target architecture; checked in the vst1_xN cases.
1185 unsigned BuiltinID,
unsigned LLVMIntrinsic,
unsigned AltLLVMIntrinsic,
1186 const char *NameHint,
unsigned Modifier,
const CallExpr *E,
1188 llvm::Triple::ArchType
Arch) {
1194 std::optional<llvm::APSInt> NeonTypeConst =
// Properties of the NEON type flags, queried once and reused by the cases
// below (Usgn, Quad and Floating are all read later in the switch).
1201 const bool Usgn =
Type.isUnsigned();
1202 const bool Quad =
Type.isQuad();
1203 const bool Floating =
Type.isFloatingPoint();
1205 const bool AllowBFloatArgsAndRet =
// VTy is the fixed vector type obtained from the type flags; Ty refers to the
// same type through a plain llvm::Type pointer.
1208 llvm::FixedVectorType *VTy =
1209 GetNeonType(
this,
Type, HasFastHalfType,
false, AllowBFloatArgsAndRet);
1210 llvm::Type *Ty = VTy;
// Helper: wraps an Address's alignment quantity in a 32-bit integer constant.
1214 auto getAlignmentValue32 = [&](
Address addr) ->
Value* {
1215 return Builder.getInt32(addr.getAlignment().getQuantity());
// Int starts out as the primary intrinsic. The condition under which it is
// replaced by the alternate one is on a line not shown here.
1218 unsigned Int = LLVMIntrinsic;
1220 Int = AltLLVMIntrinsic;
1222 switch (BuiltinID) {
// Lane splats: the element count is doubled for splatq_lane and halved for
// splat_laneq; the use of NumElements is not visible in this fragment.
1224 case NEON::BI__builtin_neon_splat_lane_v:
1225 case NEON::BI__builtin_neon_splat_laneq_v:
1226 case NEON::BI__builtin_neon_splatq_lane_v:
1227 case NEON::BI__builtin_neon_splatq_laneq_v: {
1228 auto NumElements = VTy->getElementCount();
1229 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1230 NumElements = NumElements * 2;
1231 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1232 NumElements = NumElements.divideCoefficientBy(2);
1234 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
// Pairwise add: for floating-point element types the AArch64 addp intrinsic is
// replaced by faddp.
1237 case NEON::BI__builtin_neon_vpadd_v:
1238 case NEON::BI__builtin_neon_vpaddq_v:
1240 if (VTy->getElementType()->isFloatingPointTy() &&
1241 Int == Intrinsic::aarch64_neon_addp)
1242 Int = Intrinsic::aarch64_neon_faddp;
// Absolute value: floating-point vectors use the generic fabs intrinsic,
// everything else uses the mapped LLVMIntrinsic.
1244 case NEON::BI__builtin_neon_vabs_v:
1245 case NEON::BI__builtin_neon_vabsq_v:
1246 if (VTy->getElementType()->isFloatingPointTy())
1247 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops,
"vabs");
1248 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops,
"vabs");
// vadd/vaddq: both operands are viewed as i8 vectors (16 lanes when Quad,
// otherwise 8), XORed together, and the result is cast back to Ty.
// Note: the local VTy declared here shadows the function-level VTy.
1249 case NEON::BI__builtin_neon_vadd_v:
1250 case NEON::BI__builtin_neon_vaddq_v: {
1251 llvm::Type *VTy = llvm::FixedVectorType::get(
Int8Ty, Quad ? 16 : 8);
1252 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
1253 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
1254 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
1255 return Builder.CreateBitCast(Ops[0], Ty);
// vaddhn: add at the extended element width, logical-shift right by half the
// source scalar width, then truncate to VTy.
1257 case NEON::BI__builtin_neon_vaddhn_v: {
1258 llvm::FixedVectorType *SrcTy =
1259 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1262 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1263 Ops[1] =
Builder.CreateBitCast(Ops[1], SrcTy);
1264 Ops[0] =
Builder.CreateAdd(Ops[0], Ops[1],
"vaddhn");
1268 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1269 Ops[0] =
Builder.CreateLShr(Ops[0], ShiftAmt,
"vaddhn");
1272 return Builder.CreateTrunc(Ops[0], VTy,
"vaddhn");
// vcale/vcalt swap their two operands.
// NOTE(review): presumably control then falls through to the vcage/vcagt
// handling below; the line between them is not shown — confirm.
1274 case NEON::BI__builtin_neon_vcale_v:
1275 case NEON::BI__builtin_neon_vcaleq_v:
1276 case NEON::BI__builtin_neon_vcalt_v:
1277 case NEON::BI__builtin_neon_vcaltq_v:
1278 std::swap(Ops[0], Ops[1]);
// vcage/vcagt: the intrinsic is overloaded on {VTy, VecFlt}. The inner switch
// on the scalar bit width has only its default (unreachable) arm visible.
1280 case NEON::BI__builtin_neon_vcage_v:
1281 case NEON::BI__builtin_neon_vcageq_v:
1282 case NEON::BI__builtin_neon_vcagt_v:
1283 case NEON::BI__builtin_neon_vcagtq_v: {
1285 switch (VTy->getScalarSizeInBits()) {
1286 default: llvm_unreachable(
"unexpected type");
1297 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1298 llvm::Type *Tys[] = { VTy, VecFlt };
1299 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
// Compare-against-zero family: each case picks an ordered floating-point
// predicate when Floating is set, otherwise the integer predicate (signed for
// the ordering comparisons). The callee receiving these arguments is not shown.
1302 case NEON::BI__builtin_neon_vceqz_v:
1303 case NEON::BI__builtin_neon_vceqzq_v:
1305 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ,
"vceqz");
1306 case NEON::BI__builtin_neon_vcgez_v:
1307 case NEON::BI__builtin_neon_vcgezq_v:
1309 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1311 case NEON::BI__builtin_neon_vclez_v:
1312 case NEON::BI__builtin_neon_vclezq_v:
1314 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1316 case NEON::BI__builtin_neon_vcgtz_v:
1317 case NEON::BI__builtin_neon_vcgtzq_v:
1319 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1321 case NEON::BI__builtin_neon_vcltz_v:
1322 case NEON::BI__builtin_neon_vcltzq_v:
1324 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1326 case NEON::BI__builtin_neon_vclz_v:
1327 case NEON::BI__builtin_neon_vclzq_v:
// Integer-to-float conversion: unsigned or signed conversion chosen by Usgn.
1332 case NEON::BI__builtin_neon_vcvt_f32_v:
1333 case NEON::BI__builtin_neon_vcvtq_f32_v:
1334 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1337 return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
1338 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
// Same unsigned/signed selection for the 16-bit integer to f16 conversions.
1339 case NEON::BI__builtin_neon_vcvt_f16_s16:
1340 case NEON::BI__builtin_neon_vcvt_f16_u16:
1341 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1342 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1343 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1346 return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
1347 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
1348 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1349 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1350 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1351 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
// Note the order: the unsigned variant is LLVMIntrinsic and the signed variant
// is AltLLVMIntrinsic.
1356 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1357 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1358 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1359 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1361 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
// "_n" float-to-integer conversions: all use the mapped LLVMIntrinsic.
1365 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1366 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1367 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1368 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1369 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1370 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1371 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1372 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1373 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1374 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1375 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1376 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1378 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
// Float-to-integer conversions: when the builder is not in constrained-FP
// mode, the saturating fptoui/fptosi intrinsic is selected by Usgn. A plain
// FPToUI/FPToSI return is also visible; how the two paths are separated is on
// lines not shown here.
1381 case NEON::BI__builtin_neon_vcvt_s32_v:
1382 case NEON::BI__builtin_neon_vcvt_u32_v:
1383 case NEON::BI__builtin_neon_vcvt_s64_v:
1384 case NEON::BI__builtin_neon_vcvt_u64_v:
1385 case NEON::BI__builtin_neon_vcvt_s16_f16:
1386 case NEON::BI__builtin_neon_vcvt_u16_f16:
1387 case NEON::BI__builtin_neon_vcvtq_s32_v:
1388 case NEON::BI__builtin_neon_vcvtq_u32_v:
1389 case NEON::BI__builtin_neon_vcvtq_s64_v:
1390 case NEON::BI__builtin_neon_vcvtq_u64_v:
1391 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1392 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1396 if (!
Builder.getIsFPConstrained())
1397 Int = Usgn ? Intrinsic::fptoui_sat : Intrinsic::fptosi_sat;
1398 llvm::Type *Tys[2] = {Ty, Ops[0]->getType()};
1403 return Usgn ?
Builder.CreateFPToUI(Ops[0], Ty,
"vcvt")
1404 :
Builder.CreateFPToSI(Ops[0], Ty,
"vcvt");
// vcvta / vcvtn / vcvtp / vcvtm conversions (all listed widths and signedness):
// every one forwards to the mapped LLVMIntrinsic with NameHint.
1406 case NEON::BI__builtin_neon_vcvta_s16_f16:
1407 case NEON::BI__builtin_neon_vcvta_s32_v:
1408 case NEON::BI__builtin_neon_vcvta_s64_v:
1409 case NEON::BI__builtin_neon_vcvta_u16_f16:
1410 case NEON::BI__builtin_neon_vcvta_u32_v:
1411 case NEON::BI__builtin_neon_vcvta_u64_v:
1412 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1413 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1414 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1415 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1416 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1417 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1418 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1419 case NEON::BI__builtin_neon_vcvtn_s32_v:
1420 case NEON::BI__builtin_neon_vcvtn_s64_v:
1421 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1422 case NEON::BI__builtin_neon_vcvtn_u32_v:
1423 case NEON::BI__builtin_neon_vcvtn_u64_v:
1424 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1425 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1426 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1427 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1428 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1429 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1430 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1431 case NEON::BI__builtin_neon_vcvtp_s32_v:
1432 case NEON::BI__builtin_neon_vcvtp_s64_v:
1433 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1434 case NEON::BI__builtin_neon_vcvtp_u32_v:
1435 case NEON::BI__builtin_neon_vcvtp_u64_v:
1436 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1437 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1438 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1439 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1440 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1441 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1442 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1443 case NEON::BI__builtin_neon_vcvtm_s32_v:
1444 case NEON::BI__builtin_neon_vcvtm_s64_v:
1445 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1446 case NEON::BI__builtin_neon_vcvtm_u32_v:
1447 case NEON::BI__builtin_neon_vcvtm_u64_v:
1448 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1449 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1450 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1451 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1452 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1453 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1455 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
// vcvtx: overloaded on the truncated-element vector type and Ty.
1457 case NEON::BI__builtin_neon_vcvtx_f32_v: {
1458 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
1459 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
// vext: builds a shuffle mask of consecutive indices starting at CV (defined
// on a line not shown) and shuffles the two operands with it.
1462 case NEON::BI__builtin_neon_vext_v:
1463 case NEON::BI__builtin_neon_vextq_v: {
1466 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1467 Indices.push_back(i+CV);
1469 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1470 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1471 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices,
"vext");
// vfma: the operands are passed reordered as {Ops[1], Ops[2], Ops[0]}, so the
// builtin's first operand becomes the last argument of the fma call.
1473 case NEON::BI__builtin_neon_vfma_v:
1474 case NEON::BI__builtin_neon_vfmaq_v: {
1475 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1476 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1477 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1481 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
1482 {Ops[1], Ops[2], Ops[0]});
// vld1: the alignment of PtrOp0 is appended as an extra operand.
1484 case NEON::BI__builtin_neon_vld1_v:
1485 case NEON::BI__builtin_neon_vld1q_v: {
1487 Ops.push_back(getAlignmentValue32(PtrOp0));
1488 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
"vld1");
// vld1_xN: the intrinsic is called on Ops[1] and its result is stored to
// Ops[0].
1490 case NEON::BI__builtin_neon_vld1_x2_v:
1491 case NEON::BI__builtin_neon_vld1q_x2_v:
1492 case NEON::BI__builtin_neon_vld1_x3_v:
1493 case NEON::BI__builtin_neon_vld1q_x3_v:
1494 case NEON::BI__builtin_neon_vld1_x4_v:
1495 case NEON::BI__builtin_neon_vld1q_x4_v: {
1497 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1498 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld1xN");
1499 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
// vld2/3/4 and their _dup forms: the intrinsic is called with Ops[1] plus the
// alignment of PtrOp1, and the result is stored to Ops[0].
1501 case NEON::BI__builtin_neon_vld2_v:
1502 case NEON::BI__builtin_neon_vld2q_v:
1503 case NEON::BI__builtin_neon_vld3_v:
1504 case NEON::BI__builtin_neon_vld3q_v:
1505 case NEON::BI__builtin_neon_vld4_v:
1506 case NEON::BI__builtin_neon_vld4q_v:
1507 case NEON::BI__builtin_neon_vld2_dup_v:
1508 case NEON::BI__builtin_neon_vld2q_dup_v:
1509 case NEON::BI__builtin_neon_vld3_dup_v:
1510 case NEON::BI__builtin_neon_vld3q_dup_v:
1511 case NEON::BI__builtin_neon_vld4_dup_v:
1512 case NEON::BI__builtin_neon_vld4q_dup_v: {
1514 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1515 Value *Align = getAlignmentValue32(PtrOp1);
1516 Ops[1] =
Builder.CreateCall(F, {Ops[1], Align}, NameHint);
1517 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
// vld1_dup: loads through PtrOp0 and inserts the loaded value at index 0 of a
// poison vector. Any later step is not visible in this fragment.
1519 case NEON::BI__builtin_neon_vld1_dup_v:
1520 case NEON::BI__builtin_neon_vld1q_dup_v: {
1521 Value *
V = PoisonValue::get(Ty);
1523 LoadInst *Ld =
Builder.CreateLoad(PtrOp0);
1524 llvm::Constant *CI = ConstantInt::get(
SizeTy, 0);
1525 Ops[0] =
Builder.CreateInsertElement(
V, Ld, CI);
// vldN_lane: operands from index 2 up to (but excluding) the last one are
// bitcast to Ty, then the alignment of PtrOp1 is appended.
1528 case NEON::BI__builtin_neon_vld2_lane_v:
1529 case NEON::BI__builtin_neon_vld2q_lane_v:
1530 case NEON::BI__builtin_neon_vld3_lane_v:
1531 case NEON::BI__builtin_neon_vld3q_lane_v:
1532 case NEON::BI__builtin_neon_vld4_lane_v:
1533 case NEON::BI__builtin_neon_vld4q_lane_v: {
1535 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1536 for (
unsigned I = 2; I < Ops.size() - 1; ++I)
1537 Ops[I] =
Builder.CreateBitCast(Ops[I], Ty);
1538 Ops.push_back(getAlignmentValue32(PtrOp1));
1540 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
// vmovl: the operand is viewed at the truncated element width and then
// extended to Ty. Both a zero-extend and a sign-extend return are visible; the
// condition selecting between them is on a line not shown.
1542 case NEON::BI__builtin_neon_vmovl_v: {
1543 llvm::FixedVectorType *DTy =
1544 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1545 Ops[0] =
Builder.CreateBitCast(Ops[0], DTy);
1547 return Builder.CreateZExt(Ops[0], Ty,
"vmovl");
1548 return Builder.CreateSExt(Ops[0], Ty,
"vmovl");
// vmovn: the operand is viewed at the extended element width and truncated to
// Ty.
1550 case NEON::BI__builtin_neon_vmovn_v: {
1551 llvm::FixedVectorType *QTy =
1552 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1553 Ops[0] =
Builder.CreateBitCast(Ops[0], QTy);
1554 return Builder.CreateTrunc(Ops[0], Ty,
"vmovn");
// vmull: unsigned or signed multiply chosen by Usgn; polynomial types override
// that choice with vmullp.
1556 case NEON::BI__builtin_neon_vmull_v:
1562 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1563 Int =
Type.isPoly() ? (
unsigned)Intrinsic::arm_neon_vmullp : Int;
// vpadal / vpaddl: the narrow input type has twice as many lanes as VTy. For
// vpaddl the visible code also shows its elements are half-width integers.
1565 case NEON::BI__builtin_neon_vpadal_v:
1566 case NEON::BI__builtin_neon_vpadalq_v: {
1568 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1572 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1573 llvm::Type *Tys[2] = { Ty, NarrowTy };
1576 case NEON::BI__builtin_neon_vpaddl_v:
1577 case NEON::BI__builtin_neon_vpaddlq_v: {
1579 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1580 llvm::Type *EltTy = llvm::IntegerType::get(
getLLVMContext(), EltBits / 2);
1582 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1583 llvm::Type *Tys[2] = { Ty, NarrowTy };
// vqdmlal / vqdmlsl: the visible final call uses AltLLVMIntrinsic; earlier
// steps are on lines not shown.
1586 case NEON::BI__builtin_neon_vqdmlal_v:
1587 case NEON::BI__builtin_neon_vqdmlsl_v: {
1592 return EmitNeonCall(
CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
// *_lane multiplies: for the q-form builtins RTy is rebuilt with the same
// element type and twice the number of lanes.
1594 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
1595 case NEON::BI__builtin_neon_vqdmulh_lane_v:
1596 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
1597 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
1599 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
1600 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
1601 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
1602 RTy->getNumElements() * 2);
1603 llvm::Type *Tys[2] = {
1608 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
1609 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
1610 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
1611 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
1612 llvm::Type *Tys[2] = {
1617 case NEON::BI__builtin_neon_vqshl_n_v:
1618 case NEON::BI__builtin_neon_vqshlq_n_v:
1621 case NEON::BI__builtin_neon_vqshlu_n_v:
1622 case NEON::BI__builtin_neon_vqshluq_n_v:
// Reciprocal / reciprocal-sqrt estimates: floating-point vector types use
// LLVMIntrinsic, all other types use AltLLVMIntrinsic.
1625 case NEON::BI__builtin_neon_vrecpe_v:
1626 case NEON::BI__builtin_neon_vrecpeq_v:
1627 case NEON::BI__builtin_neon_vrsqrte_v:
1628 case NEON::BI__builtin_neon_vrsqrteq_v:
1629 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
// vrndi: nearbyint, using the constrained variant when the builder is in
// constrained-FP mode.
1631 case NEON::BI__builtin_neon_vrndi_v:
1632 case NEON::BI__builtin_neon_vrndiq_v:
1633 Int =
Builder.getIsFPConstrained()
1634 ? Intrinsic::experimental_constrained_nearbyint
1635 : Intrinsic::nearbyint;
1637 case NEON::BI__builtin_neon_vrshr_n_v:
1638 case NEON::BI__builtin_neon_vrshrq_n_v:
1641 case NEON::BI__builtin_neon_vsha512hq_u64:
1642 case NEON::BI__builtin_neon_vsha512h2q_u64:
1643 case NEON::BI__builtin_neon_vsha512su0q_u64:
1644 case NEON::BI__builtin_neon_vsha512su1q_u64: {
// vshl_n: a plain shift-left of the first operand (cast to Ty) by Ops[1].
1648 case NEON::BI__builtin_neon_vshl_n_v:
1649 case NEON::BI__builtin_neon_vshlq_n_v:
1651 return Builder.CreateShl(
Builder.CreateBitCast(Ops[0],Ty), Ops[1],
// vshll_n: extend from the truncated element width to VTy (a zero-extend and a
// sign-extend are both visible; the selector is not shown), then shift left.
1653 case NEON::BI__builtin_neon_vshll_n_v: {
1654 llvm::FixedVectorType *SrcTy =
1655 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1656 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1658 Ops[0] =
Builder.CreateZExt(Ops[0], VTy);
1660 Ops[0] =
Builder.CreateSExt(Ops[0], VTy);
1662 return Builder.CreateShl(Ops[0], Ops[1],
"vshll_n");
// vshrn_n: shift right at the extended element width (a logical and an
// arithmetic shift are both visible; the selector is not shown), then truncate
// to Ty.
1664 case NEON::BI__builtin_neon_vshrn_n_v: {
1665 llvm::FixedVectorType *SrcTy =
1666 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1667 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1670 Ops[0] =
Builder.CreateLShr(Ops[0], Ops[1]);
1672 Ops[0] =
Builder.CreateAShr(Ops[0], Ops[1]);
1673 return Builder.CreateTrunc(Ops[0], Ty,
"vshrn_n");
1675 case NEON::BI__builtin_neon_vshr_n_v:
1676 case NEON::BI__builtin_neon_vshrq_n_v:
// vst1..vst4 and vstN_lane: the alignment of PtrOp0 is appended as an extra
// operand.
1678 case NEON::BI__builtin_neon_vst1_v:
1679 case NEON::BI__builtin_neon_vst1q_v:
1680 case NEON::BI__builtin_neon_vst2_v:
1681 case NEON::BI__builtin_neon_vst2q_v:
1682 case NEON::BI__builtin_neon_vst3_v:
1683 case NEON::BI__builtin_neon_vst3q_v:
1684 case NEON::BI__builtin_neon_vst4_v:
1685 case NEON::BI__builtin_neon_vst4q_v:
1686 case NEON::BI__builtin_neon_vst2_lane_v:
1687 case NEON::BI__builtin_neon_vst2q_lane_v:
1688 case NEON::BI__builtin_neon_vst3_lane_v:
1689 case NEON::BI__builtin_neon_vst3q_lane_v:
1690 case NEON::BI__builtin_neon_vst4_lane_v:
1691 case NEON::BI__builtin_neon_vst4q_lane_v: {
1693 Ops.push_back(getAlignmentValue32(PtrOp0));
1696 case NEON::BI__builtin_neon_vsm3partw1q_u32:
1697 case NEON::BI__builtin_neon_vsm3partw2q_u32:
1698 case NEON::BI__builtin_neon_vsm3ss1q_u32:
1699 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
1700 case NEON::BI__builtin_neon_vsm4eq_u32: {
1704 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
1705 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
1706 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
1707 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
// vst1_xN: on the AArch64 architectures the operand list is rotated left by
// one position, moving the first operand to the end.
1712 case NEON::BI__builtin_neon_vst1_x2_v:
1713 case NEON::BI__builtin_neon_vst1q_x2_v:
1714 case NEON::BI__builtin_neon_vst1_x3_v:
1715 case NEON::BI__builtin_neon_vst1q_x3_v:
1716 case NEON::BI__builtin_neon_vst1_x4_v:
1717 case NEON::BI__builtin_neon_vst1q_x4_v: {
1720 if (
Arch == llvm::Triple::aarch64 ||
Arch == llvm::Triple::aarch64_be ||
1721 Arch == llvm::Triple::aarch64_32) {
1723 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
// vsubhn: subtract at the extended element width, logical-shift right by half
// the source scalar width, then truncate to VTy.
1729 case NEON::BI__builtin_neon_vsubhn_v: {
1730 llvm::FixedVectorType *SrcTy =
1731 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1734 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1735 Ops[1] =
Builder.CreateBitCast(Ops[1], SrcTy);
1736 Ops[0] =
Builder.CreateSub(Ops[0], Ops[1],
"vsubhn");
1740 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1741 Ops[0] =
Builder.CreateLShr(Ops[0], ShiftAmt,
"vsubhn");
1744 return Builder.CreateTrunc(Ops[0], VTy,
"vsubhn");
// vtrn: two shuffles (vi = 0 and 1). For each even i the mask takes index i+vi
// followed by index i+e+vi, where e is the lane count of VTy.
1746 case NEON::BI__builtin_neon_vtrn_v:
1747 case NEON::BI__builtin_neon_vtrnq_v: {
1748 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1749 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1750 Value *SV =
nullptr;
1752 for (
unsigned vi = 0; vi != 2; ++vi) {
1754 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1755 Indices.push_back(i+vi);
1756 Indices.push_back(i+e+vi);
1759 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vtrn");
// vtst: AND the operands, compare the result against zero with "not equal",
// and sign-extend the comparison result to Ty.
1764 case NEON::BI__builtin_neon_vtst_v:
1765 case NEON::BI__builtin_neon_vtstq_v: {
1766 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1767 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1768 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
1769 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1770 ConstantAggregateZero::get(Ty));
1771 return Builder.CreateSExt(Ops[0], Ty,
"vtst");
// vuzp: two shuffles (vi = 0 and 1); the mask takes indices 2*i+vi, i.e. every
// other index starting at vi.
1773 case NEON::BI__builtin_neon_vuzp_v:
1774 case NEON::BI__builtin_neon_vuzpq_v: {
1775 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1776 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1777 Value *SV =
nullptr;
1779 for (
unsigned vi = 0; vi != 2; ++vi) {
1781 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1782 Indices.push_back(2*i+vi);
1785 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vuzp");
1790 case NEON::BI__builtin_neon_vxarq_u64: {
// vzip: two shuffles (vi = 0 and 1). For each even i the mask takes index
// (i + vi*e)/2 followed by that same index plus e.
1795 case NEON::BI__builtin_neon_vzip_v:
1796 case NEON::BI__builtin_neon_vzipq_v: {
1797 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1798 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1799 Value *SV =
nullptr;
1801 for (
unsigned vi = 0; vi != 2; ++vi) {
1803 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1804 Indices.push_back((i + vi*e) >> 1);
1805 Indices.push_back(((i + vi*e) >> 1)+e);
1808 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vzip");
// Dot-product / matrix-multiply / widening-FMA families below: each builds an
// input vector type with the same total bit width as Ty, using i8 lanes
// (bits / 8), half lanes (bits / 16) or bfloat lanes (bits / 16), and overloads
// the intrinsic on { Ty, InputTy }.
1813 case NEON::BI__builtin_neon_vdot_s32:
1814 case NEON::BI__builtin_neon_vdot_u32:
1815 case NEON::BI__builtin_neon_vdotq_s32:
1816 case NEON::BI__builtin_neon_vdotq_u32: {
1818 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1819 llvm::Type *Tys[2] = { Ty, InputTy };
1822 case NEON::BI__builtin_neon_vfmlal_low_f16:
1823 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
1825 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1826 llvm::Type *Tys[2] = { Ty, InputTy };
1829 case NEON::BI__builtin_neon_vfmlsl_low_f16:
1830 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
1832 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1833 llvm::Type *Tys[2] = { Ty, InputTy };
1836 case NEON::BI__builtin_neon_vfmlal_high_f16:
1837 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
1839 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1840 llvm::Type *Tys[2] = { Ty, InputTy };
1843 case NEON::BI__builtin_neon_vfmlsl_high_f16:
1844 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
1846 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1847 llvm::Type *Tys[2] = { Ty, InputTy };
1850 case NEON::BI__builtin_neon_vmmlaq_s32:
1851 case NEON::BI__builtin_neon_vmmlaq_u32: {
1853 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1854 llvm::Type *Tys[2] = { Ty, InputTy };
1855 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
"vmmla");
1857 case NEON::BI__builtin_neon_vmmlaq_f16_f16:
1858 case NEON::BI__builtin_neon_vmmlaq_f32_f16: {
1860 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1861 llvm::Type *Tys[2] = {Ty, InputTy};
1864 case NEON::BI__builtin_neon_vusmmlaq_s32: {
1866 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1867 llvm::Type *Tys[2] = { Ty, InputTy };
1870 case NEON::BI__builtin_neon_vusdot_s32:
1871 case NEON::BI__builtin_neon_vusdotq_s32: {
1873 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1874 llvm::Type *Tys[2] = { Ty, InputTy };
1877 case NEON::BI__builtin_neon_vbfdot_f32:
1878 case NEON::BI__builtin_neon_vbfdotq_f32: {
1879 llvm::Type *InputTy =
1880 llvm::FixedVectorType::get(
BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
1881 llvm::Type *Tys[2] = { Ty, InputTy };
// __a32_vcvt_bf16_f32: overloaded on Ty alone.
1884 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
1885 llvm::Type *Tys[1] = { Ty };
// After the switch, Int must be a non-zero intrinsic ID.
1892 assert(Int &&
"Expected valid intrinsic number");
2167 llvm::Triple::ArchType
Arch) {
2168 if (
auto Hint = GetValueForARMHint(BuiltinID))
2171 if (BuiltinID == clang::ARM::BI__emit) {
2173 llvm::FunctionType *FTy =
2174 llvm::FunctionType::get(
VoidTy,
false);
2178 llvm_unreachable(
"Sema will ensure that the parameter is constant");
2181 uint64_t ZExtValue =
Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
2183 llvm::InlineAsm *Emit =
2184 IsThumb ? InlineAsm::get(FTy,
".inst.n 0x" + utohexstr(ZExtValue),
"",
2186 : InlineAsm::get(FTy,
".inst 0x" + utohexstr(ZExtValue),
"",
2189 return Builder.CreateCall(Emit);
2192 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2194 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
2197 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2209 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2212 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
2215 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2216 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2218 Function *F =
CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
2220 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2226 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2228 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::arm_cls), Arg,
"cls");
2230 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2232 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
2236 if (BuiltinID == clang::ARM::BI__clear_cache) {
2237 assert(E->
getNumArgs() == 2 &&
"__clear_cache takes 2 arguments");
2240 for (
unsigned i = 0; i < 2; i++)
2242 llvm::Type *Ty =
CGM.getTypes().ConvertType(FD->
getType());
2244 StringRef Name = FD->
getName();
2248 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2249 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2252 switch (BuiltinID) {
2253 default: llvm_unreachable(
"unexpected builtin");
2254 case clang::ARM::BI__builtin_arm_mcrr:
2255 F =
CGM.getIntrinsic(Intrinsic::arm_mcrr);
2257 case clang::ARM::BI__builtin_arm_mcrr2:
2258 F =
CGM.getIntrinsic(Intrinsic::arm_mcrr2);
2279 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
2282 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2283 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2286 switch (BuiltinID) {
2287 default: llvm_unreachable(
"unexpected builtin");
2288 case clang::ARM::BI__builtin_arm_mrrc:
2289 F =
CGM.getIntrinsic(Intrinsic::arm_mrrc);
2291 case clang::ARM::BI__builtin_arm_mrrc2:
2292 F =
CGM.getIntrinsic(Intrinsic::arm_mrrc2);
2299 Value *RtAndRt2 =
Builder.CreateCall(F, {Coproc, Opc1, CRm});
2309 Value *ShiftCast = llvm::ConstantInt::get(
Int64Ty, 32);
2310 RtAndRt2 =
Builder.CreateShl(Rt, ShiftCast,
"shl",
true);
2311 RtAndRt2 =
Builder.CreateOr(RtAndRt2, Rt1);
2316 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2317 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2318 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2320 BuiltinID == clang::ARM::BI__ldrexd) {
2323 switch (BuiltinID) {
2324 default: llvm_unreachable(
"unexpected builtin");
2325 case clang::ARM::BI__builtin_arm_ldaex:
2326 F =
CGM.getIntrinsic(Intrinsic::arm_ldaexd);
2328 case clang::ARM::BI__builtin_arm_ldrexd:
2329 case clang::ARM::BI__builtin_arm_ldrex:
2330 case clang::ARM::BI__ldrexd:
2331 F =
CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2344 Val =
Builder.CreateShl(Val0, ShiftCst,
"shl",
true );
2345 Val =
Builder.CreateOr(Val, Val1);
2349 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2350 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2359 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2360 : Intrinsic::arm_ldrex,
2362 CallInst *Val =
Builder.CreateCall(F, LoadAddr,
"ldrex");
2366 if (RealResTy->isPointerTy())
2367 return Builder.CreateIntToPtr(Val, RealResTy);
2369 llvm::Type *IntResTy = llvm::IntegerType::get(
2371 return Builder.CreateBitCast(
Builder.CreateTruncOrBitCast(Val, IntResTy),
2376 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2377 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2378 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2381 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2382 : Intrinsic::arm_strexd);
2387 Builder.CreateStore(Val, Tmp);
2390 Val =
Builder.CreateLoad(LdPtr);
2395 return Builder.CreateCall(F, {Arg0, Arg1, StPtr},
"strexd");
2398 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2399 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2404 llvm::Type *StoreTy =
2407 if (StoreVal->
getType()->isPointerTy())
2410 llvm::Type *
IntTy = llvm::IntegerType::get(
2412 CGM.getDataLayout().getTypeSizeInBits(StoreVal->
getType()));
2418 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2419 : Intrinsic::arm_strex,
2422 CallInst *CI =
Builder.CreateCall(F, {StoreVal, StoreAddr},
"strex");
2424 1, Attribute::get(
getLLVMContext(), Attribute::ElementType, StoreTy));
2428 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2429 Function *F =
CGM.getIntrinsic(Intrinsic::arm_clrex);
2434 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2435 switch (BuiltinID) {
2436 case clang::ARM::BI__builtin_arm_crc32b:
2437 CRCIntrinsicID = Intrinsic::arm_crc32b;
break;
2438 case clang::ARM::BI__builtin_arm_crc32cb:
2439 CRCIntrinsicID = Intrinsic::arm_crc32cb;
break;
2440 case clang::ARM::BI__builtin_arm_crc32h:
2441 CRCIntrinsicID = Intrinsic::arm_crc32h;
break;
2442 case clang::ARM::BI__builtin_arm_crc32ch:
2443 CRCIntrinsicID = Intrinsic::arm_crc32ch;
break;
2444 case clang::ARM::BI__builtin_arm_crc32w:
2445 case clang::ARM::BI__builtin_arm_crc32d:
2446 CRCIntrinsicID = Intrinsic::arm_crc32w;
break;
2447 case clang::ARM::BI__builtin_arm_crc32cw:
2448 case clang::ARM::BI__builtin_arm_crc32cd:
2449 CRCIntrinsicID = Intrinsic::arm_crc32cw;
break;
2452 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2458 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2459 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2467 return Builder.CreateCall(F, {Res, Arg1b});
2472 return Builder.CreateCall(F, {Arg0, Arg1});
2476 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2477 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2478 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2479 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2480 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2481 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2484 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2485 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2486 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2489 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2490 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2492 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2493 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2495 llvm::Type *ValueType;
2497 if (IsPointerBuiltin) {
2500 }
else if (Is64Bit) {
2510 if (BuiltinID == ARM::BI__builtin_sponentry) {
2529 return P.first == BuiltinID;
2532 BuiltinID = It->second;
2536 unsigned ICEArguments = 0;
2541 auto getAlignmentValue32 = [&](
Address addr) ->
Value* {
2542 return Builder.getInt32(addr.getAlignment().getQuantity());
2549 unsigned NumArgs = E->
getNumArgs() - (HasExtraArg ? 1 : 0);
2550 for (
unsigned i = 0, e = NumArgs; i != e; i++) {
2552 switch (BuiltinID) {
2553 case NEON::BI__builtin_neon_vld1_v:
2554 case NEON::BI__builtin_neon_vld1q_v:
2555 case NEON::BI__builtin_neon_vld1q_lane_v:
2556 case NEON::BI__builtin_neon_vld1_lane_v:
2557 case NEON::BI__builtin_neon_vld1_dup_v:
2558 case NEON::BI__builtin_neon_vld1q_dup_v:
2559 case NEON::BI__builtin_neon_vst1_v:
2560 case NEON::BI__builtin_neon_vst1q_v:
2561 case NEON::BI__builtin_neon_vst1q_lane_v:
2562 case NEON::BI__builtin_neon_vst1_lane_v:
2563 case NEON::BI__builtin_neon_vst2_v:
2564 case NEON::BI__builtin_neon_vst2q_v:
2565 case NEON::BI__builtin_neon_vst2_lane_v:
2566 case NEON::BI__builtin_neon_vst2q_lane_v:
2567 case NEON::BI__builtin_neon_vst3_v:
2568 case NEON::BI__builtin_neon_vst3q_v:
2569 case NEON::BI__builtin_neon_vst3_lane_v:
2570 case NEON::BI__builtin_neon_vst3q_lane_v:
2571 case NEON::BI__builtin_neon_vst4_v:
2572 case NEON::BI__builtin_neon_vst4q_v:
2573 case NEON::BI__builtin_neon_vst4_lane_v:
2574 case NEON::BI__builtin_neon_vst4q_lane_v:
2583 switch (BuiltinID) {
2584 case NEON::BI__builtin_neon_vld2_v:
2585 case NEON::BI__builtin_neon_vld2q_v:
2586 case NEON::BI__builtin_neon_vld3_v:
2587 case NEON::BI__builtin_neon_vld3q_v:
2588 case NEON::BI__builtin_neon_vld4_v:
2589 case NEON::BI__builtin_neon_vld4q_v:
2590 case NEON::BI__builtin_neon_vld2_lane_v:
2591 case NEON::BI__builtin_neon_vld2q_lane_v:
2592 case NEON::BI__builtin_neon_vld3_lane_v:
2593 case NEON::BI__builtin_neon_vld3q_lane_v:
2594 case NEON::BI__builtin_neon_vld4_lane_v:
2595 case NEON::BI__builtin_neon_vld4q_lane_v:
2596 case NEON::BI__builtin_neon_vld2_dup_v:
2597 case NEON::BI__builtin_neon_vld2q_dup_v:
2598 case NEON::BI__builtin_neon_vld3_dup_v:
2599 case NEON::BI__builtin_neon_vld3q_dup_v:
2600 case NEON::BI__builtin_neon_vld4_dup_v:
2601 case NEON::BI__builtin_neon_vld4q_dup_v:
2613 switch (BuiltinID) {
2616 case NEON::BI__builtin_neon_vget_lane_i8:
2617 case NEON::BI__builtin_neon_vget_lane_i16:
2618 case NEON::BI__builtin_neon_vget_lane_i32:
2619 case NEON::BI__builtin_neon_vget_lane_i64:
2620 case NEON::BI__builtin_neon_vget_lane_bf16:
2621 case NEON::BI__builtin_neon_vget_lane_f32:
2622 case NEON::BI__builtin_neon_vgetq_lane_i8:
2623 case NEON::BI__builtin_neon_vgetq_lane_i16:
2624 case NEON::BI__builtin_neon_vgetq_lane_i32:
2625 case NEON::BI__builtin_neon_vgetq_lane_i64:
2626 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2627 case NEON::BI__builtin_neon_vgetq_lane_f32:
2628 case NEON::BI__builtin_neon_vduph_lane_bf16:
2629 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2630 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
2632 case NEON::BI__builtin_neon_vrndns_f32: {
2634 llvm::Type *Tys[] = {Arg->
getType()};
2635 Function *F =
CGM.getIntrinsic(Intrinsic::roundeven, Tys);
2636 return Builder.CreateCall(F, {Arg},
"vrndn"); }
2638 case NEON::BI__builtin_neon_vset_lane_i8:
2639 case NEON::BI__builtin_neon_vset_lane_i16:
2640 case NEON::BI__builtin_neon_vset_lane_i32:
2641 case NEON::BI__builtin_neon_vset_lane_i64:
2642 case NEON::BI__builtin_neon_vset_lane_bf16:
2643 case NEON::BI__builtin_neon_vset_lane_f32:
2644 case NEON::BI__builtin_neon_vsetq_lane_i8:
2645 case NEON::BI__builtin_neon_vsetq_lane_i16:
2646 case NEON::BI__builtin_neon_vsetq_lane_i32:
2647 case NEON::BI__builtin_neon_vsetq_lane_i64:
2648 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2649 case NEON::BI__builtin_neon_vsetq_lane_f32:
2650 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
2652 case NEON::BI__builtin_neon_vsha1h_u32:
2655 case NEON::BI__builtin_neon_vsha1cq_u32:
2658 case NEON::BI__builtin_neon_vsha1pq_u32:
2661 case NEON::BI__builtin_neon_vsha1mq_u32:
2665 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
2666 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
2672 case clang::ARM::BI_MoveToCoprocessor:
2673 case clang::ARM::BI_MoveToCoprocessor2: {
2674 Function *F =
CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
2675 ? Intrinsic::arm_mcr
2676 : Intrinsic::arm_mcr2);
2677 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
2678 Ops[3], Ops[4], Ops[5]});
2683 assert(HasExtraArg);
2685 std::optional<llvm::APSInt>
Result =
2690 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
2691 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
2694 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
2700 bool usgn =
Result->getZExtValue() == 1;
2701 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
2705 return Builder.CreateCall(F, Ops,
"vcvtr");
2710 bool usgn =
Type.isUnsigned();
2711 bool rightShift =
false;
2713 llvm::FixedVectorType *VTy =
2716 llvm::Type *Ty = VTy;
2731 switch (BuiltinID) {
2732 default:
return nullptr;
2733 case NEON::BI__builtin_neon_vld1q_lane_v:
2736 if (VTy->getElementType()->isIntegerTy(64)) {
2738 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2740 Value *SV = llvm::ConstantVector::get(ConstantInt::get(
Int32Ty, 1-Lane));
2741 Ops[1] =
Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2743 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
2745 Function *F =
CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
2746 Value *Align = getAlignmentValue32(PtrOp0);
2749 int Indices[] = {1 - Lane, Lane};
2750 return Builder.CreateShuffleVector(Ops[1], Ld, Indices,
"vld1q_lane");
2753 case NEON::BI__builtin_neon_vld1_lane_v: {
2754 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2757 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2],
"vld1_lane");
2759 case NEON::BI__builtin_neon_vqrshrn_n_v:
2761 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
2764 case NEON::BI__builtin_neon_vqrshrun_n_v:
2765 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
2766 Ops,
"vqrshrun_n", 1,
true);
2767 case NEON::BI__builtin_neon_vqshrn_n_v:
2768 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
2771 case NEON::BI__builtin_neon_vqshrun_n_v:
2772 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
2773 Ops,
"vqshrun_n", 1,
true);
2774 case NEON::BI__builtin_neon_vrecpe_v:
2775 case NEON::BI__builtin_neon_vrecpeq_v:
2778 case NEON::BI__builtin_neon_vrshrn_n_v:
2779 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
2780 Ops,
"vrshrn_n", 1,
true);
2781 case NEON::BI__builtin_neon_vrsra_n_v:
2782 case NEON::BI__builtin_neon_vrsraq_n_v:
2783 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
2784 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2786 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2787 Ops[1] =
Builder.CreateCall(
CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
2788 return Builder.CreateAdd(Ops[0], Ops[1],
"vrsra_n");
2789 case NEON::BI__builtin_neon_vsri_n_v:
2790 case NEON::BI__builtin_neon_vsriq_n_v:
2793 case NEON::BI__builtin_neon_vsli_n_v:
2794 case NEON::BI__builtin_neon_vsliq_n_v:
2796 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
2798 case NEON::BI__builtin_neon_vsra_n_v:
2799 case NEON::BI__builtin_neon_vsraq_n_v:
2800 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
2802 return Builder.CreateAdd(Ops[0], Ops[1]);
2803 case NEON::BI__builtin_neon_vst1q_lane_v:
2806 if (VTy->getElementType()->isIntegerTy(64)) {
2807 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2809 Ops[1] =
Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
2810 Ops[2] = getAlignmentValue32(PtrOp0);
2811 llvm::Type *Tys[] = {
Int8PtrTy, Ops[1]->getType()};
2812 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
2816 case NEON::BI__builtin_neon_vst1_lane_v: {
2817 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2818 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
2819 return Builder.CreateStore(Ops[1],
2822 case NEON::BI__builtin_neon_vtbl1_v:
2825 case NEON::BI__builtin_neon_vtbl2_v:
2828 case NEON::BI__builtin_neon_vtbl3_v:
2831 case NEON::BI__builtin_neon_vtbl4_v:
2834 case NEON::BI__builtin_neon_vtbx1_v:
2837 case NEON::BI__builtin_neon_vtbx2_v:
2840 case NEON::BI__builtin_neon_vtbx3_v:
2843 case NEON::BI__builtin_neon_vtbx4_v:
4497 llvm::Triple::ArchType
Arch) {
4506 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4507 return EmitAArch64CpuSupports(E);
4509 unsigned HintID =
static_cast<unsigned>(-1);
4510 switch (BuiltinID) {
4512 case clang::AArch64::BI__builtin_arm_nop:
4515 case clang::AArch64::BI__builtin_arm_yield:
4516 case clang::AArch64::BI__yield:
4519 case clang::AArch64::BI__builtin_arm_wfe:
4520 case clang::AArch64::BI__wfe:
4523 case clang::AArch64::BI__builtin_arm_wfi:
4524 case clang::AArch64::BI__wfi:
4527 case clang::AArch64::BI__builtin_arm_sev:
4528 case clang::AArch64::BI__sev:
4531 case clang::AArch64::BI__builtin_arm_sevl:
4532 case clang::AArch64::BI__sevl:
4537 if (HintID !=
static_cast<unsigned>(-1)) {
4538 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_hint);
4539 return Builder.CreateCall(F, llvm::ConstantInt::get(
Int32Ty, HintID));
4542 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
4543 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_break);
4548 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
4551 llvm::FunctionType::get(StructType::get(
CGM.Int64Ty,
CGM.Int64Ty), {},
4553 "__arm_sme_state"));
4555 "aarch64_pstate_sm_compatible");
4556 CI->setAttributes(Attrs);
4559 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
4566 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
4568 "rbit of unusual size!");
4571 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
4573 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
4575 "rbit of unusual size!");
4578 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
4581 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
4582 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
4584 Function *F =
CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
4586 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
4591 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
4593 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
4596 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
4598 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
4602 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
4603 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
4605 llvm::Type *Ty = Arg->getType();
4606 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
4610 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
4611 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
4613 llvm::Type *Ty = Arg->getType();
4614 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
4618 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
4619 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
4621 llvm::Type *Ty = Arg->getType();
4622 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
4626 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
4627 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
4629 llvm::Type *Ty = Arg->getType();
4630 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
4634 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
4636 "__jcvt of unusual size!");
4639 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
4642 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
4643 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
4644 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
4645 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
4649 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
4652 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
4653 llvm::Value *Val =
Builder.CreateCall(F, MemAddr);
4655 for (
size_t i = 0; i < 8; i++) {
4656 llvm::Value *ValOffsetPtr =
4668 Args.push_back(MemAddr);
4669 for (
size_t i = 0; i < 8; i++) {
4670 llvm::Value *ValOffsetPtr =
4676 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
4677 ? Intrinsic::aarch64_st64b
4678 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
4679 ? Intrinsic::aarch64_st64bv
4680 : Intrinsic::aarch64_st64bv0);
4682 return Builder.CreateCall(F, Args);
4685 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
4686 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
4688 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
4689 ? Intrinsic::aarch64_rndr
4690 : Intrinsic::aarch64_rndrrs);
4692 llvm::Value *Val =
Builder.CreateCall(F);
4693 Value *RandomValue =
Builder.CreateExtractValue(Val, 0);
4697 Builder.CreateStore(RandomValue, MemAddress);
4702 if (BuiltinID == clang::AArch64::BI__clear_cache) {
4703 assert(E->
getNumArgs() == 2 &&
"__clear_cache takes 2 arguments");
4706 for (
unsigned i = 0; i < 2; i++)
4708 llvm::Type *Ty =
CGM.getTypes().ConvertType(FD->
getType());
4710 StringRef Name = FD->
getName();
4714 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4715 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
4718 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4719 ? Intrinsic::aarch64_ldaxp
4720 : Intrinsic::aarch64_ldxp);
4727 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
4728 Val0 =
Builder.CreateZExt(Val0, Int128Ty);
4729 Val1 =
Builder.CreateZExt(Val1, Int128Ty);
4731 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4732 Val =
Builder.CreateShl(Val0, ShiftCst,
"shl",
true );
4733 Val =
Builder.CreateOr(Val, Val1);
4735 }
else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4736 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
4745 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4746 ? Intrinsic::aarch64_ldaxr
4747 : Intrinsic::aarch64_ldxr,
4749 CallInst *Val =
Builder.CreateCall(F, LoadAddr,
"ldxr");
4753 if (RealResTy->isPointerTy())
4754 return Builder.CreateIntToPtr(Val, RealResTy);
4756 llvm::Type *IntResTy = llvm::IntegerType::get(
4758 return Builder.CreateBitCast(
Builder.CreateTruncOrBitCast(Val, IntResTy),
4762 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4763 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
4766 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4767 ? Intrinsic::aarch64_stlxp
4768 : Intrinsic::aarch64_stxp);
4775 llvm::Value *Val =
Builder.CreateLoad(Tmp);
4780 return Builder.CreateCall(F, {Arg0, Arg1, StPtr},
"stxp");
4783 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4784 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
4789 llvm::Type *StoreTy =
4792 if (StoreVal->
getType()->isPointerTy())
4795 llvm::Type *
IntTy = llvm::IntegerType::get(
4797 CGM.getDataLayout().getTypeSizeInBits(StoreVal->
getType()));
4803 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4804 ? Intrinsic::aarch64_stlxr
4805 : Intrinsic::aarch64_stxr,
4807 CallInst *CI =
Builder.CreateCall(F, {StoreVal, StoreAddr},
"stxr");
4809 1, Attribute::get(
getLLVMContext(), Attribute::ElementType, StoreTy));
4813 if (BuiltinID == clang::AArch64::BI__getReg) {
4816 llvm_unreachable(
"Sema will ensure that the parameter is constant");
4819 LLVMContext &Context =
CGM.getLLVMContext();
4822 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4823 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4824 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4827 CGM.getIntrinsic(Intrinsic::read_register, {
Int64Ty});
4828 return Builder.CreateCall(F, Metadata);
4831 if (BuiltinID == clang::AArch64::BI__break) {
4834 llvm_unreachable(
"Sema will ensure that the parameter is constant");
4836 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_break);
4840 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
4841 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4845 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
4846 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
4847 llvm::SyncScope::SingleThread);
4850 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4851 switch (BuiltinID) {
4852 case clang::AArch64::BI__builtin_arm_crc32b:
4853 CRCIntrinsicID = Intrinsic::aarch64_crc32b;
break;
4854 case clang::AArch64::BI__builtin_arm_crc32cb:
4855 CRCIntrinsicID = Intrinsic::aarch64_crc32cb;
break;
4856 case clang::AArch64::BI__builtin_arm_crc32h:
4857 CRCIntrinsicID = Intrinsic::aarch64_crc32h;
break;
4858 case clang::AArch64::BI__builtin_arm_crc32ch:
4859 CRCIntrinsicID = Intrinsic::aarch64_crc32ch;
break;
4860 case clang::AArch64::BI__builtin_arm_crc32w:
4861 CRCIntrinsicID = Intrinsic::aarch64_crc32w;
break;
4862 case clang::AArch64::BI__builtin_arm_crc32cw:
4863 CRCIntrinsicID = Intrinsic::aarch64_crc32cw;
break;
4864 case clang::AArch64::BI__builtin_arm_crc32d:
4865 CRCIntrinsicID = Intrinsic::aarch64_crc32x;
break;
4866 case clang::AArch64::BI__builtin_arm_crc32cd:
4867 CRCIntrinsicID = Intrinsic::aarch64_crc32cx;
break;
4870 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4875 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4876 Arg1 =
Builder.CreateZExtOrBitCast(Arg1, DataTy);
4878 return Builder.CreateCall(F, {Arg0, Arg1});
4882 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
4889 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
4892 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
4893 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
4897 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
4898 switch (BuiltinID) {
4899 case clang::AArch64::BI__builtin_arm_irg:
4900 MTEIntrinsicID = Intrinsic::aarch64_irg;
break;
4901 case clang::AArch64::BI__builtin_arm_addg:
4902 MTEIntrinsicID = Intrinsic::aarch64_addg;
break;
4903 case clang::AArch64::BI__builtin_arm_gmi:
4904 MTEIntrinsicID = Intrinsic::aarch64_gmi;
break;
4905 case clang::AArch64::BI__builtin_arm_ldg:
4906 MTEIntrinsicID = Intrinsic::aarch64_ldg;
break;
4907 case clang::AArch64::BI__builtin_arm_stg:
4908 MTEIntrinsicID = Intrinsic::aarch64_stg;
break;
4909 case clang::AArch64::BI__builtin_arm_subp:
4910 MTEIntrinsicID = Intrinsic::aarch64_subp;
break;
4913 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
4914 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
4919 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4922 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
4927 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4928 {Pointer, TagOffset});
4930 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
4936 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
4941 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
4943 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4944 {TagAddress, TagAddress});
4949 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
4951 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4952 {TagAddress, TagAddress});
4954 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
4958 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
4962 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4963 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4964 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4965 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4966 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
4967 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
4968 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
4969 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
4972 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4973 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4974 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4975 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
4978 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4979 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
4981 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4982 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
4984 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4985 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
4987 llvm::Type *ValueType;
4991 }
else if (Is128Bit) {
4992 llvm::Type *Int128Ty =
4993 llvm::IntegerType::getInt128Ty(
CGM.getLLVMContext());
4994 ValueType = Int128Ty;
4996 }
else if (IsPointerBuiltin) {
5006 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5007 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
5008 LLVMContext &Context =
CGM.getLLVMContext();
5013 std::string SysRegStr;
5014 llvm::raw_string_ostream(SysRegStr)
5015 << (0b10 | SysReg >> 14) <<
":" << ((SysReg >> 11) & 7) <<
":"
5016 << ((SysReg >> 7) & 15) <<
":" << ((SysReg >> 3) & 15) <<
":"
5019 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5020 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5021 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5026 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5027 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::read_register, Types);
5029 return Builder.CreateCall(F, Metadata);
5032 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::write_register, Types);
5034 llvm::Value *
Result =
Builder.CreateCall(F, {Metadata, ArgValue});
5039 if (BuiltinID == clang::AArch64::BI__sys) {
5042 const unsigned Op1 = SysReg >> 11;
5043 const unsigned CRn = (SysReg >> 7) & 0xf;
5044 const unsigned CRm = (SysReg >> 3) & 0xf;
5045 const unsigned Op2 = SysReg & 0x7;
5047 Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_sys),
5048 {Builder.getInt32(Op1), Builder.getInt32(CRn),
5049 Builder.getInt32(CRm), Builder.getInt32(Op2),
5050 EmitScalarExpr(E->getArg(1))});
5054 return ConstantInt::get(
Builder.getInt32Ty(), 0);
5057 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5063 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5068 if (BuiltinID == clang::AArch64::BI__mulh ||
5069 BuiltinID == clang::AArch64::BI__umulh) {
5071 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
5073 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5079 Value *MulResult, *HigherBits;
5081 MulResult =
Builder.CreateNSWMul(LHS, RHS);
5082 HigherBits =
Builder.CreateAShr(MulResult, 64);
5084 MulResult =
Builder.CreateNUWMul(LHS, RHS);
5085 HigherBits =
Builder.CreateLShr(MulResult, 64);
5087 HigherBits =
Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5092 if (BuiltinID == AArch64::BI__writex18byte ||
5093 BuiltinID == AArch64::BI__writex18word ||
5094 BuiltinID == AArch64::BI__writex18dword ||
5095 BuiltinID == AArch64::BI__writex18qword) {
5111 if (BuiltinID == AArch64::BI__readx18byte ||
5112 BuiltinID == AArch64::BI__readx18word ||
5113 BuiltinID == AArch64::BI__readx18dword ||
5114 BuiltinID == AArch64::BI__readx18qword) {
5129 if (BuiltinID == AArch64::BI__addx18byte ||
5130 BuiltinID == AArch64::BI__addx18word ||
5131 BuiltinID == AArch64::BI__addx18dword ||
5132 BuiltinID == AArch64::BI__addx18qword ||
5133 BuiltinID == AArch64::BI__incx18byte ||
5134 BuiltinID == AArch64::BI__incx18word ||
5135 BuiltinID == AArch64::BI__incx18dword ||
5136 BuiltinID == AArch64::BI__incx18qword) {
5139 switch (BuiltinID) {
5140 case AArch64::BI__incx18byte:
5144 case AArch64::BI__incx18word:
5148 case AArch64::BI__incx18dword:
5152 case AArch64::BI__incx18qword:
5158 isIncrement =
false;
5183 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5184 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5185 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5186 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5189 return Builder.CreateBitCast(Arg, RetTy);
5192 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5193 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5194 BuiltinID == AArch64::BI_CountLeadingZeros ||
5195 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5199 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5200 BuiltinID == AArch64::BI_CountLeadingOnes64)
5201 Arg =
Builder.CreateXor(Arg, Constant::getAllOnesValue(
ArgType));
5206 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5207 BuiltinID == AArch64::BI_CountLeadingZeros64)
5212 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5213 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5216 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5217 ?
CGM.getIntrinsic(Intrinsic::aarch64_cls)
5218 :
CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5221 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5226 if (BuiltinID == AArch64::BI_CountOneBits ||
5227 BuiltinID == AArch64::BI_CountOneBits64) {
5233 if (BuiltinID == AArch64::BI_CountOneBits64)
5238 if (BuiltinID == AArch64::BI__prefetch) {
5247 if (BuiltinID == AArch64::BI__hlt) {
5248 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5253 return ConstantInt::get(
Builder.getInt32Ty(), 0);
5256 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5264 if (std::optional<MSVCIntrin> MsvcIntId =
5270 return P.first == BuiltinID;
5273 BuiltinID = It->second;
5279 bool IsSISD = (
Builtin !=
nullptr);
5283 unsigned ICEArguments = 0;
5294 unsigned NumArgs = E->
getNumArgs() - (HasExtraArg ? 1 : 0);
5295 for (
unsigned i = 0, e = NumArgs; i != e; i++) {
5297 switch (BuiltinID) {
5298 case NEON::BI__builtin_neon_vld1_v:
5299 case NEON::BI__builtin_neon_vld1q_v:
5300 case NEON::BI__builtin_neon_vld1_dup_v:
5301 case NEON::BI__builtin_neon_vld1q_dup_v:
5302 case NEON::BI__builtin_neon_vld1_lane_v:
5303 case NEON::BI__builtin_neon_vld1q_lane_v:
5304 case NEON::BI__builtin_neon_vst1_v:
5305 case NEON::BI__builtin_neon_vst1q_v:
5306 case NEON::BI__builtin_neon_vst1_lane_v:
5307 case NEON::BI__builtin_neon_vst1q_lane_v:
5308 case NEON::BI__builtin_neon_vldap1_lane_s64:
5309 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5310 case NEON::BI__builtin_neon_vstl1_lane_s64:
5311 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5324 assert(
Result &&
"SISD intrinsic should have been handled");
5330 if (std::optional<llvm::APSInt>
Result =
5335 bool usgn =
Type.isUnsigned();
5336 bool quad =
Type.isQuad();
5354 switch (BuiltinID) {
5356 case NEON::BI__builtin_neon_vabsh_f16:
5358 case NEON::BI__builtin_neon_vaddq_p128: {
5360 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
5361 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
5362 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
5363 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
5364 return Builder.CreateBitCast(Ops[0], Int128Ty);
5366 case NEON::BI__builtin_neon_vldrq_p128: {
5367 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
5368 return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
5371 case NEON::BI__builtin_neon_vstrq_p128: {
5372 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5374 case NEON::BI__builtin_neon_vcvts_f32_u32:
5375 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5378 case NEON::BI__builtin_neon_vcvts_f32_s32:
5379 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5380 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5383 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
5385 return Builder.CreateUIToFP(Ops[0], FTy);
5386 return Builder.CreateSIToFP(Ops[0], FTy);
5388 case NEON::BI__builtin_neon_vcvth_f16_u16:
5389 case NEON::BI__builtin_neon_vcvth_f16_u32:
5390 case NEON::BI__builtin_neon_vcvth_f16_u64:
5393 case NEON::BI__builtin_neon_vcvth_f16_s16:
5394 case NEON::BI__builtin_neon_vcvth_f16_s32:
5395 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5396 llvm::Type *FTy =
HalfTy;
5398 if (Ops[0]->
getType()->getPrimitiveSizeInBits() == 64)
5400 else if (Ops[0]->
getType()->getPrimitiveSizeInBits() == 32)
5404 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
5406 return Builder.CreateUIToFP(Ops[0], FTy);
5407 return Builder.CreateSIToFP(Ops[0], FTy);
5409 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5410 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5411 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5412 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5413 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5414 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5415 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5416 case NEON::BI__builtin_neon_vcvtph_s16_f16: {
5418 llvm::Type* FTy =
HalfTy;
5419 llvm::Type *Tys[2] = {InTy, FTy};
5420 switch (BuiltinID) {
5421 default: llvm_unreachable(
"missing builtin ID in switch!");
5422 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5423 Int = Intrinsic::aarch64_neon_fcvtau;
break;
5424 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5425 Int = Intrinsic::aarch64_neon_fcvtmu;
break;
5426 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5427 Int = Intrinsic::aarch64_neon_fcvtnu;
break;
5428 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5429 Int = Intrinsic::aarch64_neon_fcvtpu;
break;
5430 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5431 Int = Intrinsic::aarch64_neon_fcvtas;
break;
5432 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5433 Int = Intrinsic::aarch64_neon_fcvtms;
break;
5434 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5435 Int = Intrinsic::aarch64_neon_fcvtns;
break;
5436 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5437 Int = Intrinsic::aarch64_neon_fcvtps;
break;
5441 case NEON::BI__builtin_neon_vcaleh_f16:
5442 case NEON::BI__builtin_neon_vcalth_f16:
5443 case NEON::BI__builtin_neon_vcageh_f16:
5444 case NEON::BI__builtin_neon_vcagth_f16: {
5446 llvm::Type* FTy =
HalfTy;
5447 llvm::Type *Tys[2] = {InTy, FTy};
5448 switch (BuiltinID) {
5449 default: llvm_unreachable(
"missing builtin ID in switch!");
5450 case NEON::BI__builtin_neon_vcageh_f16:
5451 Int = Intrinsic::aarch64_neon_facge;
break;
5452 case NEON::BI__builtin_neon_vcagth_f16:
5453 Int = Intrinsic::aarch64_neon_facgt;
break;
5454 case NEON::BI__builtin_neon_vcaleh_f16:
5455 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]);
break;
5456 case NEON::BI__builtin_neon_vcalth_f16:
5457 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]);
break;
5462 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5463 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5465 llvm::Type* FTy =
HalfTy;
5466 llvm::Type *Tys[2] = {InTy, FTy};
5467 switch (BuiltinID) {
5468 default: llvm_unreachable(
"missing builtin ID in switch!");
5469 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5470 Int = Intrinsic::aarch64_neon_vcvtfp2fxs;
break;
5471 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5472 Int = Intrinsic::aarch64_neon_vcvtfp2fxu;
break;
5477 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5478 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5479 llvm::Type* FTy =
HalfTy;
5481 llvm::Type *Tys[2] = {FTy, InTy};
5482 switch (BuiltinID) {
5483 default: llvm_unreachable(
"missing builtin ID in switch!");
5484 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5485 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5486 Ops[0] =
Builder.CreateSExt(Ops[0], InTy,
"sext");
5488 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5489 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5490 Ops[0] =
Builder.CreateZExt(Ops[0], InTy);
5495 case NEON::BI__builtin_neon_vpaddd_s64: {
5498 auto *Ty = llvm::FixedVectorType::get(
Int64Ty, 2);
5500 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2i64");
5501 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5502 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5503 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
5504 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
5506 return Builder.CreateAdd(Op0, Op1,
"vpaddd");
5508 case NEON::BI__builtin_neon_vpaddd_f64: {
5509 auto *Ty = llvm::FixedVectorType::get(
DoubleTy, 2);
5511 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2f64");
5512 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5513 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5514 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
5515 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
5517 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
5519 case NEON::BI__builtin_neon_vpadds_f32: {
5520 auto *Ty = llvm::FixedVectorType::get(
FloatTy, 2);
5522 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2f32");
5523 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5524 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5525 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
5526 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
5528 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
5530 case NEON::BI__builtin_neon_vceqzd_s64:
5533 ICmpInst::ICMP_EQ,
"vceqz");
5534 case NEON::BI__builtin_neon_vceqzd_f64:
5535 case NEON::BI__builtin_neon_vceqzs_f32:
5536 case NEON::BI__builtin_neon_vceqzh_f16:
5539 ICmpInst::FCMP_OEQ,
"vceqz");
5540 case NEON::BI__builtin_neon_vcgezd_s64:
5543 ICmpInst::ICMP_SGE,
"vcgez");
5544 case NEON::BI__builtin_neon_vcgezd_f64:
5545 case NEON::BI__builtin_neon_vcgezs_f32:
5546 case NEON::BI__builtin_neon_vcgezh_f16:
5549 ICmpInst::FCMP_OGE,
"vcgez");
5550 case NEON::BI__builtin_neon_vclezd_s64:
5553 ICmpInst::ICMP_SLE,
"vclez");
5554 case NEON::BI__builtin_neon_vclezd_f64:
5555 case NEON::BI__builtin_neon_vclezs_f32:
5556 case NEON::BI__builtin_neon_vclezh_f16:
5559 ICmpInst::FCMP_OLE,
"vclez");
5560 case NEON::BI__builtin_neon_vcgtzd_s64:
5563 ICmpInst::ICMP_SGT,
"vcgtz");
5564 case NEON::BI__builtin_neon_vcgtzd_f64:
5565 case NEON::BI__builtin_neon_vcgtzs_f32:
5566 case NEON::BI__builtin_neon_vcgtzh_f16:
5569 ICmpInst::FCMP_OGT,
"vcgtz");
5570 case NEON::BI__builtin_neon_vcltzd_s64:
5573 ICmpInst::ICMP_SLT,
"vcltz");
5575 case NEON::BI__builtin_neon_vcltzd_f64:
5576 case NEON::BI__builtin_neon_vcltzs_f32:
5577 case NEON::BI__builtin_neon_vcltzh_f16:
5580 ICmpInst::FCMP_OLT,
"vcltz");
5582 case NEON::BI__builtin_neon_vceqzd_u64: {
5585 ICmpInst::ICMP_EQ,
"vceqzd");
5587 case NEON::BI__builtin_neon_vceqd_f64:
5588 case NEON::BI__builtin_neon_vcled_f64:
5589 case NEON::BI__builtin_neon_vcltd_f64:
5590 case NEON::BI__builtin_neon_vcged_f64:
5591 case NEON::BI__builtin_neon_vcgtd_f64: {
5592 llvm::CmpInst::Predicate P;
5593 switch (BuiltinID) {
5594 default: llvm_unreachable(
"missing builtin ID in switch!");
5595 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ;
break;
5596 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE;
break;
5597 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT;
break;
5598 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE;
break;
5599 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT;
break;
5603 if (P == llvm::FCmpInst::FCMP_OEQ)
5604 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
5606 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5609 case NEON::BI__builtin_neon_vceqs_f32:
5610 case NEON::BI__builtin_neon_vcles_f32:
5611 case NEON::BI__builtin_neon_vclts_f32:
5612 case NEON::BI__builtin_neon_vcges_f32:
5613 case NEON::BI__builtin_neon_vcgts_f32: {
5614 llvm::CmpInst::Predicate P;
5615 switch (BuiltinID) {
5616 default: llvm_unreachable(
"missing builtin ID in switch!");
5617 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ;
break;
5618 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE;
break;
5619 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT;
break;
5620 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE;
break;
5621 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT;
break;
5625 if (P == llvm::FCmpInst::FCMP_OEQ)
5626 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
5628 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5631 case NEON::BI__builtin_neon_vceqh_f16:
5632 case NEON::BI__builtin_neon_vcleh_f16:
5633 case NEON::BI__builtin_neon_vclth_f16:
5634 case NEON::BI__builtin_neon_vcgeh_f16:
5635 case NEON::BI__builtin_neon_vcgth_f16: {
5636 llvm::CmpInst::Predicate P;
5637 switch (BuiltinID) {
5638 default: llvm_unreachable(
"missing builtin ID in switch!");
5639 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ;
break;
5640 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE;
break;
5641 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT;
break;
5642 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE;
break;
5643 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT;
break;
5647 if (P == llvm::FCmpInst::FCMP_OEQ)
5648 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
5650 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5653 case NEON::BI__builtin_neon_vceqd_s64:
5654 case NEON::BI__builtin_neon_vceqd_u64:
5655 case NEON::BI__builtin_neon_vcgtd_s64:
5656 case NEON::BI__builtin_neon_vcgtd_u64:
5657 case NEON::BI__builtin_neon_vcltd_s64:
5658 case NEON::BI__builtin_neon_vcltd_u64:
5659 case NEON::BI__builtin_neon_vcged_u64:
5660 case NEON::BI__builtin_neon_vcged_s64:
5661 case NEON::BI__builtin_neon_vcled_u64:
5662 case NEON::BI__builtin_neon_vcled_s64: {
5663 llvm::CmpInst::Predicate P;
5664 switch (BuiltinID) {
5665 default: llvm_unreachable(
"missing builtin ID in switch!");
5666 case NEON::BI__builtin_neon_vceqd_s64:
5667 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;
break;
5668 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;
break;
5669 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;
break;
5670 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;
break;
5671 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;
break;
5672 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;
break;
5673 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;
break;
5674 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;
break;
5675 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;
break;
5679 Ops[0] =
Builder.CreateICmp(P, Ops[0], Ops[1]);
5682 case NEON::BI__builtin_neon_vnegd_s64:
5683 return Builder.CreateNeg(Ops[0],
"vnegd");
5684 case NEON::BI__builtin_neon_vnegh_f16:
5685 return Builder.CreateFNeg(Ops[0],
"vnegh");
5686 case NEON::BI__builtin_neon_vtstd_s64:
5687 case NEON::BI__builtin_neon_vtstd_u64: {
5690 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
5691 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5692 llvm::Constant::getNullValue(
Int64Ty));
5695 case NEON::BI__builtin_neon_vset_lane_i8:
5696 case NEON::BI__builtin_neon_vset_lane_i16:
5697 case NEON::BI__builtin_neon_vset_lane_i32:
5698 case NEON::BI__builtin_neon_vset_lane_i64:
5699 case NEON::BI__builtin_neon_vset_lane_bf16:
5700 case NEON::BI__builtin_neon_vset_lane_f32:
5701 case NEON::BI__builtin_neon_vsetq_lane_i8:
5702 case NEON::BI__builtin_neon_vsetq_lane_i16:
5703 case NEON::BI__builtin_neon_vsetq_lane_i32:
5704 case NEON::BI__builtin_neon_vsetq_lane_i64:
5705 case NEON::BI__builtin_neon_vsetq_lane_bf16:
5706 case NEON::BI__builtin_neon_vsetq_lane_f32:
5707 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5708 case NEON::BI__builtin_neon_vset_lane_f64:
5711 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 1));
5712 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5713 case NEON::BI__builtin_neon_vset_lane_mf8:
5714 case NEON::BI__builtin_neon_vsetq_lane_mf8:
5718 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5719 case NEON::BI__builtin_neon_vsetq_lane_f64:
5722 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 2));
5723 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5725 case NEON::BI__builtin_neon_vget_lane_i8:
5726 case NEON::BI__builtin_neon_vdupb_lane_i8:
5728 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int8Ty, 8));
5729 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5730 case NEON::BI__builtin_neon_vgetq_lane_i8:
5731 case NEON::BI__builtin_neon_vdupb_laneq_i8:
5733 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int8Ty, 16));
5734 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5735 case NEON::BI__builtin_neon_vget_lane_mf8:
5736 case NEON::BI__builtin_neon_vdupb_lane_mf8:
5737 case NEON::BI__builtin_neon_vgetq_lane_mf8:
5738 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
5739 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5740 case NEON::BI__builtin_neon_vget_lane_i16:
5741 case NEON::BI__builtin_neon_vduph_lane_i16:
5743 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int16Ty, 4));
5744 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5745 case NEON::BI__builtin_neon_vgetq_lane_i16:
5746 case NEON::BI__builtin_neon_vduph_laneq_i16:
5748 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int16Ty, 8));
5749 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5750 case NEON::BI__builtin_neon_vget_lane_i32:
5751 case NEON::BI__builtin_neon_vdups_lane_i32:
5753 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int32Ty, 2));
5754 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5755 case NEON::BI__builtin_neon_vdups_lane_f32:
5757 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 2));
5758 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vdups_lane");
5759 case NEON::BI__builtin_neon_vgetq_lane_i32:
5760 case NEON::BI__builtin_neon_vdups_laneq_i32:
5762 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int32Ty, 4));
5763 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5764 case NEON::BI__builtin_neon_vget_lane_i64:
5765 case NEON::BI__builtin_neon_vdupd_lane_i64:
5767 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int64Ty, 1));
5768 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5769 case NEON::BI__builtin_neon_vdupd_lane_f64:
5771 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 1));
5772 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vdupd_lane");
5773 case NEON::BI__builtin_neon_vgetq_lane_i64:
5774 case NEON::BI__builtin_neon_vdupd_laneq_i64:
5776 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int64Ty, 2));
5777 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5778 case NEON::BI__builtin_neon_vget_lane_f32:
5780 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 2));
5781 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5782 case NEON::BI__builtin_neon_vget_lane_f64:
5784 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 1));
5785 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5786 case NEON::BI__builtin_neon_vgetq_lane_f32:
5787 case NEON::BI__builtin_neon_vdups_laneq_f32:
5789 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 4));
5790 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5791 case NEON::BI__builtin_neon_vgetq_lane_f64:
5792 case NEON::BI__builtin_neon_vdupd_laneq_f64:
5794 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 2));
5795 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5796 case NEON::BI__builtin_neon_vaddh_f16:
5797 return Builder.CreateFAdd(Ops[0], Ops[1],
"vaddh");
5798 case NEON::BI__builtin_neon_vsubh_f16:
5799 return Builder.CreateFSub(Ops[0], Ops[1],
"vsubh");
5800 case NEON::BI__builtin_neon_vmulh_f16:
5801 return Builder.CreateFMul(Ops[0], Ops[1],
"vmulh");
5802 case NEON::BI__builtin_neon_vdivh_f16:
5803 return Builder.CreateFDiv(Ops[0], Ops[1],
"vdivh");
5804 case NEON::BI__builtin_neon_vfmah_f16:
5807 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
5808 {Ops[1], Ops[2], Ops[0]});
5809 case NEON::BI__builtin_neon_vfmsh_f16: {
5814 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
5815 {Neg, Ops[2], Ops[0]});
5817 case NEON::BI__builtin_neon_vaddd_s64:
5818 case NEON::BI__builtin_neon_vaddd_u64:
5819 return Builder.CreateAdd(Ops[0], Ops[1],
"vaddd");
5820 case NEON::BI__builtin_neon_vsubd_s64:
5821 case NEON::BI__builtin_neon_vsubd_u64:
5822 return Builder.CreateSub(Ops[0], Ops[1],
"vsubd");
5823 case NEON::BI__builtin_neon_vqdmlalh_s16:
5824 case NEON::BI__builtin_neon_vqdmlslh_s16: {
5828 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
5829 Ops[1] =
EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5830 ProductOps,
"vqdmlXl");
5832 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
5834 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5835 ? Intrinsic::aarch64_neon_sqadd
5836 : Intrinsic::aarch64_neon_sqsub;
5841 case NEON::BI__builtin_neon_vqshlud_n_s64: {
5846 case NEON::BI__builtin_neon_vqshld_n_u64:
5847 case NEON::BI__builtin_neon_vqshld_n_s64: {
5848 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5849 ? Intrinsic::aarch64_neon_uqshl
5850 : Intrinsic::aarch64_neon_sqshl;
5854 case NEON::BI__builtin_neon_vrshrd_n_u64:
5855 case NEON::BI__builtin_neon_vrshrd_n_s64: {
5856 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5857 ? Intrinsic::aarch64_neon_urshl
5858 : Intrinsic::aarch64_neon_srshl;
5860 Ops[1] = ConstantInt::get(
Int64Ty, -SV);
5863 case NEON::BI__builtin_neon_vrsrad_n_u64:
5864 case NEON::BI__builtin_neon_vrsrad_n_s64: {
5865 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5866 ? Intrinsic::aarch64_neon_urshl
5867 : Intrinsic::aarch64_neon_srshl;
5869 Ops[2] =
Builder.CreateNeg(Ops[2]);
5871 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5874 case NEON::BI__builtin_neon_vshld_n_s64:
5875 case NEON::BI__builtin_neon_vshld_n_u64: {
5878 Ops[0], ConstantInt::get(
Int64Ty, Amt->getZExtValue()),
"shld_n");
5880 case NEON::BI__builtin_neon_vshrd_n_s64: {
5883 Ops[0], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t
>(63),
5884 Amt->getZExtValue())),
5887 case NEON::BI__builtin_neon_vshrd_n_u64: {
5889 uint64_t ShiftAmt = Amt->getZExtValue();
5892 return ConstantInt::get(
Int64Ty, 0);
5893 return Builder.CreateLShr(Ops[0], ConstantInt::get(
Int64Ty, ShiftAmt),
5896 case NEON::BI__builtin_neon_vsrad_n_s64: {
5899 Ops[1], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t
>(63),
5900 Amt->getZExtValue())),
5902 return Builder.CreateAdd(Ops[0], Ops[1]);
5904 case NEON::BI__builtin_neon_vsrad_n_u64: {
5906 uint64_t ShiftAmt = Amt->getZExtValue();
5911 Ops[1] =
Builder.CreateLShr(Ops[1], ConstantInt::get(
Int64Ty, ShiftAmt),
5913 return Builder.CreateAdd(Ops[0], Ops[1]);
5915 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5916 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5917 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5918 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5919 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"lane");
5923 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
5924 Ops[1] =
EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5925 ProductOps,
"vqdmlXl");
5927 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
5932 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5933 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5934 ? Intrinsic::aarch64_neon_sqadd
5935 : Intrinsic::aarch64_neon_sqsub;
5938 case NEON::BI__builtin_neon_vqdmlals_s32:
5939 case NEON::BI__builtin_neon_vqdmlsls_s32: {
5941 ProductOps.push_back(Ops[1]);
5942 ProductOps.push_back(Ops[2]);
5944 EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5945 ProductOps,
"vqdmlXl");
5947 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5948 ? Intrinsic::aarch64_neon_sqadd
5949 : Intrinsic::aarch64_neon_sqsub;
5954 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5955 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5956 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5957 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5958 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"lane");
5960 ProductOps.push_back(Ops[1]);
5961 ProductOps.push_back(Ops[2]);
5963 EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5964 ProductOps,
"vqdmlXl");
5969 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5970 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5971 ? Intrinsic::aarch64_neon_sqadd
5972 : Intrinsic::aarch64_neon_sqsub;
5975 case NEON::BI__builtin_neon_vget_lane_bf16:
5976 case NEON::BI__builtin_neon_vduph_lane_bf16:
5977 case NEON::BI__builtin_neon_vduph_lane_f16: {
5978 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5980 case NEON::BI__builtin_neon_vgetq_lane_bf16:
5981 case NEON::BI__builtin_neon_vduph_laneq_bf16:
5982 case NEON::BI__builtin_neon_vduph_laneq_f16: {
5983 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5985 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
5986 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
5987 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
5988 return Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
5990 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
5992 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
5993 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
5994 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
5995 llvm::Value *Trunc =
5996 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
5997 return Builder.CreateShuffleVector(
5998 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6000 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6002 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6004 std::iota(LoMask.begin(), LoMask.end(), 0);
6005 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6006 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
6007 llvm::Type *V8BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 8);
6008 llvm::Value *Inactive =
Builder.CreateShuffleVector(
6009 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6010 llvm::Value *Trunc =
6011 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6012 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6015 case clang::AArch64::BI_InterlockedAdd:
6016 case clang::AArch64::BI_InterlockedAdd_acq:
6017 case clang::AArch64::BI_InterlockedAdd_rel:
6018 case clang::AArch64::BI_InterlockedAdd_nf:
6019 case clang::AArch64::BI_InterlockedAdd64:
6020 case clang::AArch64::BI_InterlockedAdd64_acq:
6021 case clang::AArch64::BI_InterlockedAdd64_rel:
6022 case clang::AArch64::BI_InterlockedAdd64_nf: {
6024 Value *Val = Ops[1];
6025 llvm::AtomicOrdering Ordering;
6026 switch (BuiltinID) {
6027 case clang::AArch64::BI_InterlockedAdd:
6028 case clang::AArch64::BI_InterlockedAdd64:
6029 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6031 case clang::AArch64::BI_InterlockedAdd_acq:
6032 case clang::AArch64::BI_InterlockedAdd64_acq:
6033 Ordering = llvm::AtomicOrdering::Acquire;
6035 case clang::AArch64::BI_InterlockedAdd_rel:
6036 case clang::AArch64::BI_InterlockedAdd64_rel:
6037 Ordering = llvm::AtomicOrdering::Release;
6039 case clang::AArch64::BI_InterlockedAdd_nf:
6040 case clang::AArch64::BI_InterlockedAdd64_nf:
6041 Ordering = llvm::AtomicOrdering::Monotonic;
6044 llvm_unreachable(
"missing builtin ID in switch!");
6046 AtomicRMWInst *RMWI =
6047 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6048 return Builder.CreateAdd(RMWI, Val);
6053 llvm::Type *Ty = VTy;
6057 bool ExtractLow =
false;
6058 bool ExtendLaneArg =
false;
6059 switch (BuiltinID) {
6060 default:
return nullptr;
6061 case NEON::BI__builtin_neon_vbsl_v:
6062 case NEON::BI__builtin_neon_vbslq_v: {
6063 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6064 Ops[0] =
Builder.CreateBitCast(Ops[0], BitTy,
"vbsl");
6065 Ops[1] =
Builder.CreateBitCast(Ops[1], BitTy,
"vbsl");
6066 Ops[2] =
Builder.CreateBitCast(Ops[2], BitTy,
"vbsl");
6068 Ops[1] =
Builder.CreateAnd(Ops[0], Ops[1],
"vbsl");
6069 Ops[2] =
Builder.CreateAnd(
Builder.CreateNot(Ops[0]), Ops[2],
"vbsl");
6070 Ops[0] =
Builder.CreateOr(Ops[1], Ops[2],
"vbsl");
6071 return Builder.CreateBitCast(Ops[0], Ty);
6073 case NEON::BI__builtin_neon_vfma_lane_v:
6074 case NEON::BI__builtin_neon_vfmaq_lane_v: {
6077 Value *Addend = Ops[0];
6078 Value *Multiplicand = Ops[1];
6079 Value *LaneSource = Ops[2];
6080 Ops[0] = Multiplicand;
6081 Ops[1] = LaneSource;
6085 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6086 ? llvm::FixedVectorType::get(VTy->getElementType(),
6087 VTy->getNumElements() / 2)
6090 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6091 Ops[1] =
Builder.CreateBitCast(Ops[1], SourceTy);
6092 Ops[1] =
Builder.CreateShuffleVector(Ops[1], Ops[1], SV,
"lane");
6095 Int =
Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6099 case NEON::BI__builtin_neon_vfma_laneq_v: {
6102 if (VTy && VTy->getElementType() ==
DoubleTy) {
6105 llvm::FixedVectorType *VTy =
6107 Ops[2] =
Builder.CreateBitCast(Ops[2], VTy);
6108 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
6111 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6112 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6115 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6116 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6118 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6119 VTy->getNumElements() * 2);
6120 Ops[2] =
Builder.CreateBitCast(Ops[2], STy);
6121 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6123 Ops[2] =
Builder.CreateShuffleVector(Ops[2], Ops[2], SV,
"lane");
6126 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6127 {Ops[2], Ops[1], Ops[0]});
6129 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6130 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6131 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6133 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6136 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6137 {Ops[2], Ops[1], Ops[0]});
6139 case NEON::BI__builtin_neon_vfmah_lane_f16:
6140 case NEON::BI__builtin_neon_vfmas_lane_f32:
6141 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6142 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6143 case NEON::BI__builtin_neon_vfmad_lane_f64:
6144 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6146 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
6148 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6149 {Ops[1], Ops[2], Ops[0]});
6151 case NEON::BI__builtin_neon_vmull_v:
6153 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6154 if (
Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6156 case NEON::BI__builtin_neon_vmax_v:
6157 case NEON::BI__builtin_neon_vmaxq_v:
6159 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6160 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6162 case NEON::BI__builtin_neon_vmaxh_f16: {
6163 Int = Intrinsic::aarch64_neon_fmax;
6166 case NEON::BI__builtin_neon_vmin_v:
6167 case NEON::BI__builtin_neon_vminq_v:
6169 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6170 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6172 case NEON::BI__builtin_neon_vminh_f16: {
6173 Int = Intrinsic::aarch64_neon_fmin;
6176 case NEON::BI__builtin_neon_vabd_v:
6177 case NEON::BI__builtin_neon_vabdq_v:
6179 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6180 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6182 case NEON::BI__builtin_neon_vpadal_v:
6183 case NEON::BI__builtin_neon_vpadalq_v: {
6184 unsigned ArgElts = VTy->getNumElements();
6186 unsigned BitWidth = EltTy->getBitWidth();
6187 auto *ArgTy = llvm::FixedVectorType::get(
6188 llvm::IntegerType::get(
getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6189 llvm::Type* Tys[2] = { VTy, ArgTy };
6190 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6192 TmpOps.push_back(Ops[1]);
6195 llvm::Value *addend =
Builder.CreateBitCast(Ops[0], tmp->getType());
6196 return Builder.CreateAdd(tmp, addend);
6198 case NEON::BI__builtin_neon_vpmin_v:
6199 case NEON::BI__builtin_neon_vpminq_v:
6201 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6202 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6204 case NEON::BI__builtin_neon_vpmax_v:
6205 case NEON::BI__builtin_neon_vpmaxq_v:
6207 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6208 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6210 case NEON::BI__builtin_neon_vminnm_v:
6211 case NEON::BI__builtin_neon_vminnmq_v:
6212 Int = Intrinsic::aarch64_neon_fminnm;
6214 case NEON::BI__builtin_neon_vminnmh_f16:
6215 Int = Intrinsic::aarch64_neon_fminnm;
6217 case NEON::BI__builtin_neon_vmaxnm_v:
6218 case NEON::BI__builtin_neon_vmaxnmq_v:
6219 Int = Intrinsic::aarch64_neon_fmaxnm;
6221 case NEON::BI__builtin_neon_vmaxnmh_f16:
6222 Int = Intrinsic::aarch64_neon_fmaxnm;
6224 case NEON::BI__builtin_neon_vrecpss_f32: {
6228 case NEON::BI__builtin_neon_vrecpsd_f64:
6231 case NEON::BI__builtin_neon_vrecpsh_f16:
6234 case NEON::BI__builtin_neon_vqshrun_n_v:
6235 Int = Intrinsic::aarch64_neon_sqshrun;
6237 case NEON::BI__builtin_neon_vqrshrun_n_v:
6238 Int = Intrinsic::aarch64_neon_sqrshrun;
6240 case NEON::BI__builtin_neon_vqshrn_n_v:
6241 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6243 case NEON::BI__builtin_neon_vrshrn_n_v:
6244 Int = Intrinsic::aarch64_neon_rshrn;
6246 case NEON::BI__builtin_neon_vqrshrn_n_v:
6247 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6249 case NEON::BI__builtin_neon_vrndah_f16: {
6250 Int =
Builder.getIsFPConstrained()
6251 ? Intrinsic::experimental_constrained_round
6255 case NEON::BI__builtin_neon_vrnda_v:
6256 case NEON::BI__builtin_neon_vrndaq_v: {
6257 Int =
Builder.getIsFPConstrained()
6258 ? Intrinsic::experimental_constrained_round
6262 case NEON::BI__builtin_neon_vrndih_f16: {
6263 Int =
Builder.getIsFPConstrained()
6264 ? Intrinsic::experimental_constrained_nearbyint
6265 : Intrinsic::nearbyint;
6268 case NEON::BI__builtin_neon_vrndmh_f16: {
6269 Int =
Builder.getIsFPConstrained()
6270 ? Intrinsic::experimental_constrained_floor
6274 case NEON::BI__builtin_neon_vrndm_v:
6275 case NEON::BI__builtin_neon_vrndmq_v: {
6276 Int =
Builder.getIsFPConstrained()
6277 ? Intrinsic::experimental_constrained_floor
6281 case NEON::BI__builtin_neon_vrndnh_f16: {
6282 Int =
Builder.getIsFPConstrained()
6283 ? Intrinsic::experimental_constrained_roundeven
6284 : Intrinsic::roundeven;
6287 case NEON::BI__builtin_neon_vrndn_v:
6288 case NEON::BI__builtin_neon_vrndnq_v: {
6289 Int =
Builder.getIsFPConstrained()
6290 ? Intrinsic::experimental_constrained_roundeven
6291 : Intrinsic::roundeven;
6294 case NEON::BI__builtin_neon_vrndns_f32: {
6295 Int =
Builder.getIsFPConstrained()
6296 ? Intrinsic::experimental_constrained_roundeven
6297 : Intrinsic::roundeven;
6300 case NEON::BI__builtin_neon_vrndph_f16: {
6301 Int =
Builder.getIsFPConstrained()
6302 ? Intrinsic::experimental_constrained_ceil
6306 case NEON::BI__builtin_neon_vrndp_v:
6307 case NEON::BI__builtin_neon_vrndpq_v: {
6308 Int =
Builder.getIsFPConstrained()
6309 ? Intrinsic::experimental_constrained_ceil
6313 case NEON::BI__builtin_neon_vrndxh_f16: {
6314 Int =
Builder.getIsFPConstrained()
6315 ? Intrinsic::experimental_constrained_rint
6319 case NEON::BI__builtin_neon_vrndx_v:
6320 case NEON::BI__builtin_neon_vrndxq_v: {
6321 Int =
Builder.getIsFPConstrained()
6322 ? Intrinsic::experimental_constrained_rint
6326 case NEON::BI__builtin_neon_vrndh_f16: {
6327 Int =
Builder.getIsFPConstrained()
6328 ? Intrinsic::experimental_constrained_trunc
6332 case NEON::BI__builtin_neon_vrnd32x_f32:
6333 case NEON::BI__builtin_neon_vrnd32xq_f32:
6334 case NEON::BI__builtin_neon_vrnd32x_f64:
6335 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6336 Int = Intrinsic::aarch64_neon_frint32x;
6339 case NEON::BI__builtin_neon_vrnd32z_f32:
6340 case NEON::BI__builtin_neon_vrnd32zq_f32:
6341 case NEON::BI__builtin_neon_vrnd32z_f64:
6342 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6343 Int = Intrinsic::aarch64_neon_frint32z;
6346 case NEON::BI__builtin_neon_vrnd64x_f32:
6347 case NEON::BI__builtin_neon_vrnd64xq_f32:
6348 case NEON::BI__builtin_neon_vrnd64x_f64:
6349 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6350 Int = Intrinsic::aarch64_neon_frint64x;
6353 case NEON::BI__builtin_neon_vrnd64z_f32:
6354 case NEON::BI__builtin_neon_vrnd64zq_f32:
6355 case NEON::BI__builtin_neon_vrnd64z_f64:
6356 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6357 Int = Intrinsic::aarch64_neon_frint64z;
6360 case NEON::BI__builtin_neon_vrnd_v:
6361 case NEON::BI__builtin_neon_vrndq_v: {
6362 Int =
Builder.getIsFPConstrained()
6363 ? Intrinsic::experimental_constrained_trunc
6367 case NEON::BI__builtin_neon_vcvt_f64_v:
6368 case NEON::BI__builtin_neon_vcvtq_f64_v:
6369 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6371 return usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
6372 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
6373 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6375 "unexpected vcvt_f64_f32 builtin");
6379 return Builder.CreateFPExt(Ops[0], Ty,
"vcvt");
6381 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6383 "unexpected vcvt_f32_f64 builtin");
6387 return Builder.CreateFPTrunc(Ops[0], Ty,
"vcvt");
6389 case NEON::BI__builtin_neon_vcvta_s16_f16:
6390 case NEON::BI__builtin_neon_vcvta_u16_f16:
6391 case NEON::BI__builtin_neon_vcvta_s32_v:
6392 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6393 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6394 case NEON::BI__builtin_neon_vcvta_u32_v:
6395 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6396 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6397 case NEON::BI__builtin_neon_vcvta_s64_v:
6398 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6399 case NEON::BI__builtin_neon_vcvta_u64_v:
6400 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6401 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6405 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6406 case NEON::BI__builtin_neon_vcvtm_s32_v:
6407 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6408 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6409 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6410 case NEON::BI__builtin_neon_vcvtm_u32_v:
6411 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6412 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6413 case NEON::BI__builtin_neon_vcvtm_s64_v:
6414 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6415 case NEON::BI__builtin_neon_vcvtm_u64_v:
6416 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6417 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6421 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6422 case NEON::BI__builtin_neon_vcvtn_s32_v:
6423 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6424 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6425 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6426 case NEON::BI__builtin_neon_vcvtn_u32_v:
6427 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6428 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6429 case NEON::BI__builtin_neon_vcvtn_s64_v:
6430 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6431 case NEON::BI__builtin_neon_vcvtn_u64_v:
6432 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6433 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6437 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6438 case NEON::BI__builtin_neon_vcvtp_s32_v:
6439 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6440 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6441 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6442 case NEON::BI__builtin_neon_vcvtp_u32_v:
6443 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6444 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6445 case NEON::BI__builtin_neon_vcvtp_s64_v:
6446 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6447 case NEON::BI__builtin_neon_vcvtp_u64_v:
6448 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6449 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6453 case NEON::BI__builtin_neon_vmulx_v:
6454 case NEON::BI__builtin_neon_vmulxq_v: {
6455 Int = Intrinsic::aarch64_neon_fmulx;
6458 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6459 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6462 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2],
"extract");
6464 Int = Intrinsic::aarch64_neon_fmulx;
6467 case NEON::BI__builtin_neon_vmul_lane_v:
6468 case NEON::BI__builtin_neon_vmul_laneq_v: {
6471 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6474 llvm::FixedVectorType *VTy =
6476 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
6477 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2],
"extract");
6481 case NEON::BI__builtin_neon_vpmaxnm_v:
6482 case NEON::BI__builtin_neon_vpmaxnmq_v: {
6483 Int = Intrinsic::aarch64_neon_fmaxnmp;
6486 case NEON::BI__builtin_neon_vpminnm_v:
6487 case NEON::BI__builtin_neon_vpminnmq_v: {
6488 Int = Intrinsic::aarch64_neon_fminnmp;
6491 case NEON::BI__builtin_neon_vsqrth_f16: {
6492 Int =
Builder.getIsFPConstrained()
6493 ? Intrinsic::experimental_constrained_sqrt
6497 case NEON::BI__builtin_neon_vsqrt_v:
6498 case NEON::BI__builtin_neon_vsqrtq_v: {
6499 Int =
Builder.getIsFPConstrained()
6500 ? Intrinsic::experimental_constrained_sqrt
6502 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6505 case NEON::BI__builtin_neon_vrbit_v:
6506 case NEON::BI__builtin_neon_vrbitq_v: {
6507 Int = Intrinsic::bitreverse;
6510 case NEON::BI__builtin_neon_vmaxv_f16: {
6511 Int = Intrinsic::aarch64_neon_fmaxv;
6513 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6514 llvm::Type *Tys[2] = {Ty, VTy};
6517 case NEON::BI__builtin_neon_vmaxvq_f16: {
6518 Int = Intrinsic::aarch64_neon_fmaxv;
6520 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6521 llvm::Type *Tys[2] = {Ty, VTy};
6524 case NEON::BI__builtin_neon_vminv_f16: {
6525 Int = Intrinsic::aarch64_neon_fminv;
6527 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6528 llvm::Type *Tys[2] = {Ty, VTy};
6531 case NEON::BI__builtin_neon_vminvq_f16: {
6532 Int = Intrinsic::aarch64_neon_fminv;
6534 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6535 llvm::Type *Tys[2] = {Ty, VTy};
6538 case NEON::BI__builtin_neon_vmaxnmv_f16: {
6539 Int = Intrinsic::aarch64_neon_fmaxnmv;
6541 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6542 llvm::Type *Tys[2] = {Ty, VTy};
6545 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
6546 Int = Intrinsic::aarch64_neon_fmaxnmv;
6548 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6549 llvm::Type *Tys[2] = {Ty, VTy};
6552 case NEON::BI__builtin_neon_vminnmv_f16: {
6553 Int = Intrinsic::aarch64_neon_fminnmv;
6555 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6556 llvm::Type *Tys[2] = {Ty, VTy};
6560 case NEON::BI__builtin_neon_vminnmvq_f16: {
6561 Int = Intrinsic::aarch64_neon_fminnmv;
6563 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6564 llvm::Type *Tys[2] = {Ty, VTy};
6567 case NEON::BI__builtin_neon_vmul_n_f64: {
6570 return Builder.CreateFMul(Ops[0], RHS);
6572 case NEON::BI__builtin_neon_vaddlv_u8:
6573 case NEON::BI__builtin_neon_vaddlvq_u8:
6574 case NEON::BI__builtin_neon_vaddlv_u16:
6575 case NEON::BI__builtin_neon_vaddlvq_u16: {
6576 Int = Intrinsic::aarch64_neon_uaddlv;
6579 llvm::Type *Tys[2] = {Ty, VTy};
6581 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6585 case NEON::BI__builtin_neon_vaddlv_s8:
6586 case NEON::BI__builtin_neon_vaddlvq_s8:
6587 case NEON::BI__builtin_neon_vaddlv_s16:
6588 case NEON::BI__builtin_neon_vaddlvq_s16: {
6589 Int = Intrinsic::aarch64_neon_saddlv;
6592 llvm::Type *Tys[2] = {Ty, VTy};
6594 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6598 case NEON::BI__builtin_neon_vsri_n_v:
6599 case NEON::BI__builtin_neon_vsriq_n_v: {
6600 Int = Intrinsic::aarch64_neon_vsri;
6601 llvm::Function *Intrin =
CGM.getIntrinsic(Int, Ty);
6604 case NEON::BI__builtin_neon_vsli_n_v:
6605 case NEON::BI__builtin_neon_vsliq_n_v: {
6606 Int = Intrinsic::aarch64_neon_vsli;
6607 llvm::Function *Intrin =
CGM.getIntrinsic(Int, Ty);
6610 case NEON::BI__builtin_neon_vsra_n_v:
6611 case NEON::BI__builtin_neon_vsraq_n_v:
6612 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6614 return Builder.CreateAdd(Ops[0], Ops[1]);
6615 case NEON::BI__builtin_neon_vrsra_n_v:
6616 case NEON::BI__builtin_neon_vrsraq_n_v: {
6617 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6619 TmpOps.push_back(Ops[1]);
6620 TmpOps.push_back(Ops[2]);
6622 llvm::Value *tmp =
EmitNeonCall(F, TmpOps,
"vrshr_n", 1,
true);
6623 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
6624 return Builder.CreateAdd(Ops[0], tmp);
6626 case NEON::BI__builtin_neon_vld1_v:
6627 case NEON::BI__builtin_neon_vld1q_v: {
6630 case NEON::BI__builtin_neon_vst1_v:
6631 case NEON::BI__builtin_neon_vst1q_v:
6632 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
6634 case NEON::BI__builtin_neon_vld1_lane_v:
6635 case NEON::BI__builtin_neon_vld1q_lane_v: {
6636 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6637 Ops[0] =
Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6639 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vld1_lane");
6641 case NEON::BI__builtin_neon_vldap1_lane_s64:
6642 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
6643 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6644 llvm::LoadInst *LI =
Builder.CreateAlignedLoad(
6646 LI->setAtomic(llvm::AtomicOrdering::Acquire);
6648 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vldap1_lane");
6650 case NEON::BI__builtin_neon_vld1_dup_v:
6651 case NEON::BI__builtin_neon_vld1q_dup_v: {
6652 Value *
V = PoisonValue::get(Ty);
6653 Ops[0] =
Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6655 llvm::Constant *CI = ConstantInt::get(
Int32Ty, 0);
6656 Ops[0] =
Builder.CreateInsertElement(
V, Ops[0], CI);
6659 case NEON::BI__builtin_neon_vst1_lane_v:
6660 case NEON::BI__builtin_neon_vst1q_lane_v:
6661 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6662 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
6664 case NEON::BI__builtin_neon_vstl1_lane_s64:
6665 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
6666 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6667 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
6668 llvm::StoreInst *SI =
6670 SI->setAtomic(llvm::AtomicOrdering::Release);
6673 case NEON::BI__builtin_neon_vld2_v:
6674 case NEON::BI__builtin_neon_vld2q_v: {
6676 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6677 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
6678 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6680 case NEON::BI__builtin_neon_vld3_v:
6681 case NEON::BI__builtin_neon_vld3q_v: {
6683 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6684 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
6685 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6687 case NEON::BI__builtin_neon_vld4_v:
6688 case NEON::BI__builtin_neon_vld4q_v: {
6690 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6691 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
6692 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6694 case NEON::BI__builtin_neon_vld2_dup_v:
6695 case NEON::BI__builtin_neon_vld2q_dup_v: {
6697 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6698 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
6699 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6701 case NEON::BI__builtin_neon_vld3_dup_v:
6702 case NEON::BI__builtin_neon_vld3q_dup_v: {
6704 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6705 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
6706 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6708 case NEON::BI__builtin_neon_vld4_dup_v:
6709 case NEON::BI__builtin_neon_vld4q_dup_v: {
6711 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6712 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
6713 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6715 case NEON::BI__builtin_neon_vld2_lane_v:
6716 case NEON::BI__builtin_neon_vld2q_lane_v: {
6717 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6718 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6719 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6720 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6721 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6724 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6726 case NEON::BI__builtin_neon_vld3_lane_v:
6727 case NEON::BI__builtin_neon_vld3q_lane_v: {
6728 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6729 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6730 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6731 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6732 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6733 Ops[3] =
Builder.CreateBitCast(Ops[3], Ty);
6736 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6738 case NEON::BI__builtin_neon_vld4_lane_v:
6739 case NEON::BI__builtin_neon_vld4q_lane_v: {
6740 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6741 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6742 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6743 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6744 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6745 Ops[3] =
Builder.CreateBitCast(Ops[3], Ty);
6746 Ops[4] =
Builder.CreateBitCast(Ops[4], Ty);
6749 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6751 case NEON::BI__builtin_neon_vst2_v:
6752 case NEON::BI__builtin_neon_vst2q_v: {
6753 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6754 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6755 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6758 case NEON::BI__builtin_neon_vst2_lane_v:
6759 case NEON::BI__builtin_neon_vst2q_lane_v: {
6760 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6762 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6763 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6766 case NEON::BI__builtin_neon_vst3_v:
6767 case NEON::BI__builtin_neon_vst3q_v: {
6768 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6769 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6770 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6773 case NEON::BI__builtin_neon_vst3_lane_v:
6774 case NEON::BI__builtin_neon_vst3q_lane_v: {
6775 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6777 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6778 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6781 case NEON::BI__builtin_neon_vst4_v:
6782 case NEON::BI__builtin_neon_vst4q_v: {
6783 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6784 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6785 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6788 case NEON::BI__builtin_neon_vst4_lane_v:
6789 case NEON::BI__builtin_neon_vst4q_lane_v: {
6790 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6792 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6793 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6796 case NEON::BI__builtin_neon_vtrn_v:
6797 case NEON::BI__builtin_neon_vtrnq_v: {
6798 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6799 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6800 Value *SV =
nullptr;
6802 for (
unsigned vi = 0; vi != 2; ++vi) {
6804 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6805 Indices.push_back(i+vi);
6806 Indices.push_back(i+e+vi);
6809 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vtrn");
6814 case NEON::BI__builtin_neon_vuzp_v:
6815 case NEON::BI__builtin_neon_vuzpq_v: {
6816 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6817 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6818 Value *SV =
nullptr;
6820 for (
unsigned vi = 0; vi != 2; ++vi) {
6822 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6823 Indices.push_back(2*i+vi);
6826 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vuzp");
6831 case NEON::BI__builtin_neon_vzip_v:
6832 case NEON::BI__builtin_neon_vzipq_v: {
6833 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6834 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6835 Value *SV =
nullptr;
6837 for (
unsigned vi = 0; vi != 2; ++vi) {
6839 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6840 Indices.push_back((i + vi*e) >> 1);
6841 Indices.push_back(((i + vi*e) >> 1)+e);
6844 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vzip");
6849 case NEON::BI__builtin_neon_vqtbl1q_v: {
6850 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6853 case NEON::BI__builtin_neon_vqtbl2q_v: {
6854 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6857 case NEON::BI__builtin_neon_vqtbl3q_v: {
6858 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6861 case NEON::BI__builtin_neon_vqtbl4q_v: {
6862 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6865 case NEON::BI__builtin_neon_vqtbx1q_v: {
6866 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6869 case NEON::BI__builtin_neon_vqtbx2q_v: {
6870 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6873 case NEON::BI__builtin_neon_vqtbx3q_v: {
6874 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6877 case NEON::BI__builtin_neon_vqtbx4q_v: {
6878 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6881 case NEON::BI__builtin_neon_vsqadd_v:
6882 case NEON::BI__builtin_neon_vsqaddq_v: {
6883 Int = Intrinsic::aarch64_neon_usqadd;
6886 case NEON::BI__builtin_neon_vuqadd_v:
6887 case NEON::BI__builtin_neon_vuqaddq_v: {
6888 Int = Intrinsic::aarch64_neon_suqadd;
6892 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
6893 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
6894 case NEON::BI__builtin_neon_vluti2_laneq_f16:
6895 case NEON::BI__builtin_neon_vluti2_laneq_p16:
6896 case NEON::BI__builtin_neon_vluti2_laneq_p8:
6897 case NEON::BI__builtin_neon_vluti2_laneq_s16:
6898 case NEON::BI__builtin_neon_vluti2_laneq_s8:
6899 case NEON::BI__builtin_neon_vluti2_laneq_u16:
6900 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
6901 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6908 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
6909 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
6910 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
6911 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
6912 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
6913 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
6914 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
6915 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
6916 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
6917 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6924 case NEON::BI__builtin_neon_vluti2_lane_mf8:
6925 case NEON::BI__builtin_neon_vluti2_lane_bf16:
6926 case NEON::BI__builtin_neon_vluti2_lane_f16:
6927 case NEON::BI__builtin_neon_vluti2_lane_p16:
6928 case NEON::BI__builtin_neon_vluti2_lane_p8:
6929 case NEON::BI__builtin_neon_vluti2_lane_s16:
6930 case NEON::BI__builtin_neon_vluti2_lane_s8:
6931 case NEON::BI__builtin_neon_vluti2_lane_u16:
6932 case NEON::BI__builtin_neon_vluti2_lane_u8: {
6933 Int = Intrinsic::aarch64_neon_vluti2_lane;
6940 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
6941 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
6942 case NEON::BI__builtin_neon_vluti2q_lane_f16:
6943 case NEON::BI__builtin_neon_vluti2q_lane_p16:
6944 case NEON::BI__builtin_neon_vluti2q_lane_p8:
6945 case NEON::BI__builtin_neon_vluti2q_lane_s16:
6946 case NEON::BI__builtin_neon_vluti2q_lane_s8:
6947 case NEON::BI__builtin_neon_vluti2q_lane_u16:
6948 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
6949 Int = Intrinsic::aarch64_neon_vluti2_lane;
6956 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
6957 case NEON::BI__builtin_neon_vluti4q_lane_p8:
6958 case NEON::BI__builtin_neon_vluti4q_lane_s8:
6959 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
6960 Int = Intrinsic::aarch64_neon_vluti4q_lane;
6963 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
6964 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
6965 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
6966 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
6967 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
6970 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
6971 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
6972 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
6973 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
6974 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
6975 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
6976 return EmitNeonCall(
CGM.getIntrinsic(Int, Ty), Ops,
"vluti4q_lane_x2");
6978 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
6979 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
6980 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
6981 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
6982 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
6983 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
6984 return EmitNeonCall(
CGM.getIntrinsic(Int, Ty), Ops,
"vluti4q_laneq_x2");
6986 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
6988 {llvm::FixedVectorType::get(
HalfTy, 8),
6989 llvm::FixedVectorType::get(
Int8Ty, 16)},
6991 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
6993 {llvm::FixedVectorType::get(
FloatTy, 4),
6994 llvm::FixedVectorType::get(
Int8Ty, 16)},
6996 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
6999 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7000 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7002 llvm::FixedVectorType::get(
BFloatTy, 8),
7003 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt1");
7004 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7007 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7008 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7010 llvm::FixedVectorType::get(
BFloatTy, 8),
7011 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt2");
7012 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7015 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7016 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7018 llvm::FixedVectorType::get(
HalfTy, 8),
7019 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt1");
7020 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7023 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7024 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7026 llvm::FixedVectorType::get(
HalfTy, 8),
7027 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt2");
7028 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7030 llvm::FixedVectorType::get(
Int8Ty, 8),
7031 Ops[0]->
getType(),
false, Ops, E,
"vfcvtn");
7032 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7034 llvm::FixedVectorType::get(
Int8Ty, 8),
7035 llvm::FixedVectorType::get(
HalfTy, 4),
false, Ops,
7037 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7039 llvm::FixedVectorType::get(
Int8Ty, 16),
7040 llvm::FixedVectorType::get(
HalfTy, 8),
false, Ops,
7042 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7043 llvm::Type *Ty = llvm::FixedVectorType::get(
Int8Ty, 16);
7044 Ops[0] =
Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7047 Ops[1]->
getType(),
false, Ops, E,
"vfcvtn2");
7050 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7051 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7054 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7055 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7056 ExtendLaneArg =
true;
7058 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7059 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7061 ExtendLaneArg,
HalfTy, Ops, E,
"fdot2_lane");
7062 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7063 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7066 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7067 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7068 ExtendLaneArg =
true;
7070 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7071 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7073 ExtendLaneArg,
FloatTy, Ops, E,
"fdot4_lane");
7075 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7077 {llvm::FixedVectorType::get(
HalfTy, 8)}, Ops, E,
7079 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7081 {llvm::FixedVectorType::get(
HalfTy, 8)}, Ops, E,
7083 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7085 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7087 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7089 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7091 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7093 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7095 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7097 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7099 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7100 ExtendLaneArg =
true;
7102 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7104 ExtendLaneArg,
HalfTy, Ops, E,
"vmlal_lane");
7105 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7106 ExtendLaneArg =
true;
7108 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7110 ExtendLaneArg,
HalfTy, Ops, E,
"vmlal_lane");
7111 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7112 ExtendLaneArg =
true;
7114 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7116 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7117 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7118 ExtendLaneArg =
true;
7120 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7122 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7123 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7124 ExtendLaneArg =
true;
7126 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7128 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7129 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7130 ExtendLaneArg =
true;
7132 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7134 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7135 case NEON::BI__builtin_neon_vamin_f16:
7136 case NEON::BI__builtin_neon_vaminq_f16:
7137 case NEON::BI__builtin_neon_vamin_f32:
7138 case NEON::BI__builtin_neon_vaminq_f32:
7139 case NEON::BI__builtin_neon_vaminq_f64: {
7140 Int = Intrinsic::aarch64_neon_famin;
7143 case NEON::BI__builtin_neon_vamax_f16:
7144 case NEON::BI__builtin_neon_vamaxq_f16:
7145 case NEON::BI__builtin_neon_vamax_f32:
7146 case NEON::BI__builtin_neon_vamaxq_f32:
7147 case NEON::BI__builtin_neon_vamaxq_f64: {
7148 Int = Intrinsic::aarch64_neon_famax;
7151 case NEON::BI__builtin_neon_vscale_f16:
7152 case NEON::BI__builtin_neon_vscaleq_f16:
7153 case NEON::BI__builtin_neon_vscale_f32:
7154 case NEON::BI__builtin_neon_vscaleq_f32:
7155 case NEON::BI__builtin_neon_vscaleq_f64: {
7156 Int = Intrinsic::aarch64_neon_fp8_fscale;