// Fragment of a NEON builtin lookup table (the array header is outside this
// excerpt). Each NEONMAP1 entry names one NEON builtin and one arm_neon_*
// intrinsic; each NEONMAP2 entry names two intrinsics. Every visible entry
// passes 0 as its final argument.
// NOTE(review): the two intrinsics of a NEONMAP2 entry presumably arrive in
// the codegen switch later in this file as LLVMIntrinsic and
// AltLLVMIntrinsic, in that order -- confirm against the macro definitions.
541 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
549 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
553 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
554 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
555 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
556 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
557 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
558 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
559 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
560 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
561 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
// vcale/vcalt share the vacge/vacgt intrinsics of vcage/vcagt; the
// vcale/vcalt cases of the codegen switch call std::swap(Ops[0], Ops[1])
// before the vcage/vcagt cases.
574 NEONMAP1(vcage_v, arm_neon_vacge, 0),
575 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
576 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
577 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
578 NEONMAP1(vcale_v, arm_neon_vacge, 0),
579 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
580 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
581 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
// vcvt_n_* entries. The f32 entry is a NEONMAP2 listing the unsigned-source
// intrinsic (fxu) first; the matching switch case picks
// `Usgn ? LLVMIntrinsic : AltLLVMIntrinsic`.
601 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
602 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
603 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
604 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
605 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
606 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
607 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
608 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
609 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
// vcvta/vcvtm/vcvtn/vcvtp entries: the trailing s/u of the intrinsic name
// matches the s/u result type in the builtin name, and the q and non-q
// builtins share one intrinsic.
616 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
617 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
618 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
619 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
620 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
621 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
622 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
623 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
624 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
625 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
626 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
627 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
628 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
629 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
630 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
631 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
632 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
633 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
634 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
635 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
636 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
637 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
638 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
639 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
640 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
641 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
642 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
643 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
644 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
645 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
646 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
647 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
648 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
649 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
650 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
651 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
652 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
653 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
654 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
655 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
656 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
657 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
658 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
659 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
660 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
661 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
662 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
663 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
664 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
// q-register forms of the vcvt_n_* entries above, with the same intrinsics.
668 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
669 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
670 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
671 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
672 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
673 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
674 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
675 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
676 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
683 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
684 NEONMAP1(vdot_u32, arm_neon_udot, 0),
685 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
686 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
// Load entries: the q and non-q builtins of each shape share one intrinsic.
697 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
698 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
699 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
701 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
702 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
703 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
704 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
705 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
706 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
708 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
709 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
710 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
711 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
712 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
714 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
715 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
716 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
717 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
718 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
720 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
721 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
722 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
731 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
732 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
// vqdmlal/vqdmlsl pair arm_neon_vqdmull with sadd_sat / ssub_sat; the visible
// part of their switch case ends by emitting a call through AltLLVMIntrinsic.
750 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
751 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
775 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
776 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
// vrecpe/vrsqrte list the same intrinsic twice, so the switch's
// `Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic` choice for them
// presumably yields the same intrinsic on either branch.
780 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
781 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
804 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
805 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
809 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
810 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
811 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
812 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
813 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
814 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
// Store entries: the q and non-q builtins of each shape share one intrinsic.
824 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
825 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
826 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
827 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
828 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
829 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
830 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
831 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
833 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
834 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
835 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
837 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
838 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
839 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
841 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
842 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
848 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
849 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
850 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
// Tail of the parameter list and the prologue of a NEON builtin codegen
// routine. Its name, return type and leading parameters lie outside this
// excerpt, and several prologue lines are missing.
1121 unsigned BuiltinID,
unsigned LLVMIntrinsic,
unsigned AltLLVMIntrinsic,
1122 const char *NameHint,
unsigned Modifier,
const CallExpr *E,
1124 llvm::Triple::ArchType
Arch) {
1130 std::optional<llvm::APSInt> NeonTypeConst =
// Snapshot the properties of Type that the switch cases below branch on.
1137 const bool Usgn =
Type.isUnsigned();
1138 const bool Quad =
Type.isQuad();
1139 const bool Floating =
Type.isFloatingPoint();
1141 const bool AllowBFloatArgsAndRet =
// VTy is the fixed vector type GetNeonType builds from Type; Ty refers to the
// same object through the base llvm::Type pointer.
1144 llvm::FixedVectorType *VTy =
1145 GetNeonType(
this,
Type, HasFastHalfType,
false, AllowBFloatArgsAndRet);
1146 llvm::Type *Ty = VTy;
// Helper: the alignment of an Address as an i32 constant. The load/store
// cases below append it to, or pass it with, their intrinsic operands.
1150 auto getAlignmentValue32 = [&](
Address addr) ->
Value* {
1151 return Builder.getInt32(addr.getAlignment().getQuantity());
// Int starts as the primary intrinsic. The condition under which
// AltLLVMIntrinsic replaces it is on a line missing from this excerpt.
1154 unsigned Int = LLVMIntrinsic;
1156 Int = AltLLVMIntrinsic;
1158 switch (BuiltinID) {
// splat*_lane*: start from VTy's element count, double it for splatq_lane and
// halve it for splat_laneq, then bitcast Ops[0] to VTy. The statement that
// consumes NumElements is not visible in this excerpt.
1160 case NEON::BI__builtin_neon_splat_lane_v:
1161 case NEON::BI__builtin_neon_splat_laneq_v:
1162 case NEON::BI__builtin_neon_splatq_lane_v:
1163 case NEON::BI__builtin_neon_splatq_laneq_v: {
1164 auto NumElements = VTy->getElementCount();
1165 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1166 NumElements = NumElements * 2;
1167 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1168 NumElements = NumElements.divideCoefficientBy(2);
1170 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
1173 case NEON::BI__builtin_neon_vpadd_v:
1174 case NEON::BI__builtin_neon_vpaddq_v:
1176 if (VTy->getElementType()->isFloatingPointTy() &&
1177 Int == Intrinsic::aarch64_neon_addp)
1178 Int = Intrinsic::aarch64_neon_faddp;
// vabs: floating-point element types use the generic llvm fabs intrinsic;
// all other element types use LLVMIntrinsic. Both calls are named "vabs".
1180 case NEON::BI__builtin_neon_vabs_v:
1181 case NEON::BI__builtin_neon_vabsq_v:
1182 if (VTy->getElementType()->isFloatingPointTy())
1183 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops,
"vabs");
1184 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops,
"vabs");
// vadd: bitcast both operands to a vector of Int8Ty (16 lanes when Quad,
// otherwise 8), XOR them, and bitcast the result back to Ty.
// Note: the VTy declared here shadows the function-level VTy inside this case.
1185 case NEON::BI__builtin_neon_vadd_v:
1186 case NEON::BI__builtin_neon_vaddq_v: {
1187 llvm::Type *VTy = llvm::FixedVectorType::get(
Int8Ty, Quad ? 16 : 8);
1188 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
1189 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
1190 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
1191 return Builder.CreateBitCast(Ops[0], Ty);
// vaddhn: SrcTy is VTy with extended elements (getExtendedElementVectorType).
// Both operands are bitcast to SrcTy and added, the sum is logically shifted
// right by ShiftAmt, and the result is truncated to VTy.
// NOTE(review): ShiftAmt's declaration is on a missing line; it is presumably
// the constant built below, half of SrcTy's scalar bit width.
1193 case NEON::BI__builtin_neon_vaddhn_v: {
1194 llvm::FixedVectorType *SrcTy =
1195 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1198 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1199 Ops[1] =
Builder.CreateBitCast(Ops[1], SrcTy);
1200 Ops[0] =
Builder.CreateAdd(Ops[0], Ops[1],
"vaddhn");
1204 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1205 Ops[0] =
Builder.CreateLShr(Ops[0], ShiftAmt,
"vaddhn");
1208 return Builder.CreateTrunc(Ops[0], VTy,
"vaddhn");
// vcale/vcalt: swap the two operands. The lookup table maps these builtins
// to the same vacge/vacgt intrinsics as vcage/vcagt.
// NOTE(review): the line between the swap and the next case label is missing;
// presumably control falls through into the vcage/vcagt cases -- confirm.
1210 case NEON::BI__builtin_neon_vcale_v:
1211 case NEON::BI__builtin_neon_vcaleq_v:
1212 case NEON::BI__builtin_neon_vcalt_v:
1213 case NEON::BI__builtin_neon_vcaltq_v:
1214 std::swap(Ops[0], Ops[1]);
// vcage/vcagt: dispatch on VTy's scalar bit width (unexpected widths hit
// llvm_unreachable; the per-width cases are not visible here), then overload
// LLVMIntrinsic on { VTy, VecFlt }, where VecFlt has VTy's lane count.
1216 case NEON::BI__builtin_neon_vcage_v:
1217 case NEON::BI__builtin_neon_vcageq_v:
1218 case NEON::BI__builtin_neon_vcagt_v:
1219 case NEON::BI__builtin_neon_vcagtq_v: {
1221 switch (VTy->getScalarSizeInBits()) {
1222 default: llvm_unreachable(
"unexpected type");
1233 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1234 llvm::Type *Tys[] = { VTy, VecFlt };
1235 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1238 case NEON::BI__builtin_neon_vceqz_v:
1239 case NEON::BI__builtin_neon_vceqzq_v:
1241 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ,
"vceqz");
1242 case NEON::BI__builtin_neon_vcgez_v:
1243 case NEON::BI__builtin_neon_vcgezq_v:
1245 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1247 case NEON::BI__builtin_neon_vclez_v:
1248 case NEON::BI__builtin_neon_vclezq_v:
1250 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1252 case NEON::BI__builtin_neon_vcgtz_v:
1253 case NEON::BI__builtin_neon_vcgtzq_v:
1255 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1257 case NEON::BI__builtin_neon_vcltz_v:
1258 case NEON::BI__builtin_neon_vcltzq_v:
1260 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1262 case NEON::BI__builtin_neon_vclz_v:
1263 case NEON::BI__builtin_neon_vclzq_v:
// Integer -> f32 conversion: Ops[0] is bitcast to Ty, then converted with
// UIToFP when Usgn is set and SIToFP otherwise.
// NOTE(review): lines between the bitcast and the conversion are missing from
// this excerpt, so Ty may be reassigned between the two statements.
1268 case NEON::BI__builtin_neon_vcvt_f32_v:
1269 case NEON::BI__builtin_neon_vcvtq_f32_v:
1270 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1273 return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
1274 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
// 16-bit integer -> f16 conversion: same visible shape as the f32 case above,
// and the same caveat about missing lines between the two statements.
1275 case NEON::BI__builtin_neon_vcvt_f16_s16:
1276 case NEON::BI__builtin_neon_vcvt_f16_u16:
1277 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1278 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1279 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1282 return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
1283 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
1284 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1285 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1286 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1287 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1292 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1293 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1294 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1295 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1297 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1301 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1302 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1303 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1304 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1305 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1306 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1307 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1308 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1309 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1310 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1311 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1312 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1314 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
// Float -> integer conversions. When the builder is not in constrained-FP
// mode, Int is set to the saturating fptoui_sat / fptosi_sat intrinsic chosen
// by Usgn, with overload types { Ty, type of Ops[0] }. The visible return
// uses plain FPToUI / FPToSI.
// NOTE(review): the lines that decide which of the two paths runs are missing
// from this excerpt -- confirm against the full source.
1317 case NEON::BI__builtin_neon_vcvt_s32_v:
1318 case NEON::BI__builtin_neon_vcvt_u32_v:
1319 case NEON::BI__builtin_neon_vcvt_s64_v:
1320 case NEON::BI__builtin_neon_vcvt_u64_v:
1321 case NEON::BI__builtin_neon_vcvt_s16_f16:
1322 case NEON::BI__builtin_neon_vcvt_u16_f16:
1323 case NEON::BI__builtin_neon_vcvtq_s32_v:
1324 case NEON::BI__builtin_neon_vcvtq_u32_v:
1325 case NEON::BI__builtin_neon_vcvtq_s64_v:
1326 case NEON::BI__builtin_neon_vcvtq_u64_v:
1327 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1328 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1332 if (!
Builder.getIsFPConstrained())
1333 Int = Usgn ? Intrinsic::fptoui_sat : Intrinsic::fptosi_sat;
1334 llvm::Type *Tys[2] = {Ty, Ops[0]->getType()};
1339 return Usgn ?
Builder.CreateFPToUI(Ops[0], Ty,
"vcvt")
1340 :
Builder.CreateFPToSI(Ops[0], Ty,
"vcvt");
1342 case NEON::BI__builtin_neon_vcvta_s16_f16:
1343 case NEON::BI__builtin_neon_vcvta_s32_v:
1344 case NEON::BI__builtin_neon_vcvta_s64_v:
1345 case NEON::BI__builtin_neon_vcvta_u16_f16:
1346 case NEON::BI__builtin_neon_vcvta_u32_v:
1347 case NEON::BI__builtin_neon_vcvta_u64_v:
1348 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1349 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1350 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1351 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1352 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1353 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1354 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1355 case NEON::BI__builtin_neon_vcvtn_s32_v:
1356 case NEON::BI__builtin_neon_vcvtn_s64_v:
1357 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1358 case NEON::BI__builtin_neon_vcvtn_u32_v:
1359 case NEON::BI__builtin_neon_vcvtn_u64_v:
1360 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1361 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1362 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1363 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1364 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1365 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1366 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1367 case NEON::BI__builtin_neon_vcvtp_s32_v:
1368 case NEON::BI__builtin_neon_vcvtp_s64_v:
1369 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1370 case NEON::BI__builtin_neon_vcvtp_u32_v:
1371 case NEON::BI__builtin_neon_vcvtp_u64_v:
1372 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1373 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1374 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1375 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1376 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1377 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1378 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1379 case NEON::BI__builtin_neon_vcvtm_s32_v:
1380 case NEON::BI__builtin_neon_vcvtm_s64_v:
1381 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1382 case NEON::BI__builtin_neon_vcvtm_u32_v:
1383 case NEON::BI__builtin_neon_vcvtm_u64_v:
1384 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1385 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1386 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1387 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1388 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1389 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1391 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
1393 case NEON::BI__builtin_neon_vcvtx_f32_v: {
1394 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
1395 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
// vext: build one shuffle index per lane, i + CV for i in [0, lane count),
// then shuffle the two inputs (both bitcast to Ty) with those indices.
// CV and Indices are declared on lines missing from this excerpt.
1398 case NEON::BI__builtin_neon_vext_v:
1399 case NEON::BI__builtin_neon_vextq_v: {
1402 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1403 Indices.push_back(i+CV);
1405 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1406 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1407 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices,
"vext");
// vfma: all three operands are bitcast to Ty. The call (its callee is on a
// missing line) receives the fma / experimental_constrained_fma intrinsic
// IDs and the operands reordered as {Ops[1], Ops[2], Ops[0]}, so the
// builtin's first argument is passed last.
1409 case NEON::BI__builtin_neon_vfma_v:
1410 case NEON::BI__builtin_neon_vfmaq_v: {
1411 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1412 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1413 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1417 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
1418 {Ops[1], Ops[2], Ops[0]});
1420 case NEON::BI__builtin_neon_vld1_v:
1421 case NEON::BI__builtin_neon_vld1q_v: {
1423 Ops.push_back(getAlignmentValue32(PtrOp0));
1424 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
"vld1");
// vld1_xN: call the intrinsic with Ops[1] as its only operand, then store the
// call's result through Ops[0]. Tys is declared on a missing line.
1426 case NEON::BI__builtin_neon_vld1_x2_v:
1427 case NEON::BI__builtin_neon_vld1q_x2_v:
1428 case NEON::BI__builtin_neon_vld1_x3_v:
1429 case NEON::BI__builtin_neon_vld1q_x3_v:
1430 case NEON::BI__builtin_neon_vld1_x4_v:
1431 case NEON::BI__builtin_neon_vld1q_x4_v: {
1433 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1434 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld1xN");
1435 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
// vld2/3/4 and their _dup forms: same call-then-store shape as above, except
// the intrinsic also receives the i32 alignment of PtrOp1 as a second operand.
1437 case NEON::BI__builtin_neon_vld2_v:
1438 case NEON::BI__builtin_neon_vld2q_v:
1439 case NEON::BI__builtin_neon_vld3_v:
1440 case NEON::BI__builtin_neon_vld3q_v:
1441 case NEON::BI__builtin_neon_vld4_v:
1442 case NEON::BI__builtin_neon_vld4q_v:
1443 case NEON::BI__builtin_neon_vld2_dup_v:
1444 case NEON::BI__builtin_neon_vld2q_dup_v:
1445 case NEON::BI__builtin_neon_vld3_dup_v:
1446 case NEON::BI__builtin_neon_vld3q_dup_v:
1447 case NEON::BI__builtin_neon_vld4_dup_v:
1448 case NEON::BI__builtin_neon_vld4q_dup_v: {
1450 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1451 Value *Align = getAlignmentValue32(PtrOp1);
1452 Ops[1] =
Builder.CreateCall(F, {Ops[1], Align}, NameHint);
1453 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
1455 case NEON::BI__builtin_neon_vld1_dup_v:
1456 case NEON::BI__builtin_neon_vld1q_dup_v: {
1457 Value *
V = PoisonValue::get(Ty);
1459 LoadInst *Ld =
Builder.CreateLoad(PtrOp0);
1460 llvm::Constant *CI = ConstantInt::get(
SizeTy, 0);
1461 Ops[0] =
Builder.CreateInsertElement(
V, Ld, CI);
1464 case NEON::BI__builtin_neon_vld2_lane_v:
1465 case NEON::BI__builtin_neon_vld2q_lane_v:
1466 case NEON::BI__builtin_neon_vld3_lane_v:
1467 case NEON::BI__builtin_neon_vld3q_lane_v:
1468 case NEON::BI__builtin_neon_vld4_lane_v:
1469 case NEON::BI__builtin_neon_vld4q_lane_v: {
1471 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1472 for (
unsigned I = 2; I < Ops.size() - 1; ++I)
1473 Ops[I] =
Builder.CreateBitCast(Ops[I], Ty);
1474 Ops.push_back(getAlignmentValue32(PtrOp1));
1476 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
// vmovl: DTy is VTy with truncated elements. Ops[0] is bitcast to DTy and
// then widened to Ty with either ZExt or SExt.
// NOTE(review): the condition selecting ZExt over SExt is on a missing line
// (presumably Usgn) -- confirm.
1478 case NEON::BI__builtin_neon_vmovl_v: {
1479 llvm::FixedVectorType *DTy =
1480 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1481 Ops[0] =
Builder.CreateBitCast(Ops[0], DTy);
1483 return Builder.CreateZExt(Ops[0], Ty,
"vmovl");
1484 return Builder.CreateSExt(Ops[0], Ty,
"vmovl");
// vmovn: QTy is VTy with extended elements. Ops[0] is bitcast to QTy and
// truncated to Ty.
1486 case NEON::BI__builtin_neon_vmovn_v: {
1487 llvm::FixedVectorType *QTy =
1488 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1489 Ops[0] =
Builder.CreateBitCast(Ops[0], QTy);
1490 return Builder.CreateTrunc(Ops[0], Ty,
"vmovn");
// vmull: pick vmullu or vmulls by Usgn, then override with vmullp when
// Type.isPoly() is true.
1492 case NEON::BI__builtin_neon_vmull_v:
1498 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1499 Int =
Type.isPoly() ? (
unsigned)Intrinsic::arm_neon_vmullp : Int;
1501 case NEON::BI__builtin_neon_vpadal_v:
1502 case NEON::BI__builtin_neon_vpadalq_v: {
1504 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1508 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1509 llvm::Type *Tys[2] = { Ty, NarrowTy };
1512 case NEON::BI__builtin_neon_vpaddl_v:
1513 case NEON::BI__builtin_neon_vpaddlq_v: {
1515 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1516 llvm::Type *EltTy = llvm::IntegerType::get(
getLLVMContext(), EltBits / 2);
1518 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
1519 llvm::Type *Tys[2] = { Ty, NarrowTy };
1522 case NEON::BI__builtin_neon_vqdmlal_v:
1523 case NEON::BI__builtin_neon_vqdmlsl_v: {
1528 return EmitNeonCall(
CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
1530 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
1531 case NEON::BI__builtin_neon_vqdmulh_lane_v:
1532 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
1533 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
1535 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
1536 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
1537 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
1538 RTy->getNumElements() * 2);
1539 llvm::Type *Tys[2] = {
1544 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
1545 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
1546 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
1547 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
1548 llvm::Type *Tys[2] = {
1553 case NEON::BI__builtin_neon_vqshl_n_v:
1554 case NEON::BI__builtin_neon_vqshlq_n_v:
1557 case NEON::BI__builtin_neon_vqshlu_n_v:
1558 case NEON::BI__builtin_neon_vqshluq_n_v:
1561 case NEON::BI__builtin_neon_vrecpe_v:
1562 case NEON::BI__builtin_neon_vrecpeq_v:
1563 case NEON::BI__builtin_neon_vrsqrte_v:
1564 case NEON::BI__builtin_neon_vrsqrteq_v:
1565 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
1567 case NEON::BI__builtin_neon_vrndi_v:
1568 case NEON::BI__builtin_neon_vrndiq_v:
1569 Int =
Builder.getIsFPConstrained()
1570 ? Intrinsic::experimental_constrained_nearbyint
1571 : Intrinsic::nearbyint;
1573 case NEON::BI__builtin_neon_vrshr_n_v:
1574 case NEON::BI__builtin_neon_vrshrq_n_v:
1577 case NEON::BI__builtin_neon_vsha512hq_u64:
1578 case NEON::BI__builtin_neon_vsha512h2q_u64:
1579 case NEON::BI__builtin_neon_vsha512su0q_u64:
1580 case NEON::BI__builtin_neon_vsha512su1q_u64: {
1584 case NEON::BI__builtin_neon_vshl_n_v:
1585 case NEON::BI__builtin_neon_vshlq_n_v:
1587 return Builder.CreateShl(
Builder.CreateBitCast(Ops[0],Ty), Ops[1],
// vshll_n: SrcTy is VTy with truncated elements. Ops[0] is bitcast to SrcTy,
// widened to VTy with ZExt or SExt, and shifted left by Ops[1].
// NOTE(review): the condition choosing ZExt over SExt, and any preparation
// of Ops[1], are on lines missing from this excerpt.
1589 case NEON::BI__builtin_neon_vshll_n_v: {
1590 llvm::FixedVectorType *SrcTy =
1591 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1592 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1594 Ops[0] =
Builder.CreateZExt(Ops[0], VTy);
1596 Ops[0] =
Builder.CreateSExt(Ops[0], VTy);
1598 return Builder.CreateShl(Ops[0], Ops[1],
"vshll_n");
// vshrn_n: SrcTy is VTy with extended elements. Ops[0] is bitcast to SrcTy,
// shifted right by Ops[1] with LShr or AShr, and truncated to Ty.
// NOTE(review): the condition choosing LShr over AShr is on a missing line.
1600 case NEON::BI__builtin_neon_vshrn_n_v: {
1601 llvm::FixedVectorType *SrcTy =
1602 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1603 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1606 Ops[0] =
Builder.CreateLShr(Ops[0], Ops[1]);
1608 Ops[0] =
Builder.CreateAShr(Ops[0], Ops[1]);
1609 return Builder.CreateTrunc(Ops[0], Ty,
"vshrn_n");
1611 case NEON::BI__builtin_neon_vshr_n_v:
1612 case NEON::BI__builtin_neon_vshrq_n_v:
1614 case NEON::BI__builtin_neon_vst1_v:
1615 case NEON::BI__builtin_neon_vst1q_v:
1616 case NEON::BI__builtin_neon_vst2_v:
1617 case NEON::BI__builtin_neon_vst2q_v:
1618 case NEON::BI__builtin_neon_vst3_v:
1619 case NEON::BI__builtin_neon_vst3q_v:
1620 case NEON::BI__builtin_neon_vst4_v:
1621 case NEON::BI__builtin_neon_vst4q_v:
1622 case NEON::BI__builtin_neon_vst2_lane_v:
1623 case NEON::BI__builtin_neon_vst2q_lane_v:
1624 case NEON::BI__builtin_neon_vst3_lane_v:
1625 case NEON::BI__builtin_neon_vst3q_lane_v:
1626 case NEON::BI__builtin_neon_vst4_lane_v:
1627 case NEON::BI__builtin_neon_vst4q_lane_v: {
1629 Ops.push_back(getAlignmentValue32(PtrOp0));
1632 case NEON::BI__builtin_neon_vsm3partw1q_u32:
1633 case NEON::BI__builtin_neon_vsm3partw2q_u32:
1634 case NEON::BI__builtin_neon_vsm3ss1q_u32:
1635 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
1636 case NEON::BI__builtin_neon_vsm4eq_u32: {
1640 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
1641 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
1642 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
1643 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
// vst1_xN: on aarch64, aarch64_be and aarch64_32 the operand list is rotated
// left by one, moving the first operand to the end of Ops. What follows the
// rotate, and the non-AArch64 path, are not visible in this excerpt.
1648 case NEON::BI__builtin_neon_vst1_x2_v:
1649 case NEON::BI__builtin_neon_vst1q_x2_v:
1650 case NEON::BI__builtin_neon_vst1_x3_v:
1651 case NEON::BI__builtin_neon_vst1q_x3_v:
1652 case NEON::BI__builtin_neon_vst1_x4_v:
1653 case NEON::BI__builtin_neon_vst1q_x4_v: {
1656 if (
Arch == llvm::Triple::aarch64 ||
Arch == llvm::Triple::aarch64_be ||
1657 Arch == llvm::Triple::aarch64_32) {
1659 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
// vsubhn: SrcTy is VTy with extended elements. Both operands are bitcast to
// SrcTy and subtracted, the difference is logically shifted right by
// ShiftAmt, and the result is truncated to VTy.
// NOTE(review): ShiftAmt's declaration is on a missing line; it is presumably
// the constant built below, half of SrcTy's scalar bit width.
1665 case NEON::BI__builtin_neon_vsubhn_v: {
1666 llvm::FixedVectorType *SrcTy =
1667 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1670 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1671 Ops[1] =
Builder.CreateBitCast(Ops[1], SrcTy);
1672 Ops[0] =
Builder.CreateSub(Ops[0], Ops[1],
"vsubhn");
1676 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1677 Ops[0] =
Builder.CreateLShr(Ops[0], ShiftAmt,
"vsubhn");
1680 return Builder.CreateTrunc(Ops[0], VTy,
"vsubhn");
// vtrn: two passes (vi = 0, 1), each shuffling Ops[1] and Ops[2] (bitcast to
// Ty). Every even i pushes the index pair (i + vi, i + e + vi), where e is
// the lane count. With e = 4 the vi = 0 pass pushes 0,4,2,6 and the vi = 1
// pass pushes 1,5,3,7.
// Indices is declared, and SV consumed, on lines missing from this excerpt.
1682 case NEON::BI__builtin_neon_vtrn_v:
1683 case NEON::BI__builtin_neon_vtrnq_v: {
1684 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1685 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1686 Value *SV =
nullptr;
1688 for (
unsigned vi = 0; vi != 2; ++vi) {
1690 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1691 Indices.push_back(i+vi);
1692 Indices.push_back(i+e+vi);
1695 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vtrn");
// vtst: AND the two operands (bitcast to Ty), compare the result against an
// all-zero vector with ICMP_NE, and sign-extend the comparison result to Ty.
1700 case NEON::BI__builtin_neon_vtst_v:
1701 case NEON::BI__builtin_neon_vtstq_v: {
1702 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1703 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1704 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
1705 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1706 ConstantAggregateZero::get(Ty));
1707 return Builder.CreateSExt(Ops[0], Ty,
"vtst");
// vuzp: two passes (vi = 0, 1), each shuffling Ops[1] and Ops[2] (bitcast to
// Ty) with indices 2*i + vi for i in [0, lane count). With 4 lanes the
// vi = 0 pass pushes 0,2,4,6 and the vi = 1 pass pushes 1,3,5,7.
// Indices is declared, and SV consumed, on lines missing from this excerpt.
1709 case NEON::BI__builtin_neon_vuzp_v:
1710 case NEON::BI__builtin_neon_vuzpq_v: {
1711 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1712 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1713 Value *SV =
nullptr;
1715 for (
unsigned vi = 0; vi != 2; ++vi) {
1717 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1718 Indices.push_back(2*i+vi);
1721 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vuzp");
1726 case NEON::BI__builtin_neon_vxarq_u64: {
// vzip: two passes (vi = 0, 1), each shuffling Ops[1] and Ops[2] (bitcast to
// Ty). Every even i pushes the pair ((i + vi*e) >> 1, ((i + vi*e) >> 1) + e),
// where e is the lane count. With e = 4 the vi = 0 pass pushes 0,4,1,5 and
// the vi = 1 pass pushes 2,6,3,7.
// Indices is declared, and SV consumed, on lines missing from this excerpt.
1731 case NEON::BI__builtin_neon_vzip_v:
1732 case NEON::BI__builtin_neon_vzipq_v: {
1733 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
1734 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
1735 Value *SV =
nullptr;
1737 for (
unsigned vi = 0; vi != 2; ++vi) {
1739 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1740 Indices.push_back((i + vi*e) >> 1);
1741 Indices.push_back(((i + vi*e) >> 1)+e);
1744 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vzip");
// The cases from here to vbfdot share one shape: build an input vector type
// with as many total bits as Ty (Int8Ty lanes = bits / 8; HalfTy or BFloatTy
// lanes = bits / 16) and use { Ty, InputTy } as the intrinsic overload types.
// NOTE(review): in most cases the `InputTy =` declaration line and the final
// call are missing from this excerpt; only vbfdot shows the declaration.
1749 case NEON::BI__builtin_neon_vdot_s32:
1750 case NEON::BI__builtin_neon_vdot_u32:
1751 case NEON::BI__builtin_neon_vdotq_s32:
1752 case NEON::BI__builtin_neon_vdotq_u32: {
1754 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1755 llvm::Type *Tys[2] = { Ty, InputTy };
1758 case NEON::BI__builtin_neon_vfmlal_low_f16:
1759 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
1761 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1762 llvm::Type *Tys[2] = { Ty, InputTy };
1765 case NEON::BI__builtin_neon_vfmlsl_low_f16:
1766 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
1768 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1769 llvm::Type *Tys[2] = { Ty, InputTy };
1772 case NEON::BI__builtin_neon_vfmlal_high_f16:
1773 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
1775 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1776 llvm::Type *Tys[2] = { Ty, InputTy };
1779 case NEON::BI__builtin_neon_vfmlsl_high_f16:
1780 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
1782 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1783 llvm::Type *Tys[2] = { Ty, InputTy };
// vmmlaq_s32/u32 is the one case in this group whose call is visible: it
// emits LLVMIntrinsic overloaded on Tys and names the result "vmmla".
1786 case NEON::BI__builtin_neon_vmmlaq_s32:
1787 case NEON::BI__builtin_neon_vmmlaq_u32: {
1789 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1790 llvm::Type *Tys[2] = { Ty, InputTy };
1791 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
"vmmla");
1793 case NEON::BI__builtin_neon_vmmlaq_f16_f16:
1794 case NEON::BI__builtin_neon_vmmlaq_f32_f16: {
1796 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
1797 llvm::Type *Tys[2] = {Ty, InputTy};
1800 case NEON::BI__builtin_neon_vusmmlaq_s32: {
1802 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1803 llvm::Type *Tys[2] = { Ty, InputTy };
1806 case NEON::BI__builtin_neon_vusdot_s32:
1807 case NEON::BI__builtin_neon_vusdotq_s32: {
1809 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
1810 llvm::Type *Tys[2] = { Ty, InputTy };
1813 case NEON::BI__builtin_neon_vbfdot_f32:
1814 case NEON::BI__builtin_neon_vbfdotq_f32: {
1815 llvm::Type *InputTy =
1816 llvm::FixedVectorType::get(
BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
1817 llvm::Type *Tys[2] = { Ty, InputTy };
// __a32_vcvt_bf16_f32 overloads on Ty alone.
1820 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
1821 llvm::Type *Tys[1] = { Ty };
1828 assert(Int &&
"Expected valid intrinsic number");
4435 llvm::Triple::ArchType
Arch) {
// __builtin_cpu_supports is delegated to EmitAArch64CpuSupports before any
// other builtin is considered.
4444 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4445 return EmitAArch64CpuSupports(E);
// Hint builtins (nop, yield, wfe, wfi, sev, sevl). HintID starts at the
// sentinel static_cast<unsigned>(-1), meaning "not a hint builtin"; the
// per-case assignments to HintID are on lines missing from this excerpt.
4447 unsigned HintID =
static_cast<unsigned>(-1);
4448 switch (BuiltinID) {
4450 case clang::AArch64::BI__builtin_arm_nop:
4453 case clang::AArch64::BI__builtin_arm_yield:
4454 case clang::AArch64::BI__yield:
4457 case clang::AArch64::BI__builtin_arm_wfe:
4458 case clang::AArch64::BI__wfe:
4461 case clang::AArch64::BI__builtin_arm_wfi:
4462 case clang::AArch64::BI__wfi:
4465 case clang::AArch64::BI__builtin_arm_sev:
4466 case clang::AArch64::BI__sev:
4469 case clang::AArch64::BI__builtin_arm_sevl:
4470 case clang::AArch64::BI__sevl:
// If a hint was selected, emit aarch64_hint with HintID as an Int32Ty constant.
4475 if (HintID !=
static_cast<unsigned>(-1)) {
4476 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_hint);
4477 return Builder.CreateCall(F, llvm::ConstantInt::get(
Int32Ty, HintID));
4480 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
4481 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_break);
4486 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
4489 llvm::FunctionType::get(StructType::get(
CGM.Int64Ty,
CGM.Int64Ty), {},
4491 "__arm_sme_state"));
4493 "aarch64_pstate_sm_compatible");
4494 CI->setAttributes(Attrs);
4497 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
4504 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
4506 "rbit of unusual size!");
4509 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
4511 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
4513 "rbit of unusual size!");
4516 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
4519 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
4520 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
4522 Function *F =
CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
4524 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
4529 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
4531 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
4534 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
4536 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
4540 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
4541 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
4543 llvm::Type *Ty = Arg->getType();
4544 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
4548 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
4549 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
4551 llvm::Type *Ty = Arg->getType();
4552 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
4556 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
4557 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
4559 llvm::Type *Ty = Arg->getType();
4560 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
4564 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
4565 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
4567 llvm::Type *Ty = Arg->getType();
4568 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
4572 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
4574 "__jcvt of unusual size!");
4577 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
4580 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
4581 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
4582 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
4583 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
4587 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
4590 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
4591 llvm::Value *Val =
Builder.CreateCall(F, MemAddr);
4593 for (
size_t i = 0; i < 8; i++) {
4594 llvm::Value *ValOffsetPtr =
4606 Args.push_back(MemAddr);
4607 for (
size_t i = 0; i < 8; i++) {
4608 llvm::Value *ValOffsetPtr =
4614 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
4615 ? Intrinsic::aarch64_st64b
4616 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
4617 ? Intrinsic::aarch64_st64bv
4618 : Intrinsic::aarch64_st64bv0);
4620 return Builder.CreateCall(F, Args);
4623 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
4624 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
4626 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
4627 ? Intrinsic::aarch64_rndr
4628 : Intrinsic::aarch64_rndrrs);
4630 llvm::Value *Val =
Builder.CreateCall(F);
4631 Value *RandomValue =
Builder.CreateExtractValue(Val, 0);
4635 Builder.CreateStore(RandomValue, MemAddress);
4640 if (BuiltinID == clang::AArch64::BI__clear_cache) {
4641 assert(E->
getNumArgs() == 2 &&
"__clear_cache takes 2 arguments");
4644 for (
unsigned i = 0; i < 2; i++)
4646 llvm::Type *Ty =
CGM.getTypes().ConvertType(FD->
getType());
4648 StringRef Name = FD->
getName();
4652 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4653 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
4656 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4657 ? Intrinsic::aarch64_ldaxp
4658 : Intrinsic::aarch64_ldxp);
4665 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
4666 Val0 =
Builder.CreateZExt(Val0, Int128Ty);
4667 Val1 =
Builder.CreateZExt(Val1, Int128Ty);
4669 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4670 Val =
Builder.CreateShl(Val0, ShiftCst,
"shl",
true );
4671 Val =
Builder.CreateOr(Val, Val1);
4673 }
else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4674 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
4683 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4684 ? Intrinsic::aarch64_ldaxr
4685 : Intrinsic::aarch64_ldxr,
4687 CallInst *Val =
Builder.CreateCall(F, LoadAddr,
"ldxr");
4691 if (RealResTy->isPointerTy())
4692 return Builder.CreateIntToPtr(Val, RealResTy);
4694 llvm::Type *IntResTy = llvm::IntegerType::get(
4696 return Builder.CreateBitCast(
Builder.CreateTruncOrBitCast(Val, IntResTy),
4700 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4701 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
4704 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4705 ? Intrinsic::aarch64_stlxp
4706 : Intrinsic::aarch64_stxp);
4713 llvm::Value *Val =
Builder.CreateLoad(Tmp);
4718 return Builder.CreateCall(F, {Arg0, Arg1, StPtr},
"stxp");
4721 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4722 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
4727 llvm::Type *StoreTy =
4730 if (StoreVal->
getType()->isPointerTy())
4733 llvm::Type *
IntTy = llvm::IntegerType::get(
4735 CGM.getDataLayout().getTypeSizeInBits(StoreVal->
getType()));
4741 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4742 ? Intrinsic::aarch64_stlxr
4743 : Intrinsic::aarch64_stxr,
4745 CallInst *CI =
Builder.CreateCall(F, {StoreVal, StoreAddr},
"stxr");
4747 1, Attribute::get(
getLLVMContext(), Attribute::ElementType, StoreTy));
4751 if (BuiltinID == clang::AArch64::BI__getReg ||
4752 BuiltinID == clang::AArch64::BI__setReg) {
4755 llvm_unreachable(
"Sema will ensure that the parameter is constant");
4758 LLVMContext &Context =
CGM.getLLVMContext();
4761 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4762 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4763 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4766 if (BuiltinID == clang::AArch64::BI__getReg) {
4768 CGM.getIntrinsic(Intrinsic::read_volatile_register, {
Int64Ty});
4769 CI =
Builder.CreateCall(F, Metadata);
4772 CGM.getIntrinsic(Intrinsic::write_volatile_register, {
Int64Ty});
4778 if (BuiltinID == clang::AArch64::BI__getRegFp ||
4779 BuiltinID == clang::AArch64::BI__setRegFp) {
4782 llvm_unreachable(
"Sema will ensure that the parameter is constant");
4785 LLVMContext &Context =
CGM.getLLVMContext();
4788 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
4789 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4790 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4793 if (BuiltinID == clang::AArch64::BI__getRegFp) {
4795 CGM.getIntrinsic(Intrinsic::read_volatile_register, {
Int64Ty});
4796 llvm::Value *Bits =
Builder.CreateCall(F, Metadata);
4797 Ret =
Builder.CreateBitCast(Bits, llvm::Type::getDoubleTy(Context));
4802 CGM.getIntrinsic(Intrinsic::write_volatile_register, {
Int64Ty});
4803 Ret =
Builder.CreateCall(F, {Metadata, Bits});
4808 if (BuiltinID == clang::AArch64::BI__break) {
4811 llvm_unreachable(
"Sema will ensure that the parameter is constant");
4813 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_break);
4817 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
4818 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4822 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
4823 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
4824 llvm::SyncScope::SingleThread);
4827 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4828 switch (BuiltinID) {
4829 case clang::AArch64::BI__builtin_arm_crc32b:
4830 CRCIntrinsicID = Intrinsic::aarch64_crc32b;
break;
4831 case clang::AArch64::BI__builtin_arm_crc32cb:
4832 CRCIntrinsicID = Intrinsic::aarch64_crc32cb;
break;
4833 case clang::AArch64::BI__builtin_arm_crc32h:
4834 CRCIntrinsicID = Intrinsic::aarch64_crc32h;
break;
4835 case clang::AArch64::BI__builtin_arm_crc32ch:
4836 CRCIntrinsicID = Intrinsic::aarch64_crc32ch;
break;
4837 case clang::AArch64::BI__builtin_arm_crc32w:
4838 CRCIntrinsicID = Intrinsic::aarch64_crc32w;
break;
4839 case clang::AArch64::BI__builtin_arm_crc32cw:
4840 CRCIntrinsicID = Intrinsic::aarch64_crc32cw;
break;
4841 case clang::AArch64::BI__builtin_arm_crc32d:
4842 CRCIntrinsicID = Intrinsic::aarch64_crc32x;
break;
4843 case clang::AArch64::BI__builtin_arm_crc32cd:
4844 CRCIntrinsicID = Intrinsic::aarch64_crc32cx;
break;
4847 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4852 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4853 Arg1 =
Builder.CreateZExtOrBitCast(Arg1, DataTy);
4855 return Builder.CreateCall(F, {Arg0, Arg1});
4859 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
4866 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
4869 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
4870 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
4874 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
4875 switch (BuiltinID) {
4876 case clang::AArch64::BI__builtin_arm_irg:
4877 MTEIntrinsicID = Intrinsic::aarch64_irg;
break;
4878 case clang::AArch64::BI__builtin_arm_addg:
4879 MTEIntrinsicID = Intrinsic::aarch64_addg;
break;
4880 case clang::AArch64::BI__builtin_arm_gmi:
4881 MTEIntrinsicID = Intrinsic::aarch64_gmi;
break;
4882 case clang::AArch64::BI__builtin_arm_ldg:
4883 MTEIntrinsicID = Intrinsic::aarch64_ldg;
break;
4884 case clang::AArch64::BI__builtin_arm_stg:
4885 MTEIntrinsicID = Intrinsic::aarch64_stg;
break;
4886 case clang::AArch64::BI__builtin_arm_subp:
4887 MTEIntrinsicID = Intrinsic::aarch64_subp;
break;
4890 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
4891 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
4894 assert(Mask->
getType()->getScalarSizeInBits() == 64 &&
4895 "SemaARM::BuiltinARMMemoryTaggingCall() enforces this");
4896 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4899 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
4904 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4905 {Pointer, TagOffset});
4907 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
4910 assert(ExcludedMask->
getType()->getScalarSizeInBits() == 64 &&
4911 "SemaARM::BuiltinARMMemoryTaggingCall() enforces this");
4912 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4913 {Pointer, ExcludedMask});
4918 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
4920 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4921 {TagAddress, TagAddress});
4926 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
4928 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
4929 {TagAddress, TagAddress});
4931 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
4935 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
4939 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4940 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4941 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4942 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4943 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
4944 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
4945 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
4946 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
4949 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4950 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4951 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4952 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
4955 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4956 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
4958 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4959 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
4961 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4962 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
4964 llvm::Type *ValueType;
4968 }
else if (Is128Bit) {
4969 llvm::Type *Int128Ty =
4970 llvm::IntegerType::getInt128Ty(
CGM.getLLVMContext());
4971 ValueType = Int128Ty;
4973 }
else if (IsPointerBuiltin) {
4983 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
4984 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
4985 LLVMContext &Context =
CGM.getLLVMContext();
4990 std::string SysRegStr;
4991 llvm::raw_string_ostream(SysRegStr)
4992 << (0b10 | SysReg >> 14) <<
":" << ((SysReg >> 11) & 7) <<
":"
4993 << ((SysReg >> 7) & 15) <<
":" << ((SysReg >> 3) & 15) <<
":"
4996 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
4997 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4998 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5003 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5004 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::read_register, Types);
5006 return Builder.CreateCall(F, Metadata);
5009 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::write_register, Types);
5011 llvm::Value *
Result =
Builder.CreateCall(F, {Metadata, ArgValue});
5016 if (BuiltinID == clang::AArch64::BI__sys) {
5019 const unsigned Op1 = SysReg >> 11;
5020 const unsigned CRn = (SysReg >> 7) & 0xf;
5021 const unsigned CRm = (SysReg >> 3) & 0xf;
5022 const unsigned Op2 = SysReg & 0x7;
5024 Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_sys),
5025 {Builder.getInt32(Op1), Builder.getInt32(CRn),
5026 Builder.getInt32(CRm), Builder.getInt32(Op2),
5027 EmitScalarExpr(E->getArg(1))});
5031 return ConstantInt::get(
Builder.getInt32Ty(), 0);
5034 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5040 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5045 if (BuiltinID == clang::AArch64::BI__mulh ||
5046 BuiltinID == clang::AArch64::BI__umulh) {
5048 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
5050 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5056 Value *MulResult, *HigherBits;
5058 MulResult =
Builder.CreateNSWMul(LHS, RHS);
5059 HigherBits =
Builder.CreateAShr(MulResult, 64);
5061 MulResult =
Builder.CreateNUWMul(LHS, RHS);
5062 HigherBits =
Builder.CreateLShr(MulResult, 64);
5064 HigherBits =
Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5069 if (BuiltinID == AArch64::BI__writex18byte ||
5070 BuiltinID == AArch64::BI__writex18word ||
5071 BuiltinID == AArch64::BI__writex18dword ||
5072 BuiltinID == AArch64::BI__writex18qword) {
5088 if (BuiltinID == AArch64::BI__readx18byte ||
5089 BuiltinID == AArch64::BI__readx18word ||
5090 BuiltinID == AArch64::BI__readx18dword ||
5091 BuiltinID == AArch64::BI__readx18qword) {
5106 if (BuiltinID == AArch64::BI__addx18byte ||
5107 BuiltinID == AArch64::BI__addx18word ||
5108 BuiltinID == AArch64::BI__addx18dword ||
5109 BuiltinID == AArch64::BI__addx18qword ||
5110 BuiltinID == AArch64::BI__incx18byte ||
5111 BuiltinID == AArch64::BI__incx18word ||
5112 BuiltinID == AArch64::BI__incx18dword ||
5113 BuiltinID == AArch64::BI__incx18qword) {
5116 switch (BuiltinID) {
5117 case AArch64::BI__incx18byte:
5121 case AArch64::BI__incx18word:
5125 case AArch64::BI__incx18dword:
5129 case AArch64::BI__incx18qword:
5135 isIncrement =
false;
5160 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5161 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5162 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5163 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5166 return Builder.CreateBitCast(Arg, RetTy);
5169 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5170 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5171 BuiltinID == AArch64::BI_CountLeadingZeros ||
5172 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5176 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5177 BuiltinID == AArch64::BI_CountLeadingOnes64)
5178 Arg =
Builder.CreateXor(Arg, Constant::getAllOnesValue(
ArgType));
5183 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5184 BuiltinID == AArch64::BI_CountLeadingZeros64)
5189 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5190 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5193 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5194 ?
CGM.getIntrinsic(Intrinsic::aarch64_cls)
5195 :
CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5198 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5203 if (BuiltinID == AArch64::BI_CountOneBits ||
5204 BuiltinID == AArch64::BI_CountOneBits64) {
5210 if (BuiltinID == AArch64::BI_CountOneBits64)
5215 if (BuiltinID == AArch64::BI_CountTrailingZeros ||
5216 BuiltinID == AArch64::BI_CountTrailingZeros64) {
5223 if (BuiltinID == AArch64::BI_CountTrailingZeros64)
5228 if (BuiltinID == AArch64::BI__prefetch) {
5237 if (BuiltinID == AArch64::BI__prefetch2) {
5245 uint64_t Op = PrfOp.getZExtValue();
5246 uint64_t
Type = (Op >> 3) & 0x3;
5247 uint64_t
Target = (Op >> 1) & 0x3;
5248 uint64_t Policy = Op & 0x1;
5253 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_prefetch);
5254 return Builder.CreateCall(F, {
Address, RW, Local, IsStream, IsData});
5257 if (BuiltinID == AArch64::BI__hlt) {
5258 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5263 return ConstantInt::get(
Builder.getInt32Ty(), 0);
5266 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5274 if (std::optional<MSVCIntrin> MsvcIntId =
5280 return P.first == BuiltinID;
5283 BuiltinID = It->second;
5289 bool IsSISD = (
Builtin !=
nullptr);
5293 unsigned ICEArguments = 0;
5304 unsigned NumArgs = E->
getNumArgs() - (HasExtraArg ? 1 : 0);
5305 for (
unsigned i = 0, e = NumArgs; i != e; i++) {
5307 switch (BuiltinID) {
5308 case NEON::BI__builtin_neon_vld1_v:
5309 case NEON::BI__builtin_neon_vld1q_v:
5310 case NEON::BI__builtin_neon_vld1_dup_v:
5311 case NEON::BI__builtin_neon_vld1q_dup_v:
5312 case NEON::BI__builtin_neon_vld1_lane_v:
5313 case NEON::BI__builtin_neon_vld1q_lane_v:
5314 case NEON::BI__builtin_neon_vst1_v:
5315 case NEON::BI__builtin_neon_vst1q_v:
5316 case NEON::BI__builtin_neon_vst1_lane_v:
5317 case NEON::BI__builtin_neon_vst1q_lane_v:
5318 case NEON::BI__builtin_neon_vldap1_lane_s64:
5319 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5320 case NEON::BI__builtin_neon_vstl1_lane_s64:
5321 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5334 assert(
Result &&
"SISD intrinsic should have been handled");
5340 if (std::optional<llvm::APSInt>
Result =
5345 bool usgn =
Type.isUnsigned();
5346 bool quad =
Type.isQuad();
5365 switch (BuiltinID) {
5367 case NEON::BI__builtin_neon_vabsh_f16:
5369 case NEON::BI__builtin_neon_vaddq_p128: {
5371 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
5372 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
5373 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
5374 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
5375 return Builder.CreateBitCast(Ops[0], Int128Ty);
5377 case NEON::BI__builtin_neon_vldrq_p128: {
5378 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
5379 return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
5382 case NEON::BI__builtin_neon_vstrq_p128: {
5383 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5385 case NEON::BI__builtin_neon_vcvts_f32_u32:
5386 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5389 case NEON::BI__builtin_neon_vcvts_f32_s32:
5390 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5391 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5394 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
5396 return Builder.CreateUIToFP(Ops[0], FTy);
5397 return Builder.CreateSIToFP(Ops[0], FTy);
5399 case NEON::BI__builtin_neon_vcvth_f16_u16:
5400 case NEON::BI__builtin_neon_vcvth_f16_u32:
5401 case NEON::BI__builtin_neon_vcvth_f16_u64:
5404 case NEON::BI__builtin_neon_vcvth_f16_s16:
5405 case NEON::BI__builtin_neon_vcvth_f16_s32:
5406 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5407 llvm::Type *FTy =
HalfTy;
5409 if (Ops[0]->
getType()->getPrimitiveSizeInBits() == 64)
5411 else if (Ops[0]->
getType()->getPrimitiveSizeInBits() == 32)
5415 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
5417 return Builder.CreateUIToFP(Ops[0], FTy);
5418 return Builder.CreateSIToFP(Ops[0], FTy);
5420 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5421 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5422 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5423 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5424 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5425 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5426 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5427 case NEON::BI__builtin_neon_vcvtph_s16_f16: {
5429 llvm::Type* FTy =
HalfTy;
5430 llvm::Type *Tys[2] = {InTy, FTy};
5431 switch (BuiltinID) {
5432 default: llvm_unreachable(
"missing builtin ID in switch!");
5433 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5434 Int = Intrinsic::aarch64_neon_fcvtau;
break;
5435 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5436 Int = Intrinsic::aarch64_neon_fcvtmu;
break;
5437 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5438 Int = Intrinsic::aarch64_neon_fcvtnu;
break;
5439 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5440 Int = Intrinsic::aarch64_neon_fcvtpu;
break;
5441 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5442 Int = Intrinsic::aarch64_neon_fcvtas;
break;
5443 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5444 Int = Intrinsic::aarch64_neon_fcvtms;
break;
5445 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5446 Int = Intrinsic::aarch64_neon_fcvtns;
break;
5447 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5448 Int = Intrinsic::aarch64_neon_fcvtps;
break;
5452 case NEON::BI__builtin_neon_vcaleh_f16:
5453 case NEON::BI__builtin_neon_vcalth_f16:
5454 case NEON::BI__builtin_neon_vcageh_f16:
5455 case NEON::BI__builtin_neon_vcagth_f16: {
5457 llvm::Type* FTy =
HalfTy;
5458 llvm::Type *Tys[2] = {InTy, FTy};
5459 switch (BuiltinID) {
5460 default: llvm_unreachable(
"missing builtin ID in switch!");
5461 case NEON::BI__builtin_neon_vcageh_f16:
5462 Int = Intrinsic::aarch64_neon_facge;
break;
5463 case NEON::BI__builtin_neon_vcagth_f16:
5464 Int = Intrinsic::aarch64_neon_facgt;
break;
5465 case NEON::BI__builtin_neon_vcaleh_f16:
5466 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]);
break;
5467 case NEON::BI__builtin_neon_vcalth_f16:
5468 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]);
break;
5473 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5474 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5476 llvm::Type* FTy =
HalfTy;
5477 llvm::Type *Tys[2] = {InTy, FTy};
5478 switch (BuiltinID) {
5479 default: llvm_unreachable(
"missing builtin ID in switch!");
5480 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5481 Int = Intrinsic::aarch64_neon_vcvtfp2fxs;
break;
5482 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5483 Int = Intrinsic::aarch64_neon_vcvtfp2fxu;
break;
5488 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5489 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5490 llvm::Type* FTy =
HalfTy;
5492 llvm::Type *Tys[2] = {FTy, InTy};
5493 switch (BuiltinID) {
5494 default: llvm_unreachable(
"missing builtin ID in switch!");
5495 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5496 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5497 Ops[0] =
Builder.CreateSExt(Ops[0], InTy,
"sext");
5499 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5500 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5501 Ops[0] =
Builder.CreateZExt(Ops[0], InTy);
5506 case NEON::BI__builtin_neon_vpaddd_s64: {
5509 auto *Ty = llvm::FixedVectorType::get(
Int64Ty, 2);
5511 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2i64");
5512 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5513 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5514 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
5515 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
5517 return Builder.CreateAdd(Op0, Op1,
"vpaddd");
5519 case NEON::BI__builtin_neon_vpaddd_f64: {
5520 auto *Ty = llvm::FixedVectorType::get(
DoubleTy, 2);
5522 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2f64");
5523 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5524 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5525 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
5526 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
5528 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
5530 case NEON::BI__builtin_neon_vpadds_f32: {
5531 auto *Ty = llvm::FixedVectorType::get(
FloatTy, 2);
5533 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2f32");
5534 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
5535 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
5536 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
5537 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
5539 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
5541 case NEON::BI__builtin_neon_vceqzd_s64:
5544 ICmpInst::ICMP_EQ,
"vceqz");
5545 case NEON::BI__builtin_neon_vceqzd_f64:
5546 case NEON::BI__builtin_neon_vceqzs_f32:
5547 case NEON::BI__builtin_neon_vceqzh_f16:
5550 ICmpInst::FCMP_OEQ,
"vceqz");
5551 case NEON::BI__builtin_neon_vcgezd_s64:
5554 ICmpInst::ICMP_SGE,
"vcgez");
5555 case NEON::BI__builtin_neon_vcgezd_f64:
5556 case NEON::BI__builtin_neon_vcgezs_f32:
5557 case NEON::BI__builtin_neon_vcgezh_f16:
5560 ICmpInst::FCMP_OGE,
"vcgez");
5561 case NEON::BI__builtin_neon_vclezd_s64:
5564 ICmpInst::ICMP_SLE,
"vclez");
5565 case NEON::BI__builtin_neon_vclezd_f64:
5566 case NEON::BI__builtin_neon_vclezs_f32:
5567 case NEON::BI__builtin_neon_vclezh_f16:
5570 ICmpInst::FCMP_OLE,
"vclez");
5571 case NEON::BI__builtin_neon_vcgtzd_s64:
5574 ICmpInst::ICMP_SGT,
"vcgtz");
5575 case NEON::BI__builtin_neon_vcgtzd_f64:
5576 case NEON::BI__builtin_neon_vcgtzs_f32:
5577 case NEON::BI__builtin_neon_vcgtzh_f16:
5580 ICmpInst::FCMP_OGT,
"vcgtz");
5581 case NEON::BI__builtin_neon_vcltzd_s64:
5584 ICmpInst::ICMP_SLT,
"vcltz");
5586 case NEON::BI__builtin_neon_vcltzd_f64:
5587 case NEON::BI__builtin_neon_vcltzs_f32:
5588 case NEON::BI__builtin_neon_vcltzh_f16:
5591 ICmpInst::FCMP_OLT,
"vcltz");
5593 case NEON::BI__builtin_neon_vceqzd_u64: {
5596 ICmpInst::ICMP_EQ,
"vceqzd");
5598 case NEON::BI__builtin_neon_vceqd_f64:
5599 case NEON::BI__builtin_neon_vcled_f64:
5600 case NEON::BI__builtin_neon_vcltd_f64:
5601 case NEON::BI__builtin_neon_vcged_f64:
5602 case NEON::BI__builtin_neon_vcgtd_f64: {
5603 llvm::CmpInst::Predicate P;
5604 switch (BuiltinID) {
5605 default: llvm_unreachable(
"missing builtin ID in switch!");
5606 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ;
break;
5607 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE;
break;
5608 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT;
break;
5609 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE;
break;
5610 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT;
break;
5614 if (P == llvm::FCmpInst::FCMP_OEQ)
5615 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
5617 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5620 case NEON::BI__builtin_neon_vceqs_f32:
5621 case NEON::BI__builtin_neon_vcles_f32:
5622 case NEON::BI__builtin_neon_vclts_f32:
5623 case NEON::BI__builtin_neon_vcges_f32:
5624 case NEON::BI__builtin_neon_vcgts_f32: {
5625 llvm::CmpInst::Predicate P;
5626 switch (BuiltinID) {
5627 default: llvm_unreachable(
"missing builtin ID in switch!");
5628 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ;
break;
5629 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE;
break;
5630 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT;
break;
5631 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE;
break;
5632 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT;
break;
5636 if (P == llvm::FCmpInst::FCMP_OEQ)
5637 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
5639 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5642 case NEON::BI__builtin_neon_vceqh_f16:
5643 case NEON::BI__builtin_neon_vcleh_f16:
5644 case NEON::BI__builtin_neon_vclth_f16:
5645 case NEON::BI__builtin_neon_vcgeh_f16:
5646 case NEON::BI__builtin_neon_vcgth_f16: {
5647 llvm::CmpInst::Predicate P;
5648 switch (BuiltinID) {
5649 default: llvm_unreachable(
"missing builtin ID in switch!");
5650 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ;
break;
5651 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE;
break;
5652 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT;
break;
5653 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE;
break;
5654 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT;
break;
5658 if (P == llvm::FCmpInst::FCMP_OEQ)
5659 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
5661 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
5664 case NEON::BI__builtin_neon_vceqd_s64:
5665 case NEON::BI__builtin_neon_vceqd_u64:
5666 case NEON::BI__builtin_neon_vcgtd_s64:
5667 case NEON::BI__builtin_neon_vcgtd_u64:
5668 case NEON::BI__builtin_neon_vcltd_s64:
5669 case NEON::BI__builtin_neon_vcltd_u64:
5670 case NEON::BI__builtin_neon_vcged_u64:
5671 case NEON::BI__builtin_neon_vcged_s64:
5672 case NEON::BI__builtin_neon_vcled_u64:
5673 case NEON::BI__builtin_neon_vcled_s64: {
5674 llvm::CmpInst::Predicate P;
5675 switch (BuiltinID) {
5676 default: llvm_unreachable(
"missing builtin ID in switch!");
5677 case NEON::BI__builtin_neon_vceqd_s64:
5678 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;
break;
5679 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;
break;
5680 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;
break;
5681 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;
break;
5682 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;
break;
5683 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;
break;
5684 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;
break;
5685 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;
break;
5686 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;
break;
5690 Ops[0] =
Builder.CreateICmp(P, Ops[0], Ops[1]);
5693 case NEON::BI__builtin_neon_vnegd_s64:
5694 return Builder.CreateNeg(Ops[0],
"vnegd");
5695 case NEON::BI__builtin_neon_vnegh_f16:
5696 return Builder.CreateFNeg(Ops[0],
"vnegh");
5697 case NEON::BI__builtin_neon_vtstd_s64:
5698 case NEON::BI__builtin_neon_vtstd_u64: {
5701 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
5702 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5703 llvm::Constant::getNullValue(
Int64Ty));
5706 case NEON::BI__builtin_neon_vset_lane_i8:
5707 case NEON::BI__builtin_neon_vset_lane_i16:
5708 case NEON::BI__builtin_neon_vset_lane_i32:
5709 case NEON::BI__builtin_neon_vset_lane_i64:
5710 case NEON::BI__builtin_neon_vset_lane_bf16:
5711 case NEON::BI__builtin_neon_vset_lane_f32:
5712 case NEON::BI__builtin_neon_vsetq_lane_i8:
5713 case NEON::BI__builtin_neon_vsetq_lane_i16:
5714 case NEON::BI__builtin_neon_vsetq_lane_i32:
5715 case NEON::BI__builtin_neon_vsetq_lane_i64:
5716 case NEON::BI__builtin_neon_vsetq_lane_bf16:
5717 case NEON::BI__builtin_neon_vsetq_lane_f32:
5718 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5719 case NEON::BI__builtin_neon_vset_lane_f64:
5722 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 1));
5723 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5724 case NEON::BI__builtin_neon_vset_lane_mf8:
5725 case NEON::BI__builtin_neon_vsetq_lane_mf8:
5729 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5730 case NEON::BI__builtin_neon_vsetq_lane_f64:
5733 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 2));
5734 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
5736 case NEON::BI__builtin_neon_vget_lane_i8:
5737 case NEON::BI__builtin_neon_vdupb_lane_i8:
5738 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5739 case NEON::BI__builtin_neon_vgetq_lane_i8:
5740 case NEON::BI__builtin_neon_vdupb_laneq_i8:
5741 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5742 case NEON::BI__builtin_neon_vget_lane_mf8:
5743 case NEON::BI__builtin_neon_vdupb_lane_mf8:
5744 case NEON::BI__builtin_neon_vgetq_lane_mf8:
5745 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
5746 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5747 case NEON::BI__builtin_neon_vget_lane_i16:
5748 case NEON::BI__builtin_neon_vduph_lane_i16:
5749 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5750 case NEON::BI__builtin_neon_vgetq_lane_i16:
5751 case NEON::BI__builtin_neon_vduph_laneq_i16:
5752 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5753 case NEON::BI__builtin_neon_vget_lane_i32:
5754 case NEON::BI__builtin_neon_vdups_lane_i32:
5755 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5756 case NEON::BI__builtin_neon_vdups_lane_f32:
5757 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vdups_lane");
5758 case NEON::BI__builtin_neon_vgetq_lane_i32:
5759 case NEON::BI__builtin_neon_vdups_laneq_i32:
5760 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5761 case NEON::BI__builtin_neon_vget_lane_i64:
5762 case NEON::BI__builtin_neon_vdupd_lane_i64:
5763 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5764 case NEON::BI__builtin_neon_vdupd_lane_f64:
5765 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vdupd_lane");
5766 case NEON::BI__builtin_neon_vgetq_lane_i64:
5767 case NEON::BI__builtin_neon_vdupd_laneq_i64:
5768 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5769 case NEON::BI__builtin_neon_vget_lane_f32:
5770 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5771 case NEON::BI__builtin_neon_vget_lane_f64:
5772 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5773 case NEON::BI__builtin_neon_vgetq_lane_f32:
5774 case NEON::BI__builtin_neon_vdups_laneq_f32:
5775 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5776 case NEON::BI__builtin_neon_vgetq_lane_f64:
5777 case NEON::BI__builtin_neon_vdupd_laneq_f64:
5778 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5779 case NEON::BI__builtin_neon_vaddh_f16:
5780 return Builder.CreateFAdd(Ops[0], Ops[1],
"vaddh");
5781 case NEON::BI__builtin_neon_vsubh_f16:
5782 return Builder.CreateFSub(Ops[0], Ops[1],
"vsubh");
5783 case NEON::BI__builtin_neon_vmulh_f16:
5784 return Builder.CreateFMul(Ops[0], Ops[1],
"vmulh");
5785 case NEON::BI__builtin_neon_vdivh_f16:
5786 return Builder.CreateFDiv(Ops[0], Ops[1],
"vdivh");
5787 case NEON::BI__builtin_neon_vfmah_f16:
5790 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
5791 {Ops[1], Ops[2], Ops[0]});
5792 case NEON::BI__builtin_neon_vfmsh_f16: {
5797 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
5798 {Neg, Ops[2], Ops[0]});
5800 case NEON::BI__builtin_neon_vaddd_s64:
5801 case NEON::BI__builtin_neon_vaddd_u64:
5802 return Builder.CreateAdd(Ops[0], Ops[1],
"vaddd");
5803 case NEON::BI__builtin_neon_vsubd_s64:
5804 case NEON::BI__builtin_neon_vsubd_u64:
5805 return Builder.CreateSub(Ops[0], Ops[1],
"vsubd");
5806 case NEON::BI__builtin_neon_vqdmlalh_s16:
5807 case NEON::BI__builtin_neon_vqdmlslh_s16: {
5811 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
5812 Ops[1] =
EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5813 ProductOps,
"vqdmlXl");
5815 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
5817 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5818 ? Intrinsic::aarch64_neon_sqadd
5819 : Intrinsic::aarch64_neon_sqsub;
5824 case NEON::BI__builtin_neon_vqshlud_n_s64: {
5829 case NEON::BI__builtin_neon_vqshld_n_u64:
5830 case NEON::BI__builtin_neon_vqshld_n_s64: {
5831 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5832 ? Intrinsic::aarch64_neon_uqshl
5833 : Intrinsic::aarch64_neon_sqshl;
5837 case NEON::BI__builtin_neon_vrshrd_n_u64:
5838 case NEON::BI__builtin_neon_vrshrd_n_s64: {
5839 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5840 ? Intrinsic::aarch64_neon_urshl
5841 : Intrinsic::aarch64_neon_srshl;
5843 Ops[1] = ConstantInt::get(
Int64Ty, -SV);
5846 case NEON::BI__builtin_neon_vrsrad_n_u64:
5847 case NEON::BI__builtin_neon_vrsrad_n_s64: {
5848 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5849 ? Intrinsic::aarch64_neon_urshl
5850 : Intrinsic::aarch64_neon_srshl;
5852 Ops[2] =
Builder.CreateNeg(Ops[2]);
5854 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5857 case NEON::BI__builtin_neon_vshld_n_s64:
5858 case NEON::BI__builtin_neon_vshld_n_u64: {
5861 Ops[0], ConstantInt::get(
Int64Ty, Amt->getZExtValue()),
"shld_n");
5863 case NEON::BI__builtin_neon_vshrd_n_s64: {
5866 Ops[0], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t
>(63),
5867 Amt->getZExtValue())),
5870 case NEON::BI__builtin_neon_vshrd_n_u64: {
5872 uint64_t ShiftAmt = Amt->getZExtValue();
5875 return ConstantInt::get(
Int64Ty, 0);
5876 return Builder.CreateLShr(Ops[0], ConstantInt::get(
Int64Ty, ShiftAmt),
5879 case NEON::BI__builtin_neon_vsrad_n_s64: {
5882 Ops[1], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t
>(63),
5883 Amt->getZExtValue())),
5885 return Builder.CreateAdd(Ops[0], Ops[1]);
5887 case NEON::BI__builtin_neon_vsrad_n_u64: {
5889 uint64_t ShiftAmt = Amt->getZExtValue();
5894 Ops[1] =
Builder.CreateLShr(Ops[1], ConstantInt::get(
Int64Ty, ShiftAmt),
5896 return Builder.CreateAdd(Ops[0], Ops[1]);
5898 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5899 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5900 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5901 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5902 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"lane");
5906 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
5907 Ops[1] =
EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5908 ProductOps,
"vqdmlXl");
5910 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
5915 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5916 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5917 ? Intrinsic::aarch64_neon_sqadd
5918 : Intrinsic::aarch64_neon_sqsub;
5921 case NEON::BI__builtin_neon_vqdmlals_s32:
5922 case NEON::BI__builtin_neon_vqdmlsls_s32: {
5924 ProductOps.push_back(Ops[1]);
5925 ProductOps.push_back(Ops[2]);
5927 EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5928 ProductOps,
"vqdmlXl");
5930 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5931 ? Intrinsic::aarch64_neon_sqadd
5932 : Intrinsic::aarch64_neon_sqsub;
5937 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5938 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5939 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5940 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5941 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"lane");
5943 ProductOps.push_back(Ops[1]);
5944 ProductOps.push_back(Ops[2]);
5946 EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5947 ProductOps,
"vqdmlXl");
5952 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5953 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5954 ? Intrinsic::aarch64_neon_sqadd
5955 : Intrinsic::aarch64_neon_sqsub;
5958 case NEON::BI__builtin_neon_vget_lane_bf16:
5959 case NEON::BI__builtin_neon_vduph_lane_bf16:
5960 case NEON::BI__builtin_neon_vduph_lane_f16: {
5961 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
5963 case NEON::BI__builtin_neon_vgetq_lane_bf16:
5964 case NEON::BI__builtin_neon_vduph_laneq_bf16:
5965 case NEON::BI__builtin_neon_vduph_laneq_f16: {
5966 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
5968 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
5969 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
5970 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
5971 return Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
5973 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
5975 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
5976 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
5977 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
5978 llvm::Value *Trunc =
5979 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
5980 return Builder.CreateShuffleVector(
5981 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
5983 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
5985 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
5987 std::iota(LoMask.begin(), LoMask.end(), 0);
5988 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
5989 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
5990 llvm::Type *V8BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 8);
5991 llvm::Value *Inactive =
Builder.CreateShuffleVector(
5992 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
5993 llvm::Value *Trunc =
5994 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
5995 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
5997 case NEON::BI__builtin_neon_vcvt_f16_f32: {
5998 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
5999 llvm::Type *V4F16 = FixedVectorType::get(
Builder.getHalfTy(), 4);
6000 return Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4F16);
6002 case NEON::BI__builtin_neon_vcvt_f32_f16: {
6003 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6004 llvm::Type *V4F16 = FixedVectorType::get(
Builder.getHalfTy(), 4);
6005 return Builder.CreateFPExt(
Builder.CreateBitCast(Ops[0], V4F16), V4F32);
6008 case clang::AArch64::BI_InterlockedAdd:
6009 case clang::AArch64::BI_InterlockedAdd_acq:
6010 case clang::AArch64::BI_InterlockedAdd_rel:
6011 case clang::AArch64::BI_InterlockedAdd_nf:
6012 case clang::AArch64::BI_InterlockedAdd64:
6013 case clang::AArch64::BI_InterlockedAdd64_acq:
6014 case clang::AArch64::BI_InterlockedAdd64_rel:
6015 case clang::AArch64::BI_InterlockedAdd64_nf: {
6017 Value *Val = Ops[1];
6018 llvm::AtomicOrdering Ordering;
6019 switch (BuiltinID) {
6020 case clang::AArch64::BI_InterlockedAdd:
6021 case clang::AArch64::BI_InterlockedAdd64:
6022 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6024 case clang::AArch64::BI_InterlockedAdd_acq:
6025 case clang::AArch64::BI_InterlockedAdd64_acq:
6026 Ordering = llvm::AtomicOrdering::Acquire;
6028 case clang::AArch64::BI_InterlockedAdd_rel:
6029 case clang::AArch64::BI_InterlockedAdd64_rel:
6030 Ordering = llvm::AtomicOrdering::Release;
6032 case clang::AArch64::BI_InterlockedAdd_nf:
6033 case clang::AArch64::BI_InterlockedAdd64_nf:
6034 Ordering = llvm::AtomicOrdering::Monotonic;
6037 llvm_unreachable(
"missing builtin ID in switch!");
6039 AtomicRMWInst *RMWI =
6040 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6041 return Builder.CreateAdd(RMWI, Val);
6046 llvm::Type *Ty = VTy;
6050 bool ExtractLow =
false;
6051 bool ExtendLaneArg =
false;
6052 switch (BuiltinID) {
6053 default:
return nullptr;
6054 case NEON::BI__builtin_neon_vbsl_v:
6055 case NEON::BI__builtin_neon_vbslq_v: {
6056 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6057 Ops[0] =
Builder.CreateBitCast(Ops[0], BitTy,
"vbsl");
6058 Ops[1] =
Builder.CreateBitCast(Ops[1], BitTy,
"vbsl");
6059 Ops[2] =
Builder.CreateBitCast(Ops[2], BitTy,
"vbsl");
6061 Ops[1] =
Builder.CreateAnd(Ops[0], Ops[1],
"vbsl");
6062 Ops[2] =
Builder.CreateAnd(
Builder.CreateNot(Ops[0]), Ops[2],
"vbsl");
6063 Ops[0] =
Builder.CreateOr(Ops[1], Ops[2],
"vbsl");
6064 return Builder.CreateBitCast(Ops[0], Ty);
6066 case NEON::BI__builtin_neon_vfma_lane_v:
6067 case NEON::BI__builtin_neon_vfmaq_lane_v: {
6070 Value *Addend = Ops[0];
6071 Value *Multiplicand = Ops[1];
6072 Value *LaneSource = Ops[2];
6073 Ops[0] = Multiplicand;
6074 Ops[1] = LaneSource;
6078 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6079 ? llvm::FixedVectorType::get(VTy->getElementType(),
6080 VTy->getNumElements() / 2)
6083 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6084 Ops[1] =
Builder.CreateBitCast(Ops[1], SourceTy);
6085 Ops[1] =
Builder.CreateShuffleVector(Ops[1], Ops[1], SV,
"lane");
6088 Int =
Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6092 case NEON::BI__builtin_neon_vfma_laneq_v: {
6095 if (VTy && VTy->getElementType() ==
DoubleTy) {
6098 llvm::FixedVectorType *VTy =
6100 Ops[2] =
Builder.CreateBitCast(Ops[2], VTy);
6101 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
6104 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6105 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6108 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6109 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6111 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6112 VTy->getNumElements() * 2);
6113 Ops[2] =
Builder.CreateBitCast(Ops[2], STy);
6114 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6116 Ops[2] =
Builder.CreateShuffleVector(Ops[2], Ops[2], SV,
"lane");
6119 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6120 {Ops[2], Ops[1], Ops[0]});
6122 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6123 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6124 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6126 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6129 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6130 {Ops[2], Ops[1], Ops[0]});
6132 case NEON::BI__builtin_neon_vfmah_lane_f16:
6133 case NEON::BI__builtin_neon_vfmas_lane_f32:
6134 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6135 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6136 case NEON::BI__builtin_neon_vfmad_lane_f64:
6137 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6139 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
6141 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6142 {Ops[1], Ops[2], Ops[0]});
6144 case NEON::BI__builtin_neon_vmull_v:
6146 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6147 if (
Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6149 case NEON::BI__builtin_neon_vmax_v:
6150 case NEON::BI__builtin_neon_vmaxq_v:
6152 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6153 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6155 case NEON::BI__builtin_neon_vmaxh_f16: {
6156 Int = Intrinsic::aarch64_neon_fmax;
6159 case NEON::BI__builtin_neon_vmin_v:
6160 case NEON::BI__builtin_neon_vminq_v:
6162 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6163 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6165 case NEON::BI__builtin_neon_vminh_f16: {
6166 Int = Intrinsic::aarch64_neon_fmin;
6169 case NEON::BI__builtin_neon_vabd_v:
6170 case NEON::BI__builtin_neon_vabdq_v:
6172 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6173 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6175 case NEON::BI__builtin_neon_vpadal_v:
6176 case NEON::BI__builtin_neon_vpadalq_v: {
6177 unsigned ArgElts = VTy->getNumElements();
6179 unsigned BitWidth = EltTy->getBitWidth();
6180 auto *ArgTy = llvm::FixedVectorType::get(
6181 llvm::IntegerType::get(
getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6182 llvm::Type* Tys[2] = { VTy, ArgTy };
6183 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6185 TmpOps.push_back(Ops[1]);
6188 llvm::Value *addend =
Builder.CreateBitCast(Ops[0], tmp->getType());
6189 return Builder.CreateAdd(tmp, addend);
6191 case NEON::BI__builtin_neon_vpmin_v:
6192 case NEON::BI__builtin_neon_vpminq_v:
6194 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6195 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6197 case NEON::BI__builtin_neon_vpmax_v:
6198 case NEON::BI__builtin_neon_vpmaxq_v:
6200 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6201 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6203 case NEON::BI__builtin_neon_vminnm_v:
6204 case NEON::BI__builtin_neon_vminnmq_v:
6205 Int = Intrinsic::aarch64_neon_fminnm;
6207 case NEON::BI__builtin_neon_vminnmh_f16:
6208 Int = Intrinsic::aarch64_neon_fminnm;
6210 case NEON::BI__builtin_neon_vmaxnm_v:
6211 case NEON::BI__builtin_neon_vmaxnmq_v:
6212 Int = Intrinsic::aarch64_neon_fmaxnm;
6214 case NEON::BI__builtin_neon_vmaxnmh_f16:
6215 Int = Intrinsic::aarch64_neon_fmaxnm;
6217 case NEON::BI__builtin_neon_vrecpss_f32: {
6221 case NEON::BI__builtin_neon_vrecpsd_f64:
6224 case NEON::BI__builtin_neon_vrecpsh_f16:
6227 case NEON::BI__builtin_neon_vqshrun_n_v:
6228 Int = Intrinsic::aarch64_neon_sqshrun;
6230 case NEON::BI__builtin_neon_vqrshrun_n_v:
6231 Int = Intrinsic::aarch64_neon_sqrshrun;
6233 case NEON::BI__builtin_neon_vqshrn_n_v:
6234 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6236 case NEON::BI__builtin_neon_vrshrn_n_v:
6237 Int = Intrinsic::aarch64_neon_rshrn;
6239 case NEON::BI__builtin_neon_vqrshrn_n_v:
6240 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6242 case NEON::BI__builtin_neon_vrndah_f16: {
6243 Int =
Builder.getIsFPConstrained()
6244 ? Intrinsic::experimental_constrained_round
6248 case NEON::BI__builtin_neon_vrnda_v:
6249 case NEON::BI__builtin_neon_vrndaq_v: {
6250 Int =
Builder.getIsFPConstrained()
6251 ? Intrinsic::experimental_constrained_round
6255 case NEON::BI__builtin_neon_vrndih_f16: {
6256 Int =
Builder.getIsFPConstrained()
6257 ? Intrinsic::experimental_constrained_nearbyint
6258 : Intrinsic::nearbyint;
6261 case NEON::BI__builtin_neon_vrndmh_f16: {
6262 Int =
Builder.getIsFPConstrained()
6263 ? Intrinsic::experimental_constrained_floor
6267 case NEON::BI__builtin_neon_vrndm_v:
6268 case NEON::BI__builtin_neon_vrndmq_v: {
6269 Int =
Builder.getIsFPConstrained()
6270 ? Intrinsic::experimental_constrained_floor
6274 case NEON::BI__builtin_neon_vrndnh_f16: {
6275 Int =
Builder.getIsFPConstrained()
6276 ? Intrinsic::experimental_constrained_roundeven
6277 : Intrinsic::roundeven;
6280 case NEON::BI__builtin_neon_vrndn_v:
6281 case NEON::BI__builtin_neon_vrndnq_v: {
6282 Int =
Builder.getIsFPConstrained()
6283 ? Intrinsic::experimental_constrained_roundeven
6284 : Intrinsic::roundeven;
6287 case NEON::BI__builtin_neon_vrndns_f32: {
6288 Int =
Builder.getIsFPConstrained()
6289 ? Intrinsic::experimental_constrained_roundeven
6290 : Intrinsic::roundeven;
6293 case NEON::BI__builtin_neon_vrndph_f16: {
6294 Int =
Builder.getIsFPConstrained()
6295 ? Intrinsic::experimental_constrained_ceil
6299 case NEON::BI__builtin_neon_vrndp_v:
6300 case NEON::BI__builtin_neon_vrndpq_v: {
6301 Int =
Builder.getIsFPConstrained()
6302 ? Intrinsic::experimental_constrained_ceil
6306 case NEON::BI__builtin_neon_vrndxh_f16: {
6307 Int =
Builder.getIsFPConstrained()
6308 ? Intrinsic::experimental_constrained_rint
6312 case NEON::BI__builtin_neon_vrndx_v:
6313 case NEON::BI__builtin_neon_vrndxq_v: {
6314 Int =
Builder.getIsFPConstrained()
6315 ? Intrinsic::experimental_constrained_rint
6319 case NEON::BI__builtin_neon_vrndh_f16: {
6320 Int =
Builder.getIsFPConstrained()
6321 ? Intrinsic::experimental_constrained_trunc
6325 case NEON::BI__builtin_neon_vrnd32x_f32:
6326 case NEON::BI__builtin_neon_vrnd32xq_f32:
6327 case NEON::BI__builtin_neon_vrnd32x_f64:
6328 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6329 Int = Intrinsic::aarch64_neon_frint32x;
6332 case NEON::BI__builtin_neon_vrnd32z_f32:
6333 case NEON::BI__builtin_neon_vrnd32zq_f32:
6334 case NEON::BI__builtin_neon_vrnd32z_f64:
6335 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6336 Int = Intrinsic::aarch64_neon_frint32z;
6339 case NEON::BI__builtin_neon_vrnd64x_f32:
6340 case NEON::BI__builtin_neon_vrnd64xq_f32:
6341 case NEON::BI__builtin_neon_vrnd64x_f64:
6342 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6343 Int = Intrinsic::aarch64_neon_frint64x;
6346 case NEON::BI__builtin_neon_vrnd64z_f32:
6347 case NEON::BI__builtin_neon_vrnd64zq_f32:
6348 case NEON::BI__builtin_neon_vrnd64z_f64:
6349 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6350 Int = Intrinsic::aarch64_neon_frint64z;
6353 case NEON::BI__builtin_neon_vrnd_v:
6354 case NEON::BI__builtin_neon_vrndq_v: {
6355 Int =
Builder.getIsFPConstrained()
6356 ? Intrinsic::experimental_constrained_trunc
6360 case NEON::BI__builtin_neon_vcvt_f64_v:
6361 case NEON::BI__builtin_neon_vcvtq_f64_v:
6362 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6364 return usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
6365 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
6366 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6368 "unexpected vcvt_f64_f32 builtin");
6372 return Builder.CreateFPExt(Ops[0], Ty,
"vcvt");
6374 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6376 "unexpected vcvt_f32_f64 builtin");
6380 return Builder.CreateFPTrunc(Ops[0], Ty,
"vcvt");
6382 case NEON::BI__builtin_neon_vcvta_s16_f16:
6383 case NEON::BI__builtin_neon_vcvta_u16_f16:
6384 case NEON::BI__builtin_neon_vcvta_s32_v:
6385 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6386 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6387 case NEON::BI__builtin_neon_vcvta_u32_v:
6388 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6389 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6390 case NEON::BI__builtin_neon_vcvta_s64_v:
6391 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6392 case NEON::BI__builtin_neon_vcvta_u64_v:
6393 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6394 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6398 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6399 case NEON::BI__builtin_neon_vcvtm_s32_v:
6400 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6401 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6402 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6403 case NEON::BI__builtin_neon_vcvtm_u32_v:
6404 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6405 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6406 case NEON::BI__builtin_neon_vcvtm_s64_v:
6407 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6408 case NEON::BI__builtin_neon_vcvtm_u64_v:
6409 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6410 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6414 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6415 case NEON::BI__builtin_neon_vcvtn_s32_v:
6416 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6417 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6418 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6419 case NEON::BI__builtin_neon_vcvtn_u32_v:
6420 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6421 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6422 case NEON::BI__builtin_neon_vcvtn_s64_v:
6423 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6424 case NEON::BI__builtin_neon_vcvtn_u64_v:
6425 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6426 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6430 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6431 case NEON::BI__builtin_neon_vcvtp_s32_v:
6432 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6433 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6434 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6435 case NEON::BI__builtin_neon_vcvtp_u32_v:
6436 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6437 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6438 case NEON::BI__builtin_neon_vcvtp_s64_v:
6439 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6440 case NEON::BI__builtin_neon_vcvtp_u64_v:
6441 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6442 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6446 case NEON::BI__builtin_neon_vmulx_v:
6447 case NEON::BI__builtin_neon_vmulxq_v: {
6448 Int = Intrinsic::aarch64_neon_fmulx;
6451 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6452 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6455 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2],
"extract");
6457 Int = Intrinsic::aarch64_neon_fmulx;
6460 case NEON::BI__builtin_neon_vmul_lane_v:
6461 case NEON::BI__builtin_neon_vmul_laneq_v: {
6464 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6467 llvm::FixedVectorType *VTy =
6469 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
6470 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2],
"extract");
6474 case NEON::BI__builtin_neon_vpmaxnm_v:
6475 case NEON::BI__builtin_neon_vpmaxnmq_v: {
6476 Int = Intrinsic::aarch64_neon_fmaxnmp;
6479 case NEON::BI__builtin_neon_vpminnm_v:
6480 case NEON::BI__builtin_neon_vpminnmq_v: {
6481 Int = Intrinsic::aarch64_neon_fminnmp;
6484 case NEON::BI__builtin_neon_vsqrth_f16: {
6485 Int =
Builder.getIsFPConstrained()
6486 ? Intrinsic::experimental_constrained_sqrt
6490 case NEON::BI__builtin_neon_vsqrt_v:
6491 case NEON::BI__builtin_neon_vsqrtq_v: {
6492 Int =
Builder.getIsFPConstrained()
6493 ? Intrinsic::experimental_constrained_sqrt
6495 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6498 case NEON::BI__builtin_neon_vrbit_v:
6499 case NEON::BI__builtin_neon_vrbitq_v: {
6500 Int = Intrinsic::bitreverse;
6503 case NEON::BI__builtin_neon_vmaxv_f16: {
6504 Int = Intrinsic::aarch64_neon_fmaxv;
6506 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6507 llvm::Type *Tys[2] = {Ty, VTy};
6510 case NEON::BI__builtin_neon_vmaxvq_f16: {
6511 Int = Intrinsic::aarch64_neon_fmaxv;
6513 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6514 llvm::Type *Tys[2] = {Ty, VTy};
6517 case NEON::BI__builtin_neon_vminv_f16: {
6518 Int = Intrinsic::aarch64_neon_fminv;
6520 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6521 llvm::Type *Tys[2] = {Ty, VTy};
6524 case NEON::BI__builtin_neon_vminvq_f16: {
6525 Int = Intrinsic::aarch64_neon_fminv;
6527 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6528 llvm::Type *Tys[2] = {Ty, VTy};
6531 case NEON::BI__builtin_neon_vmaxnmv_f16: {
6532 Int = Intrinsic::aarch64_neon_fmaxnmv;
6534 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6535 llvm::Type *Tys[2] = {Ty, VTy};
6538 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
6539 Int = Intrinsic::aarch64_neon_fmaxnmv;
6541 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6542 llvm::Type *Tys[2] = {Ty, VTy};
6545 case NEON::BI__builtin_neon_vminnmv_f16: {
6546 Int = Intrinsic::aarch64_neon_fminnmv;
6548 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
6549 llvm::Type *Tys[2] = {Ty, VTy};
6552 case NEON::BI__builtin_neon_vminnmvq_f16: {
6553 Int = Intrinsic::aarch64_neon_fminnmv;
6555 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
6556 llvm::Type *Tys[2] = {Ty, VTy};
6559 case NEON::BI__builtin_neon_vmul_n_f64: {
6562 return Builder.CreateFMul(Ops[0], RHS);
6564 case NEON::BI__builtin_neon_vaddlv_u8:
6565 case NEON::BI__builtin_neon_vaddlvq_u8:
6566 case NEON::BI__builtin_neon_vaddlv_u16:
6567 case NEON::BI__builtin_neon_vaddlvq_u16: {
6568 Int = Intrinsic::aarch64_neon_uaddlv;
6571 llvm::Type *Tys[2] = {Ty, VTy};
6573 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6577 case NEON::BI__builtin_neon_vaddlv_s8:
6578 case NEON::BI__builtin_neon_vaddlvq_s8:
6579 case NEON::BI__builtin_neon_vaddlv_s16:
6580 case NEON::BI__builtin_neon_vaddlvq_s16: {
6581 Int = Intrinsic::aarch64_neon_saddlv;
6584 llvm::Type *Tys[2] = {Ty, VTy};
6586 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6590 case NEON::BI__builtin_neon_vsri_n_v:
6591 case NEON::BI__builtin_neon_vsriq_n_v: {
6592 Int = Intrinsic::aarch64_neon_vsri;
6593 llvm::Function *Intrin =
CGM.getIntrinsic(Int, Ty);
6596 case NEON::BI__builtin_neon_vsli_n_v:
6597 case NEON::BI__builtin_neon_vsliq_n_v: {
6598 Int = Intrinsic::aarch64_neon_vsli;
6599 llvm::Function *Intrin =
CGM.getIntrinsic(Int, Ty);
6602 case NEON::BI__builtin_neon_vsra_n_v:
6603 case NEON::BI__builtin_neon_vsraq_n_v:
6604 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6606 return Builder.CreateAdd(Ops[0], Ops[1]);
6607 case NEON::BI__builtin_neon_vrsra_n_v:
6608 case NEON::BI__builtin_neon_vrsraq_n_v: {
6609 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6611 TmpOps.push_back(Ops[1]);
6612 TmpOps.push_back(Ops[2]);
6614 llvm::Value *tmp =
EmitNeonCall(F, TmpOps,
"vrshr_n", 1,
true);
6615 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
6616 return Builder.CreateAdd(Ops[0], tmp);
6618 case NEON::BI__builtin_neon_vld1_v:
6619 case NEON::BI__builtin_neon_vld1q_v: {
6622 case NEON::BI__builtin_neon_vst1_v:
6623 case NEON::BI__builtin_neon_vst1q_v:
6624 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
6626 case NEON::BI__builtin_neon_vld1_lane_v:
6627 case NEON::BI__builtin_neon_vld1q_lane_v: {
6628 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6629 Ops[0] =
Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6631 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vld1_lane");
6633 case NEON::BI__builtin_neon_vldap1_lane_s64:
6634 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
6635 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6636 llvm::LoadInst *LI =
Builder.CreateAlignedLoad(
6638 LI->setAtomic(llvm::AtomicOrdering::Acquire);
6640 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vldap1_lane");
6642 case NEON::BI__builtin_neon_vld1_dup_v:
6643 case NEON::BI__builtin_neon_vld1q_dup_v: {
6644 Value *
V = PoisonValue::get(Ty);
6645 Ops[0] =
Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
6647 llvm::Constant *CI = ConstantInt::get(
Int32Ty, 0);
6648 Ops[0] =
Builder.CreateInsertElement(
V, Ops[0], CI);
6651 case NEON::BI__builtin_neon_vst1_lane_v:
6652 case NEON::BI__builtin_neon_vst1q_lane_v:
6653 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6654 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
6656 case NEON::BI__builtin_neon_vstl1_lane_s64:
6657 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
6658 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6659 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
6660 llvm::StoreInst *SI =
6662 SI->setAtomic(llvm::AtomicOrdering::Release);
6665 case NEON::BI__builtin_neon_vld2_v:
6666 case NEON::BI__builtin_neon_vld2q_v: {
6668 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6669 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
6670 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6672 case NEON::BI__builtin_neon_vld3_v:
6673 case NEON::BI__builtin_neon_vld3q_v: {
6675 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6676 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
6677 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6679 case NEON::BI__builtin_neon_vld4_v:
6680 case NEON::BI__builtin_neon_vld4q_v: {
6682 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6683 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
6684 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6686 case NEON::BI__builtin_neon_vld2_dup_v:
6687 case NEON::BI__builtin_neon_vld2q_dup_v: {
6689 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6690 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
6691 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6693 case NEON::BI__builtin_neon_vld3_dup_v:
6694 case NEON::BI__builtin_neon_vld3q_dup_v: {
6696 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6697 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
6698 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6700 case NEON::BI__builtin_neon_vld4_dup_v:
6701 case NEON::BI__builtin_neon_vld4q_dup_v: {
6703 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6704 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
6705 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6707 case NEON::BI__builtin_neon_vld2_lane_v:
6708 case NEON::BI__builtin_neon_vld2q_lane_v: {
6709 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6710 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6711 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6712 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6713 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6716 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6718 case NEON::BI__builtin_neon_vld3_lane_v:
6719 case NEON::BI__builtin_neon_vld3q_lane_v: {
6720 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6721 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6722 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6723 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6724 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6725 Ops[3] =
Builder.CreateBitCast(Ops[3], Ty);
6728 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6730 case NEON::BI__builtin_neon_vld4_lane_v:
6731 case NEON::BI__builtin_neon_vld4q_lane_v: {
6732 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6733 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6734 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
6735 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6736 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6737 Ops[3] =
Builder.CreateBitCast(Ops[3], Ty);
6738 Ops[4] =
Builder.CreateBitCast(Ops[4], Ty);
6741 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6743 case NEON::BI__builtin_neon_vst2_v:
6744 case NEON::BI__builtin_neon_vst2q_v: {
6745 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6746 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6747 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6750 case NEON::BI__builtin_neon_vst2_lane_v:
6751 case NEON::BI__builtin_neon_vst2q_lane_v: {
6752 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6754 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6755 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6758 case NEON::BI__builtin_neon_vst3_v:
6759 case NEON::BI__builtin_neon_vst3q_v: {
6760 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6761 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6762 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6765 case NEON::BI__builtin_neon_vst3_lane_v:
6766 case NEON::BI__builtin_neon_vst3q_lane_v: {
6767 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6769 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6770 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6773 case NEON::BI__builtin_neon_vst4_v:
6774 case NEON::BI__builtin_neon_vst4q_v: {
6775 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6776 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6777 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6780 case NEON::BI__builtin_neon_vst4_lane_v:
6781 case NEON::BI__builtin_neon_vst4q_lane_v: {
6782 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6784 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6785 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6788 case NEON::BI__builtin_neon_vtrn_v:
6789 case NEON::BI__builtin_neon_vtrnq_v: {
6790 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6791 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6792 Value *SV =
nullptr;
6794 for (
unsigned vi = 0; vi != 2; ++vi) {
6796 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6797 Indices.push_back(i+vi);
6798 Indices.push_back(i+e+vi);
6801 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vtrn");
6806 case NEON::BI__builtin_neon_vuzp_v:
6807 case NEON::BI__builtin_neon_vuzpq_v: {
6808 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6809 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6810 Value *SV =
nullptr;
6812 for (
unsigned vi = 0; vi != 2; ++vi) {
6814 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6815 Indices.push_back(2*i+vi);
6818 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vuzp");
6823 case NEON::BI__builtin_neon_vzip_v:
6824 case NEON::BI__builtin_neon_vzipq_v: {
6825 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6826 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6827 Value *SV =
nullptr;
6829 for (
unsigned vi = 0; vi != 2; ++vi) {
6831 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6832 Indices.push_back((i + vi*e) >> 1);
6833 Indices.push_back(((i + vi*e) >> 1)+e);
6836 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vzip");
6841 case NEON::BI__builtin_neon_vqtbl1q_v: {
6842 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6845 case NEON::BI__builtin_neon_vqtbl2q_v: {
6846 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6849 case NEON::BI__builtin_neon_vqtbl3q_v: {
6850 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6853 case NEON::BI__builtin_neon_vqtbl4q_v: {
6854 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6857 case NEON::BI__builtin_neon_vqtbx1q_v: {
6858 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6861 case NEON::BI__builtin_neon_vqtbx2q_v: {
6862 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6865 case NEON::BI__builtin_neon_vqtbx3q_v: {
6866 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6869 case NEON::BI__builtin_neon_vqtbx4q_v: {
6870 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6873 case NEON::BI__builtin_neon_vsqadd_v:
6874 case NEON::BI__builtin_neon_vsqaddq_v: {
6875 Int = Intrinsic::aarch64_neon_usqadd;
6878 case NEON::BI__builtin_neon_vuqadd_v:
6879 case NEON::BI__builtin_neon_vuqaddq_v: {
6880 Int = Intrinsic::aarch64_neon_suqadd;
6884 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
6885 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
6886 case NEON::BI__builtin_neon_vluti2_laneq_f16:
6887 case NEON::BI__builtin_neon_vluti2_laneq_p16:
6888 case NEON::BI__builtin_neon_vluti2_laneq_p8:
6889 case NEON::BI__builtin_neon_vluti2_laneq_s16:
6890 case NEON::BI__builtin_neon_vluti2_laneq_s8:
6891 case NEON::BI__builtin_neon_vluti2_laneq_u16:
6892 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
6893 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6900 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
6901 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
6902 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
6903 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
6904 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
6905 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
6906 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
6907 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
6908 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
6909 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6916 case NEON::BI__builtin_neon_vluti2_lane_mf8:
6917 case NEON::BI__builtin_neon_vluti2_lane_bf16:
6918 case NEON::BI__builtin_neon_vluti2_lane_f16:
6919 case NEON::BI__builtin_neon_vluti2_lane_p16:
6920 case NEON::BI__builtin_neon_vluti2_lane_p8:
6921 case NEON::BI__builtin_neon_vluti2_lane_s16:
6922 case NEON::BI__builtin_neon_vluti2_lane_s8:
6923 case NEON::BI__builtin_neon_vluti2_lane_u16:
6924 case NEON::BI__builtin_neon_vluti2_lane_u8: {
6925 Int = Intrinsic::aarch64_neon_vluti2_lane;
6932 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
6933 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
6934 case NEON::BI__builtin_neon_vluti2q_lane_f16:
6935 case NEON::BI__builtin_neon_vluti2q_lane_p16:
6936 case NEON::BI__builtin_neon_vluti2q_lane_p8:
6937 case NEON::BI__builtin_neon_vluti2q_lane_s16:
6938 case NEON::BI__builtin_neon_vluti2q_lane_s8:
6939 case NEON::BI__builtin_neon_vluti2q_lane_u16:
6940 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
6941 Int = Intrinsic::aarch64_neon_vluti2_lane;
6948 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
6949 case NEON::BI__builtin_neon_vluti4q_lane_p8:
6950 case NEON::BI__builtin_neon_vluti4q_lane_s8:
6951 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
6952 Int = Intrinsic::aarch64_neon_vluti4q_lane;
6955 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
6956 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
6957 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
6958 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
6959 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
6962 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
6963 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
6964 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
6965 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
6966 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
6967 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
6968 return EmitNeonCall(
CGM.getIntrinsic(Int, Ty), Ops,
"vluti4q_lane_x2");
6970 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
6971 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
6972 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
6973 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
6974 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
6975 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
6976 return EmitNeonCall(
CGM.getIntrinsic(Int, Ty), Ops,
"vluti4q_laneq_x2");
6978 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
6980 {llvm::FixedVectorType::get(
HalfTy, 8),
6981 llvm::FixedVectorType::get(
Int8Ty, 16)},
6983 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
6985 {llvm::FixedVectorType::get(
FloatTy, 4),
6986 llvm::FixedVectorType::get(
Int8Ty, 16)},
6988 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
6991 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
6992 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
6994 llvm::FixedVectorType::get(
BFloatTy, 8),
6995 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt1");
6996 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
6999 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7000 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7002 llvm::FixedVectorType::get(
BFloatTy, 8),
7003 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt2");
7004 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7007 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7008 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7010 llvm::FixedVectorType::get(
HalfTy, 8),
7011 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt1");
7012 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7015 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7016 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7018 llvm::FixedVectorType::get(
HalfTy, 8),
7019 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt2");
7020 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7022 llvm::FixedVectorType::get(
Int8Ty, 8),
7023 Ops[0]->
getType(),
false, Ops, E,
"vfcvtn");
7024 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7026 llvm::FixedVectorType::get(
Int8Ty, 8),
7027 llvm::FixedVectorType::get(
HalfTy, 4),
false, Ops,
7029 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7031 llvm::FixedVectorType::get(
Int8Ty, 16),
7032 llvm::FixedVectorType::get(
HalfTy, 8),
false, Ops,
7034 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7035 llvm::Type *Ty = llvm::FixedVectorType::get(
Int8Ty, 16);
7036 Ops[0] =
Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7039 Ops[1]->
getType(),
false, Ops, E,
"vfcvtn2");
7042 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7043 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7046 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7047 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7048 ExtendLaneArg =
true;
7050 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7051 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7053 ExtendLaneArg,
HalfTy, Ops, E,
"fdot2_lane");
7054 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7055 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7058 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7059 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7060 ExtendLaneArg =
true;
7062 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7063 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7065 ExtendLaneArg,
FloatTy, Ops, E,
"fdot4_lane");
7067 case NEON::BI__builtin_neon_vdot_f32_f16:
7068 case NEON::BI__builtin_neon_vdotq_f32_f16: {
7069 llvm::Type *InputTy =
7070 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7071 llvm::Type *Tys[2] = {Ty, InputTy};
7072 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_fdot, Tys),
7076 case NEON::BI__builtin_neon_vdot_lane_f32_f16:
7077 case NEON::BI__builtin_neon_vdot_laneq_f32_f16:
7078 case NEON::BI__builtin_neon_vdotq_lane_f32_f16:
7079 case NEON::BI__builtin_neon_vdotq_laneq_f32_f16: {
7080 llvm::FixedVectorType *InputTy =
7081 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
7082 llvm::FixedVectorType *LaneTy = llvm::FixedVectorType::get(
7086 Ops[2] =
Builder.CreateBitCast(Ops[2], LaneTy);
7088 InputTy->getElementCount());
7089 llvm::Type *Tys[2] = {Ty, InputTy};
7091 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_fdot, Tys),
7095 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7097 {llvm::FixedVectorType::get(
HalfTy, 8)}, Ops, E,
7099 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7101 {llvm::FixedVectorType::get(
HalfTy, 8)}, Ops, E,
7103 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7105 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7107 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7109 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7111 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7113 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7115 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7117 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7119 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7120 ExtendLaneArg =
true;
7122 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7124 ExtendLaneArg,
HalfTy, Ops, E,
"vmlal_lane");
7125 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7126 ExtendLaneArg =
true;
7128 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7130 ExtendLaneArg,
HalfTy, Ops, E,
"vmlal_lane");
7131 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7132 ExtendLaneArg =
true;
7134 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7136 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7137 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7138 ExtendLaneArg =
true;
7140 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7142 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7143 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7144 ExtendLaneArg =
true;
7146 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7148 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7149 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7150 ExtendLaneArg =
true;
7152 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7154 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7155 case NEON::BI__builtin_neon_vamin_f16:
7156 case NEON::BI__builtin_neon_vaminq_f16:
7157 case NEON::BI__builtin_neon_vamin_f32:
7158 case NEON::BI__builtin_neon_vaminq_f32:
7159 case NEON::BI__builtin_neon_vaminq_f64: {
7160 Int = Intrinsic::aarch64_neon_famin;
7163 case NEON::BI__builtin_neon_vamax_f16:
7164 case NEON::BI__builtin_neon_vamaxq_f16:
7165 case NEON::BI__builtin_neon_vamax_f32:
7166 case NEON::BI__builtin_neon_vamaxq_f32:
7167 case NEON::BI__builtin_neon_vamaxq_f64: {
7168 Int = Intrinsic::aarch64_neon_famax;
7171 case NEON::BI__builtin_neon_vscale_f16:
7172 case NEON::BI__builtin_neon_vscaleq_f16:
7173 case NEON::BI__builtin_neon_vscale_f32:
7174 case NEON::BI__builtin_neon_vscaleq_f32:
7175 case NEON::BI__builtin_neon_vscaleq_f64: {
7176 Int = Intrinsic::aarch64_neon_fp8_fscale;