601 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
609 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
613 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
614 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
615 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
616 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
617 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
618 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
619 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
620 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
621 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
634 NEONMAP1(vcage_v, arm_neon_vacge, 0),
635 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
636 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
637 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
638 NEONMAP1(vcale_v, arm_neon_vacge, 0),
639 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
640 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
641 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
658 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
661 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
663 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
664 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
665 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
666 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
667 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
668 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
669 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
670 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
671 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
678 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
679 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
680 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
681 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
682 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
683 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
684 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
685 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
686 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
687 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
688 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
689 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
690 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
691 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
692 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
693 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
694 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
695 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
696 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
697 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
698 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
699 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
700 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
701 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
702 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
703 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
704 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
705 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
706 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
707 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
708 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
709 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
710 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
711 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
712 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
713 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
714 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
715 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
716 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
717 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
718 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
719 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
720 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
721 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
722 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
723 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
724 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
725 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
726 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
730 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
731 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
732 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
733 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
734 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
735 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
736 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
737 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
738 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
745 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
746 NEONMAP1(vdot_u32, arm_neon_udot, 0),
747 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
748 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
759 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
760 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
761 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
763 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
764 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
765 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
766 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
767 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
768 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
770 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
771 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
772 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
773 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
774 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
776 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
777 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
778 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
779 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
780 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
782 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
783 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
784 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
793 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
794 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
812 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
813 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
837 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
838 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
842 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
843 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
866 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
867 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
871 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
872 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
873 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
874 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
875 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
876 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
886 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
887 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
888 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
889 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
890 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
891 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
892 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
893 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
895 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
896 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
897 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
899 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
900 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
901 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
903 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
904 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
910 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
911 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
912 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
924 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
925 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
929 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
930 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
931 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
932 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
941 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
942 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
943 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
944 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
945 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
956 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
957 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
958 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
959 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
960 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
961 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
962 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
963 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
1000 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
1003 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
1005 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1006 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1007 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1008 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1009 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1010 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1011 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1012 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1013 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1014 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1018 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1019 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1020 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1021 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1022 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1023 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1024 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1025 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1026 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1027 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1029 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
1030 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
1031 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
1032 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
1045 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
1046 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
1047 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
1048 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
1049 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
1050 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
1051 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
1052 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
1057 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
1058 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
1059 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
1060 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
1061 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
1062 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
1063 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
1064 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
1077 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
1078 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
1079 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
1080 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1082 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
1083 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1098 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1099 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1101 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1102 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1110 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
1111 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
1115 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
1116 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1117 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1144 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1145 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1149 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
1150 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
1151 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
1152 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
1153 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
1154 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
1155 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
1156 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
1157 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
1158 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
1167 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
1168 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
1169 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
1170 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
1171 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
1172 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
1173 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
1174 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
1175 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
1176 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
1177 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
1178 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
1179 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
1180 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
1181 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
1185 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
1186 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
1187 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
1188 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
1803 unsigned BuiltinID,
unsigned LLVMIntrinsic,
unsigned AltLLVMIntrinsic,
1804 const char *NameHint,
unsigned Modifier,
const CallExpr *E,
1806 llvm::Triple::ArchType
Arch) {
1812 std::optional<llvm::APSInt> NeonTypeConst =
1819 const bool Usgn =
Type.isUnsigned();
1820 const bool Quad =
Type.isQuad();
1821 const bool Floating =
Type.isFloatingPoint();
1823 const bool AllowBFloatArgsAndRet =
1826 llvm::FixedVectorType *VTy =
1827 GetNeonType(
this,
Type, HasFastHalfType,
false, AllowBFloatArgsAndRet);
1828 llvm::Type *Ty = VTy;
1832 auto getAlignmentValue32 = [&](
Address addr) ->
Value* {
1833 return Builder.getInt32(addr.getAlignment().getQuantity());
1836 unsigned Int = LLVMIntrinsic;
1838 Int = AltLLVMIntrinsic;
1840 switch (BuiltinID) {
1842 case NEON::BI__builtin_neon_splat_lane_v:
1843 case NEON::BI__builtin_neon_splat_laneq_v:
1844 case NEON::BI__builtin_neon_splatq_lane_v:
1845 case NEON::BI__builtin_neon_splatq_laneq_v: {
1846 auto NumElements = VTy->getElementCount();
1847 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1848 NumElements = NumElements * 2;
1849 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1850 NumElements = NumElements.divideCoefficientBy(2);
1852 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
1855 case NEON::BI__builtin_neon_vpadd_v:
1856 case NEON::BI__builtin_neon_vpaddq_v:
1858 if (VTy->getElementType()->isFloatingPointTy() &&
1859 Int == Intrinsic::aarch64_neon_addp)
1860 Int = Intrinsic::aarch64_neon_faddp;
1862 case NEON::BI__builtin_neon_vabs_v:
1863 case NEON::BI__builtin_neon_vabsq_v:
1864 if (VTy->getElementType()->isFloatingPointTy())
1865 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops,
"vabs");
1866 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops,
"vabs");
1867 case NEON::BI__builtin_neon_vadd_v:
1868 case NEON::BI__builtin_neon_vaddq_v: {
1869 llvm::Type *VTy = llvm::FixedVectorType::get(
Int8Ty, Quad ? 16 : 8);
1870 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
1871 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
1872 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
1873 return Builder.CreateBitCast(Ops[0], Ty);
1875 case NEON::BI__builtin_neon_vaddhn_v: {
1876 llvm::FixedVectorType *SrcTy =
1877 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1880 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
1881 Ops[1] =
Builder.CreateBitCast(Ops[1], SrcTy);
1882 Ops[0] =
Builder.CreateAdd(Ops[0], Ops[1],
"vaddhn");
1885 Constant *ShiftAmt =
1886 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
1887 Ops[0] =
Builder.CreateLShr(Ops[0], ShiftAmt,
"vaddhn");
1890 return Builder.CreateTrunc(Ops[0], VTy,
"vaddhn");
1892 case NEON::BI__builtin_neon_vcale_v:
1893 case NEON::BI__builtin_neon_vcaleq_v:
1894 case NEON::BI__builtin_neon_vcalt_v:
1895 case NEON::BI__builtin_neon_vcaltq_v:
1896 std::swap(Ops[0], Ops[1]);
1898 case NEON::BI__builtin_neon_vcage_v:
1899 case NEON::BI__builtin_neon_vcageq_v:
1900 case NEON::BI__builtin_neon_vcagt_v:
1901 case NEON::BI__builtin_neon_vcagtq_v: {
1903 switch (VTy->getScalarSizeInBits()) {
1904 default: llvm_unreachable(
"unexpected type");
1915 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
1916 llvm::Type *Tys[] = { VTy, VecFlt };
1917 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1920 case NEON::BI__builtin_neon_vceqz_v:
1921 case NEON::BI__builtin_neon_vceqzq_v:
1923 Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ,
"vceqz");
1924 case NEON::BI__builtin_neon_vcgez_v:
1925 case NEON::BI__builtin_neon_vcgezq_v:
1927 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1929 case NEON::BI__builtin_neon_vclez_v:
1930 case NEON::BI__builtin_neon_vclezq_v:
1932 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1934 case NEON::BI__builtin_neon_vcgtz_v:
1935 case NEON::BI__builtin_neon_vcgtzq_v:
1937 Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1939 case NEON::BI__builtin_neon_vcltz_v:
1940 case NEON::BI__builtin_neon_vcltzq_v:
1942 Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1944 case NEON::BI__builtin_neon_vclz_v:
1945 case NEON::BI__builtin_neon_vclzq_v:
1950 case NEON::BI__builtin_neon_vcvt_f32_v:
1951 case NEON::BI__builtin_neon_vcvtq_f32_v:
1952 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1955 return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
1956 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
1957 case NEON::BI__builtin_neon_vcvt_f16_s16:
1958 case NEON::BI__builtin_neon_vcvt_f16_u16:
1959 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1960 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1961 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
1964 return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
1965 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
1966 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1967 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1968 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1969 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1974 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1975 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1976 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1977 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1979 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1983 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1984 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1985 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1986 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1987 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1988 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1989 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1990 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1991 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1992 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1993 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1994 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1996 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
1999 case NEON::BI__builtin_neon_vcvt_s32_v:
2000 case NEON::BI__builtin_neon_vcvt_u32_v:
2001 case NEON::BI__builtin_neon_vcvt_s64_v:
2002 case NEON::BI__builtin_neon_vcvt_u64_v:
2003 case NEON::BI__builtin_neon_vcvt_s16_f16:
2004 case NEON::BI__builtin_neon_vcvt_u16_f16:
2005 case NEON::BI__builtin_neon_vcvtq_s32_v:
2006 case NEON::BI__builtin_neon_vcvtq_u32_v:
2007 case NEON::BI__builtin_neon_vcvtq_s64_v:
2008 case NEON::BI__builtin_neon_vcvtq_u64_v:
2009 case NEON::BI__builtin_neon_vcvtq_s16_f16:
2010 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
2012 return Usgn ?
Builder.CreateFPToUI(Ops[0], Ty,
"vcvt")
2013 :
Builder.CreateFPToSI(Ops[0], Ty,
"vcvt");
2015 case NEON::BI__builtin_neon_vcvta_s16_f16:
2016 case NEON::BI__builtin_neon_vcvta_s32_v:
2017 case NEON::BI__builtin_neon_vcvta_s64_v:
2018 case NEON::BI__builtin_neon_vcvta_u16_f16:
2019 case NEON::BI__builtin_neon_vcvta_u32_v:
2020 case NEON::BI__builtin_neon_vcvta_u64_v:
2021 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
2022 case NEON::BI__builtin_neon_vcvtaq_s32_v:
2023 case NEON::BI__builtin_neon_vcvtaq_s64_v:
2024 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
2025 case NEON::BI__builtin_neon_vcvtaq_u32_v:
2026 case NEON::BI__builtin_neon_vcvtaq_u64_v:
2027 case NEON::BI__builtin_neon_vcvtn_s16_f16:
2028 case NEON::BI__builtin_neon_vcvtn_s32_v:
2029 case NEON::BI__builtin_neon_vcvtn_s64_v:
2030 case NEON::BI__builtin_neon_vcvtn_u16_f16:
2031 case NEON::BI__builtin_neon_vcvtn_u32_v:
2032 case NEON::BI__builtin_neon_vcvtn_u64_v:
2033 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
2034 case NEON::BI__builtin_neon_vcvtnq_s32_v:
2035 case NEON::BI__builtin_neon_vcvtnq_s64_v:
2036 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
2037 case NEON::BI__builtin_neon_vcvtnq_u32_v:
2038 case NEON::BI__builtin_neon_vcvtnq_u64_v:
2039 case NEON::BI__builtin_neon_vcvtp_s16_f16:
2040 case NEON::BI__builtin_neon_vcvtp_s32_v:
2041 case NEON::BI__builtin_neon_vcvtp_s64_v:
2042 case NEON::BI__builtin_neon_vcvtp_u16_f16:
2043 case NEON::BI__builtin_neon_vcvtp_u32_v:
2044 case NEON::BI__builtin_neon_vcvtp_u64_v:
2045 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
2046 case NEON::BI__builtin_neon_vcvtpq_s32_v:
2047 case NEON::BI__builtin_neon_vcvtpq_s64_v:
2048 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
2049 case NEON::BI__builtin_neon_vcvtpq_u32_v:
2050 case NEON::BI__builtin_neon_vcvtpq_u64_v:
2051 case NEON::BI__builtin_neon_vcvtm_s16_f16:
2052 case NEON::BI__builtin_neon_vcvtm_s32_v:
2053 case NEON::BI__builtin_neon_vcvtm_s64_v:
2054 case NEON::BI__builtin_neon_vcvtm_u16_f16:
2055 case NEON::BI__builtin_neon_vcvtm_u32_v:
2056 case NEON::BI__builtin_neon_vcvtm_u64_v:
2057 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
2058 case NEON::BI__builtin_neon_vcvtmq_s32_v:
2059 case NEON::BI__builtin_neon_vcvtmq_s64_v:
2060 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
2061 case NEON::BI__builtin_neon_vcvtmq_u32_v:
2062 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
2064 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2066 case NEON::BI__builtin_neon_vcvtx_f32_v: {
2067 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
2068 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
2071 case NEON::BI__builtin_neon_vext_v:
2072 case NEON::BI__builtin_neon_vextq_v: {
2075 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2076 Indices.push_back(i+CV);
2078 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
2079 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2080 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices,
"vext");
2082 case NEON::BI__builtin_neon_vfma_v:
2083 case NEON::BI__builtin_neon_vfmaq_v: {
2084 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
2085 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2086 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
2090 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
2091 {Ops[1], Ops[2], Ops[0]});
2093 case NEON::BI__builtin_neon_vld1_v:
2094 case NEON::BI__builtin_neon_vld1q_v: {
2096 Ops.push_back(getAlignmentValue32(PtrOp0));
2097 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
"vld1");
2099 case NEON::BI__builtin_neon_vld1_x2_v:
2100 case NEON::BI__builtin_neon_vld1q_x2_v:
2101 case NEON::BI__builtin_neon_vld1_x3_v:
2102 case NEON::BI__builtin_neon_vld1q_x3_v:
2103 case NEON::BI__builtin_neon_vld1_x4_v:
2104 case NEON::BI__builtin_neon_vld1q_x4_v: {
2106 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
2107 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld1xN");
2108 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2110 case NEON::BI__builtin_neon_vld2_v:
2111 case NEON::BI__builtin_neon_vld2q_v:
2112 case NEON::BI__builtin_neon_vld3_v:
2113 case NEON::BI__builtin_neon_vld3q_v:
2114 case NEON::BI__builtin_neon_vld4_v:
2115 case NEON::BI__builtin_neon_vld4q_v:
2116 case NEON::BI__builtin_neon_vld2_dup_v:
2117 case NEON::BI__builtin_neon_vld2q_dup_v:
2118 case NEON::BI__builtin_neon_vld3_dup_v:
2119 case NEON::BI__builtin_neon_vld3q_dup_v:
2120 case NEON::BI__builtin_neon_vld4_dup_v:
2121 case NEON::BI__builtin_neon_vld4q_dup_v: {
2123 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
2124 Value *Align = getAlignmentValue32(PtrOp1);
2125 Ops[1] =
Builder.CreateCall(F, {Ops[1], Align}, NameHint);
2126 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2128 case NEON::BI__builtin_neon_vld1_dup_v:
2129 case NEON::BI__builtin_neon_vld1q_dup_v: {
2130 Value *
V = PoisonValue::get(Ty);
2132 LoadInst *Ld =
Builder.CreateLoad(PtrOp0);
2133 llvm::Constant *CI = ConstantInt::get(
SizeTy, 0);
2134 Ops[0] =
Builder.CreateInsertElement(
V, Ld, CI);
2137 case NEON::BI__builtin_neon_vld2_lane_v:
2138 case NEON::BI__builtin_neon_vld2q_lane_v:
2139 case NEON::BI__builtin_neon_vld3_lane_v:
2140 case NEON::BI__builtin_neon_vld3q_lane_v:
2141 case NEON::BI__builtin_neon_vld4_lane_v:
2142 case NEON::BI__builtin_neon_vld4q_lane_v: {
2144 Function *F =
CGM.getIntrinsic(LLVMIntrinsic, Tys);
2145 for (
unsigned I = 2; I < Ops.size() - 1; ++I)
2146 Ops[I] =
Builder.CreateBitCast(Ops[I], Ty);
2147 Ops.push_back(getAlignmentValue32(PtrOp1));
2149 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
2151 case NEON::BI__builtin_neon_vmovl_v: {
2152 llvm::FixedVectorType *DTy =
2153 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2154 Ops[0] =
Builder.CreateBitCast(Ops[0], DTy);
2156 return Builder.CreateZExt(Ops[0], Ty,
"vmovl");
2157 return Builder.CreateSExt(Ops[0], Ty,
"vmovl");
2159 case NEON::BI__builtin_neon_vmovn_v: {
2160 llvm::FixedVectorType *QTy =
2161 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2162 Ops[0] =
Builder.CreateBitCast(Ops[0], QTy);
2163 return Builder.CreateTrunc(Ops[0], Ty,
"vmovn");
2165 case NEON::BI__builtin_neon_vmull_v:
2171 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2172 Int =
Type.isPoly() ? (
unsigned)Intrinsic::arm_neon_vmullp : Int;
2174 case NEON::BI__builtin_neon_vpadal_v:
2175 case NEON::BI__builtin_neon_vpadalq_v: {
2177 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2181 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2182 llvm::Type *Tys[2] = { Ty, NarrowTy };
2185 case NEON::BI__builtin_neon_vpaddl_v:
2186 case NEON::BI__builtin_neon_vpaddlq_v: {
2188 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2189 llvm::Type *EltTy = llvm::IntegerType::get(
getLLVMContext(), EltBits / 2);
2191 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
2192 llvm::Type *Tys[2] = { Ty, NarrowTy };
2195 case NEON::BI__builtin_neon_vqdmlal_v:
2196 case NEON::BI__builtin_neon_vqdmlsl_v: {
2201 return EmitNeonCall(
CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
2203 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2204 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2205 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2206 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2208 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2209 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2210 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
2211 RTy->getNumElements() * 2);
2212 llvm::Type *Tys[2] = {
2217 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2218 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2219 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2220 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2221 llvm::Type *Tys[2] = {
2226 case NEON::BI__builtin_neon_vqshl_n_v:
2227 case NEON::BI__builtin_neon_vqshlq_n_v:
2230 case NEON::BI__builtin_neon_vqshlu_n_v:
2231 case NEON::BI__builtin_neon_vqshluq_n_v:
2234 case NEON::BI__builtin_neon_vrecpe_v:
2235 case NEON::BI__builtin_neon_vrecpeq_v:
2236 case NEON::BI__builtin_neon_vrsqrte_v:
2237 case NEON::BI__builtin_neon_vrsqrteq_v:
2238 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2240 case NEON::BI__builtin_neon_vrndi_v:
2241 case NEON::BI__builtin_neon_vrndiq_v:
2242 Int =
Builder.getIsFPConstrained()
2243 ? Intrinsic::experimental_constrained_nearbyint
2244 : Intrinsic::nearbyint;
2246 case NEON::BI__builtin_neon_vrshr_n_v:
2247 case NEON::BI__builtin_neon_vrshrq_n_v:
2250 case NEON::BI__builtin_neon_vsha512hq_u64:
2251 case NEON::BI__builtin_neon_vsha512h2q_u64:
2252 case NEON::BI__builtin_neon_vsha512su0q_u64:
2253 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2257 case NEON::BI__builtin_neon_vshl_n_v:
2258 case NEON::BI__builtin_neon_vshlq_n_v:
2260 return Builder.CreateShl(
Builder.CreateBitCast(Ops[0],Ty), Ops[1],
2262 case NEON::BI__builtin_neon_vshll_n_v: {
2263 llvm::FixedVectorType *SrcTy =
2264 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2265 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
2267 Ops[0] =
Builder.CreateZExt(Ops[0], VTy);
2269 Ops[0] =
Builder.CreateSExt(Ops[0], VTy);
2271 return Builder.CreateShl(Ops[0], Ops[1],
"vshll_n");
2273 case NEON::BI__builtin_neon_vshrn_n_v: {
2274 llvm::FixedVectorType *SrcTy =
2275 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2276 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
2279 Ops[0] =
Builder.CreateLShr(Ops[0], Ops[1]);
2281 Ops[0] =
Builder.CreateAShr(Ops[0], Ops[1]);
2282 return Builder.CreateTrunc(Ops[0], Ty,
"vshrn_n");
2284 case NEON::BI__builtin_neon_vshr_n_v:
2285 case NEON::BI__builtin_neon_vshrq_n_v:
2287 case NEON::BI__builtin_neon_vst1_v:
2288 case NEON::BI__builtin_neon_vst1q_v:
2289 case NEON::BI__builtin_neon_vst2_v:
2290 case NEON::BI__builtin_neon_vst2q_v:
2291 case NEON::BI__builtin_neon_vst3_v:
2292 case NEON::BI__builtin_neon_vst3q_v:
2293 case NEON::BI__builtin_neon_vst4_v:
2294 case NEON::BI__builtin_neon_vst4q_v:
2295 case NEON::BI__builtin_neon_vst2_lane_v:
2296 case NEON::BI__builtin_neon_vst2q_lane_v:
2297 case NEON::BI__builtin_neon_vst3_lane_v:
2298 case NEON::BI__builtin_neon_vst3q_lane_v:
2299 case NEON::BI__builtin_neon_vst4_lane_v:
2300 case NEON::BI__builtin_neon_vst4q_lane_v: {
2302 Ops.push_back(getAlignmentValue32(PtrOp0));
2305 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2306 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2307 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2308 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2309 case NEON::BI__builtin_neon_vsm4eq_u32: {
2313 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2314 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2315 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2316 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2321 case NEON::BI__builtin_neon_vst1_x2_v:
2322 case NEON::BI__builtin_neon_vst1q_x2_v:
2323 case NEON::BI__builtin_neon_vst1_x3_v:
2324 case NEON::BI__builtin_neon_vst1q_x3_v:
2325 case NEON::BI__builtin_neon_vst1_x4_v:
2326 case NEON::BI__builtin_neon_vst1q_x4_v: {
2329 if (
Arch == llvm::Triple::aarch64 ||
Arch == llvm::Triple::aarch64_be ||
2330 Arch == llvm::Triple::aarch64_32) {
2332 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
2338 case NEON::BI__builtin_neon_vsubhn_v: {
2339 llvm::FixedVectorType *SrcTy =
2340 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2343 Ops[0] =
Builder.CreateBitCast(Ops[0], SrcTy);
2344 Ops[1] =
Builder.CreateBitCast(Ops[1], SrcTy);
2345 Ops[0] =
Builder.CreateSub(Ops[0], Ops[1],
"vsubhn");
2348 Constant *ShiftAmt =
2349 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2350 Ops[0] =
Builder.CreateLShr(Ops[0], ShiftAmt,
"vsubhn");
2353 return Builder.CreateTrunc(Ops[0], VTy,
"vsubhn");
2355 case NEON::BI__builtin_neon_vtrn_v:
2356 case NEON::BI__builtin_neon_vtrnq_v: {
2357 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2358 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
2359 Value *SV =
nullptr;
2361 for (
unsigned vi = 0; vi != 2; ++vi) {
2363 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2364 Indices.push_back(i+vi);
2365 Indices.push_back(i+e+vi);
2368 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vtrn");
2373 case NEON::BI__builtin_neon_vtst_v:
2374 case NEON::BI__builtin_neon_vtstq_v: {
2375 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
2376 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2377 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
2378 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2379 ConstantAggregateZero::get(Ty));
2380 return Builder.CreateSExt(Ops[0], Ty,
"vtst");
2382 case NEON::BI__builtin_neon_vuzp_v:
2383 case NEON::BI__builtin_neon_vuzpq_v: {
2384 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2385 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
2386 Value *SV =
nullptr;
2388 for (
unsigned vi = 0; vi != 2; ++vi) {
2390 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2391 Indices.push_back(2*i+vi);
2394 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vuzp");
2399 case NEON::BI__builtin_neon_vxarq_u64: {
2404 case NEON::BI__builtin_neon_vzip_v:
2405 case NEON::BI__builtin_neon_vzipq_v: {
2406 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
2407 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
2408 Value *SV =
nullptr;
2410 for (
unsigned vi = 0; vi != 2; ++vi) {
2412 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2413 Indices.push_back((i + vi*e) >> 1);
2414 Indices.push_back(((i + vi*e) >> 1)+e);
2417 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vzip");
2422 case NEON::BI__builtin_neon_vdot_s32:
2423 case NEON::BI__builtin_neon_vdot_u32:
2424 case NEON::BI__builtin_neon_vdotq_s32:
2425 case NEON::BI__builtin_neon_vdotq_u32: {
2427 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2428 llvm::Type *Tys[2] = { Ty, InputTy };
2431 case NEON::BI__builtin_neon_vfmlal_low_f16:
2432 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2434 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2435 llvm::Type *Tys[2] = { Ty, InputTy };
2438 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2439 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2441 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2442 llvm::Type *Tys[2] = { Ty, InputTy };
2445 case NEON::BI__builtin_neon_vfmlal_high_f16:
2446 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2448 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2449 llvm::Type *Tys[2] = { Ty, InputTy };
2452 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2453 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2455 llvm::FixedVectorType::get(
HalfTy, Ty->getPrimitiveSizeInBits() / 16);
2456 llvm::Type *Tys[2] = { Ty, InputTy };
2459 case NEON::BI__builtin_neon_vmmlaq_s32:
2460 case NEON::BI__builtin_neon_vmmlaq_u32: {
2462 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2463 llvm::Type *Tys[2] = { Ty, InputTy };
2464 return EmitNeonCall(
CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops,
"vmmla");
2466 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2468 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2469 llvm::Type *Tys[2] = { Ty, InputTy };
2472 case NEON::BI__builtin_neon_vusdot_s32:
2473 case NEON::BI__builtin_neon_vusdotq_s32: {
2475 llvm::FixedVectorType::get(
Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
2476 llvm::Type *Tys[2] = { Ty, InputTy };
2479 case NEON::BI__builtin_neon_vbfdot_f32:
2480 case NEON::BI__builtin_neon_vbfdotq_f32: {
2481 llvm::Type *InputTy =
2482 llvm::FixedVectorType::get(
BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
2483 llvm::Type *Tys[2] = { Ty, InputTy };
2486 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2487 llvm::Type *Tys[1] = { Ty };
2494 assert(Int &&
"Expected valid intrinsic number");
5113 llvm::Triple::ArchType
Arch) {
5122 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
5123 return EmitAArch64CpuSupports(E);
5125 unsigned HintID =
static_cast<unsigned>(-1);
5126 switch (BuiltinID) {
5128 case clang::AArch64::BI__builtin_arm_nop:
5131 case clang::AArch64::BI__builtin_arm_yield:
5132 case clang::AArch64::BI__yield:
5135 case clang::AArch64::BI__builtin_arm_wfe:
5136 case clang::AArch64::BI__wfe:
5139 case clang::AArch64::BI__builtin_arm_wfi:
5140 case clang::AArch64::BI__wfi:
5143 case clang::AArch64::BI__builtin_arm_sev:
5144 case clang::AArch64::BI__sev:
5147 case clang::AArch64::BI__builtin_arm_sevl:
5148 case clang::AArch64::BI__sevl:
5153 if (HintID !=
static_cast<unsigned>(-1)) {
5154 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_hint);
5155 return Builder.CreateCall(F, llvm::ConstantInt::get(
Int32Ty, HintID));
5158 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5159 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_break);
5164 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5167 llvm::FunctionType::get(StructType::get(
CGM.Int64Ty,
CGM.Int64Ty), {},
5169 "__arm_sme_state"));
5171 "aarch64_pstate_sm_compatible");
5172 CI->setAttributes(Attrs);
5175 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5182 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5184 "rbit of unusual size!");
5187 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
5189 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5191 "rbit of unusual size!");
5194 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
5197 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5198 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5200 Function *F =
CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
5202 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5207 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5209 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
5212 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5214 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
5218 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5219 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5221 llvm::Type *Ty = Arg->getType();
5222 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
5226 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5227 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5229 llvm::Type *Ty = Arg->getType();
5230 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
5234 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5235 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5237 llvm::Type *Ty = Arg->getType();
5238 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
5242 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5243 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5245 llvm::Type *Ty = Arg->getType();
5246 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
5250 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5252 "__jcvt of unusual size!");
5255 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
5258 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5259 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5260 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5261 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5265 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5268 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
5269 llvm::Value *Val =
Builder.CreateCall(F, MemAddr);
5271 for (
size_t i = 0; i < 8; i++) {
5272 llvm::Value *ValOffsetPtr =
5284 Args.push_back(MemAddr);
5285 for (
size_t i = 0; i < 8; i++) {
5286 llvm::Value *ValOffsetPtr =
5292 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5293 ? Intrinsic::aarch64_st64b
5294 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5295 ? Intrinsic::aarch64_st64bv
5296 : Intrinsic::aarch64_st64bv0);
5298 return Builder.CreateCall(F, Args);
5301 if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
5312 auto *SizeC = llvm::ConstantInt::get(
Int32Ty, SizeBits);
5317 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store,
5323 F, {StoreAddr, StoreValue64,
5324 ConstantInt::get(
Int32Ty, OrderC->getZExtValue()),
5325 ConstantInt::get(
Int32Ty, PolicyC->getZExtValue()), SizeC});
5328 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5329 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5331 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5332 ? Intrinsic::aarch64_rndr
5333 : Intrinsic::aarch64_rndrrs);
5335 llvm::Value *Val =
Builder.CreateCall(F);
5336 Value *RandomValue =
Builder.CreateExtractValue(Val, 0);
5340 Builder.CreateStore(RandomValue, MemAddress);
5345 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5346 assert(E->
getNumArgs() == 2 &&
"__clear_cache takes 2 arguments");
5349 for (
unsigned i = 0; i < 2; i++)
5351 llvm::Type *Ty =
CGM.getTypes().ConvertType(FD->
getType());
5353 StringRef Name = FD->
getName();
5357 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5358 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5361 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5362 ? Intrinsic::aarch64_ldaxp
5363 : Intrinsic::aarch64_ldxp);
5370 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
5371 Val0 =
Builder.CreateZExt(Val0, Int128Ty);
5372 Val1 =
Builder.CreateZExt(Val1, Int128Ty);
5374 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5375 Val =
Builder.CreateShl(Val0, ShiftCst,
"shl",
true );
5376 Val =
Builder.CreateOr(Val, Val1);
5378 }
else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5379 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5388 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5389 ? Intrinsic::aarch64_ldaxr
5390 : Intrinsic::aarch64_ldxr,
5392 CallInst *Val =
Builder.CreateCall(F, LoadAddr,
"ldxr");
5396 if (RealResTy->isPointerTy())
5397 return Builder.CreateIntToPtr(Val, RealResTy);
5399 llvm::Type *IntResTy = llvm::IntegerType::get(
5401 return Builder.CreateBitCast(
Builder.CreateTruncOrBitCast(Val, IntResTy),
5405 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5406 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5409 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5410 ? Intrinsic::aarch64_stlxp
5411 : Intrinsic::aarch64_stxp);
5418 llvm::Value *Val =
Builder.CreateLoad(Tmp);
5423 return Builder.CreateCall(F, {Arg0, Arg1, StPtr},
"stxp");
5426 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5427 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5432 llvm::Type *StoreTy =
5435 if (StoreVal->
getType()->isPointerTy())
5438 llvm::Type *
IntTy = llvm::IntegerType::get(
5440 CGM.getDataLayout().getTypeSizeInBits(StoreVal->
getType()));
5446 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5447 ? Intrinsic::aarch64_stlxr
5448 : Intrinsic::aarch64_stxr,
5450 CallInst *CI =
Builder.CreateCall(F, {StoreVal, StoreAddr},
"stxr");
5452 1, Attribute::get(
getLLVMContext(), Attribute::ElementType, StoreTy));
5456 if (BuiltinID == clang::AArch64::BI__getReg) {
5459 llvm_unreachable(
"Sema will ensure that the parameter is constant");
5462 LLVMContext &Context =
CGM.getLLVMContext();
5465 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
5466 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5467 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5470 CGM.getIntrinsic(Intrinsic::read_register, {
Int64Ty});
5471 return Builder.CreateCall(F, Metadata);
5474 if (BuiltinID == clang::AArch64::BI__break) {
5477 llvm_unreachable(
"Sema will ensure that the parameter is constant");
5479 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_break);
5483 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5484 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5488 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5489 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5490 llvm::SyncScope::SingleThread);
5493 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5494 switch (BuiltinID) {
5495 case clang::AArch64::BI__builtin_arm_crc32b:
5496 CRCIntrinsicID = Intrinsic::aarch64_crc32b;
break;
5497 case clang::AArch64::BI__builtin_arm_crc32cb:
5498 CRCIntrinsicID = Intrinsic::aarch64_crc32cb;
break;
5499 case clang::AArch64::BI__builtin_arm_crc32h:
5500 CRCIntrinsicID = Intrinsic::aarch64_crc32h;
break;
5501 case clang::AArch64::BI__builtin_arm_crc32ch:
5502 CRCIntrinsicID = Intrinsic::aarch64_crc32ch;
break;
5503 case clang::AArch64::BI__builtin_arm_crc32w:
5504 CRCIntrinsicID = Intrinsic::aarch64_crc32w;
break;
5505 case clang::AArch64::BI__builtin_arm_crc32cw:
5506 CRCIntrinsicID = Intrinsic::aarch64_crc32cw;
break;
5507 case clang::AArch64::BI__builtin_arm_crc32d:
5508 CRCIntrinsicID = Intrinsic::aarch64_crc32x;
break;
5509 case clang::AArch64::BI__builtin_arm_crc32cd:
5510 CRCIntrinsicID = Intrinsic::aarch64_crc32cx;
break;
5513 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5518 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5519 Arg1 =
Builder.CreateZExtOrBitCast(Arg1, DataTy);
5521 return Builder.CreateCall(F, {Arg0, Arg1});
5525 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5532 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
5535 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
5536 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
5540 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5541 switch (BuiltinID) {
5542 case clang::AArch64::BI__builtin_arm_irg:
5543 MTEIntrinsicID = Intrinsic::aarch64_irg;
break;
5544 case clang::AArch64::BI__builtin_arm_addg:
5545 MTEIntrinsicID = Intrinsic::aarch64_addg;
break;
5546 case clang::AArch64::BI__builtin_arm_gmi:
5547 MTEIntrinsicID = Intrinsic::aarch64_gmi;
break;
5548 case clang::AArch64::BI__builtin_arm_ldg:
5549 MTEIntrinsicID = Intrinsic::aarch64_ldg;
break;
5550 case clang::AArch64::BI__builtin_arm_stg:
5551 MTEIntrinsicID = Intrinsic::aarch64_stg;
break;
5552 case clang::AArch64::BI__builtin_arm_subp:
5553 MTEIntrinsicID = Intrinsic::aarch64_subp;
break;
5556 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5557 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
5562 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
5565 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
5570 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
5571 {Pointer, TagOffset});
5573 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
5579 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
5584 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5586 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
5587 {TagAddress, TagAddress});
5592 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5594 return Builder.CreateCall(
CGM.getIntrinsic(MTEIntrinsicID),
5595 {TagAddress, TagAddress});
5597 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5601 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
5605 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5606 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5607 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5608 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5609 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5610 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5611 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5612 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5615 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5616 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5617 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5618 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5621 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5622 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5624 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5625 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5627 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5628 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5630 llvm::Type *ValueType;
5634 }
else if (Is128Bit) {
5635 llvm::Type *Int128Ty =
5636 llvm::IntegerType::getInt128Ty(
CGM.getLLVMContext());
5637 ValueType = Int128Ty;
5639 }
else if (IsPointerBuiltin) {
5649 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5650 BuiltinID == clang::AArch64::BI_WriteStatusReg ||
5651 BuiltinID == clang::AArch64::BI__sys) {
5652 LLVMContext &Context =
CGM.getLLVMContext();
5657 std::string SysRegStr;
5658 unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5659 BuiltinID == clang::AArch64::BI_WriteStatusReg)
5660 ? ((1 << 1) | ((SysReg >> 14) & 1))
5662 llvm::raw_string_ostream(SysRegStr)
5663 << SysRegOp0 <<
":" << ((SysReg >> 11) & 7) <<
":"
5664 << ((SysReg >> 7) & 15) <<
":" << ((SysReg >> 3) & 15) <<
":"
5667 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
5668 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5669 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5674 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5675 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::read_register, Types);
5677 return Builder.CreateCall(F, Metadata);
5680 llvm::Function *F =
CGM.getIntrinsic(Intrinsic::write_register, Types);
5682 llvm::Value *
Result =
Builder.CreateCall(F, {Metadata, ArgValue});
5683 if (BuiltinID == clang::AArch64::BI__sys) {
5691 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5697 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5702 if (BuiltinID == clang::AArch64::BI__mulh ||
5703 BuiltinID == clang::AArch64::BI__umulh) {
5705 llvm::Type *Int128Ty = llvm::IntegerType::get(
getLLVMContext(), 128);
5707 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5713 Value *MulResult, *HigherBits;
5715 MulResult =
Builder.CreateNSWMul(LHS, RHS);
5716 HigherBits =
Builder.CreateAShr(MulResult, 64);
5718 MulResult =
Builder.CreateNUWMul(LHS, RHS);
5719 HigherBits =
Builder.CreateLShr(MulResult, 64);
5721 HigherBits =
Builder.CreateIntCast(HigherBits, ResType, IsSigned);
5726 if (BuiltinID == AArch64::BI__writex18byte ||
5727 BuiltinID == AArch64::BI__writex18word ||
5728 BuiltinID == AArch64::BI__writex18dword ||
5729 BuiltinID == AArch64::BI__writex18qword) {
5745 if (BuiltinID == AArch64::BI__readx18byte ||
5746 BuiltinID == AArch64::BI__readx18word ||
5747 BuiltinID == AArch64::BI__readx18dword ||
5748 BuiltinID == AArch64::BI__readx18qword) {
5763 if (BuiltinID == AArch64::BI__addx18byte ||
5764 BuiltinID == AArch64::BI__addx18word ||
5765 BuiltinID == AArch64::BI__addx18dword ||
5766 BuiltinID == AArch64::BI__addx18qword ||
5767 BuiltinID == AArch64::BI__incx18byte ||
5768 BuiltinID == AArch64::BI__incx18word ||
5769 BuiltinID == AArch64::BI__incx18dword ||
5770 BuiltinID == AArch64::BI__incx18qword) {
5773 switch (BuiltinID) {
5774 case AArch64::BI__incx18byte:
5778 case AArch64::BI__incx18word:
5782 case AArch64::BI__incx18dword:
5786 case AArch64::BI__incx18qword:
5792 isIncrement =
false;
5817 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5818 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5819 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5820 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5823 return Builder.CreateBitCast(Arg, RetTy);
5826 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5827 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5828 BuiltinID == AArch64::BI_CountLeadingZeros ||
5829 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5833 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5834 BuiltinID == AArch64::BI_CountLeadingOnes64)
5835 Arg =
Builder.CreateXor(Arg, Constant::getAllOnesValue(
ArgType));
5840 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5841 BuiltinID == AArch64::BI_CountLeadingZeros64)
5846 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5847 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5850 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5851 ?
CGM.getIntrinsic(Intrinsic::aarch64_cls)
5852 :
CGM.getIntrinsic(Intrinsic::aarch64_cls64);
5855 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5860 if (BuiltinID == AArch64::BI_CountOneBits ||
5861 BuiltinID == AArch64::BI_CountOneBits64) {
5867 if (BuiltinID == AArch64::BI_CountOneBits64)
5872 if (BuiltinID == AArch64::BI__prefetch) {
5881 if (BuiltinID == AArch64::BI__hlt) {
5882 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_hlt);
5887 return ConstantInt::get(
Builder.getInt32Ty(), 0);
5890 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5898 if (std::optional<MSVCIntrin> MsvcIntId =
5904 return P.first == BuiltinID;
5907 BuiltinID = It->second;
5913 bool IsSISD = (
Builtin !=
nullptr);
5917 unsigned ICEArguments = 0;
5928 unsigned NumArgs = E->
getNumArgs() - (HasExtraArg ? 1 : 0);
5929 for (
unsigned i = 0, e = NumArgs; i != e; i++) {
5931 switch (BuiltinID) {
5932 case NEON::BI__builtin_neon_vld1_v:
5933 case NEON::BI__builtin_neon_vld1q_v:
5934 case NEON::BI__builtin_neon_vld1_dup_v:
5935 case NEON::BI__builtin_neon_vld1q_dup_v:
5936 case NEON::BI__builtin_neon_vld1_lane_v:
5937 case NEON::BI__builtin_neon_vld1q_lane_v:
5938 case NEON::BI__builtin_neon_vst1_v:
5939 case NEON::BI__builtin_neon_vst1q_v:
5940 case NEON::BI__builtin_neon_vst1_lane_v:
5941 case NEON::BI__builtin_neon_vst1q_lane_v:
5942 case NEON::BI__builtin_neon_vldap1_lane_s64:
5943 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5944 case NEON::BI__builtin_neon_vstl1_lane_s64:
5945 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5958 assert(
Result &&
"SISD intrinsic should have been handled");
5964 if (std::optional<llvm::APSInt>
Result =
5969 bool usgn =
Type.isUnsigned();
5970 bool quad =
Type.isQuad();
5988 switch (BuiltinID) {
5990 case NEON::BI__builtin_neon_vabsh_f16:
5992 case NEON::BI__builtin_neon_vaddq_p128: {
5994 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
5995 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
5996 Ops[0] =
Builder.CreateXor(Ops[0], Ops[1]);
5997 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
5998 return Builder.CreateBitCast(Ops[0], Int128Ty);
6000 case NEON::BI__builtin_neon_vldrq_p128: {
6001 llvm::Type *Int128Ty = llvm::Type::getIntNTy(
getLLVMContext(), 128);
6002 return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
6005 case NEON::BI__builtin_neon_vstrq_p128: {
6006 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6008 case NEON::BI__builtin_neon_vcvts_f32_u32:
6009 case NEON::BI__builtin_neon_vcvtd_f64_u64:
6012 case NEON::BI__builtin_neon_vcvts_f32_s32:
6013 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
6014 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
6017 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
6019 return Builder.CreateUIToFP(Ops[0], FTy);
6020 return Builder.CreateSIToFP(Ops[0], FTy);
6022 case NEON::BI__builtin_neon_vcvth_f16_u16:
6023 case NEON::BI__builtin_neon_vcvth_f16_u32:
6024 case NEON::BI__builtin_neon_vcvth_f16_u64:
6027 case NEON::BI__builtin_neon_vcvth_f16_s16:
6028 case NEON::BI__builtin_neon_vcvth_f16_s32:
6029 case NEON::BI__builtin_neon_vcvth_f16_s64: {
6030 llvm::Type *FTy =
HalfTy;
6032 if (Ops[0]->
getType()->getPrimitiveSizeInBits() == 64)
6034 else if (Ops[0]->
getType()->getPrimitiveSizeInBits() == 32)
6038 Ops[0] =
Builder.CreateBitCast(Ops[0], InTy);
6040 return Builder.CreateUIToFP(Ops[0], FTy);
6041 return Builder.CreateSIToFP(Ops[0], FTy);
6043 case NEON::BI__builtin_neon_vcvtah_u16_f16:
6044 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
6045 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
6046 case NEON::BI__builtin_neon_vcvtph_u16_f16:
6047 case NEON::BI__builtin_neon_vcvth_u16_f16:
6048 case NEON::BI__builtin_neon_vcvtah_s16_f16:
6049 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
6050 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
6051 case NEON::BI__builtin_neon_vcvtph_s16_f16:
6052 case NEON::BI__builtin_neon_vcvth_s16_f16: {
6054 llvm::Type* FTy =
HalfTy;
6055 llvm::Type *Tys[2] = {InTy, FTy};
6056 switch (BuiltinID) {
6057 default: llvm_unreachable(
"missing builtin ID in switch!");
6058 case NEON::BI__builtin_neon_vcvtah_u16_f16:
6059 Int = Intrinsic::aarch64_neon_fcvtau;
break;
6060 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
6061 Int = Intrinsic::aarch64_neon_fcvtmu;
break;
6062 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
6063 Int = Intrinsic::aarch64_neon_fcvtnu;
break;
6064 case NEON::BI__builtin_neon_vcvtph_u16_f16:
6065 Int = Intrinsic::aarch64_neon_fcvtpu;
break;
6066 case NEON::BI__builtin_neon_vcvth_u16_f16:
6067 Int = Intrinsic::aarch64_neon_fcvtzu;
break;
6068 case NEON::BI__builtin_neon_vcvtah_s16_f16:
6069 Int = Intrinsic::aarch64_neon_fcvtas;
break;
6070 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
6071 Int = Intrinsic::aarch64_neon_fcvtms;
break;
6072 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
6073 Int = Intrinsic::aarch64_neon_fcvtns;
break;
6074 case NEON::BI__builtin_neon_vcvtph_s16_f16:
6075 Int = Intrinsic::aarch64_neon_fcvtps;
break;
6076 case NEON::BI__builtin_neon_vcvth_s16_f16:
6077 Int = Intrinsic::aarch64_neon_fcvtzs;
break;
6081 case NEON::BI__builtin_neon_vcaleh_f16:
6082 case NEON::BI__builtin_neon_vcalth_f16:
6083 case NEON::BI__builtin_neon_vcageh_f16:
6084 case NEON::BI__builtin_neon_vcagth_f16: {
6086 llvm::Type* FTy =
HalfTy;
6087 llvm::Type *Tys[2] = {InTy, FTy};
6088 switch (BuiltinID) {
6089 default: llvm_unreachable(
"missing builtin ID in switch!");
6090 case NEON::BI__builtin_neon_vcageh_f16:
6091 Int = Intrinsic::aarch64_neon_facge;
break;
6092 case NEON::BI__builtin_neon_vcagth_f16:
6093 Int = Intrinsic::aarch64_neon_facgt;
break;
6094 case NEON::BI__builtin_neon_vcaleh_f16:
6095 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]);
break;
6096 case NEON::BI__builtin_neon_vcalth_f16:
6097 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]);
break;
6102 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
6103 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
6105 llvm::Type* FTy =
HalfTy;
6106 llvm::Type *Tys[2] = {InTy, FTy};
6107 switch (BuiltinID) {
6108 default: llvm_unreachable(
"missing builtin ID in switch!");
6109 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
6110 Int = Intrinsic::aarch64_neon_vcvtfp2fxs;
break;
6111 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
6112 Int = Intrinsic::aarch64_neon_vcvtfp2fxu;
break;
6117 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
6118 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
6119 llvm::Type* FTy =
HalfTy;
6121 llvm::Type *Tys[2] = {FTy, InTy};
6122 switch (BuiltinID) {
6123 default: llvm_unreachable(
"missing builtin ID in switch!");
6124 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
6125 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
6126 Ops[0] =
Builder.CreateSExt(Ops[0], InTy,
"sext");
6128 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
6129 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
6130 Ops[0] =
Builder.CreateZExt(Ops[0], InTy);
6135 case NEON::BI__builtin_neon_vpaddd_s64: {
6138 auto *Ty = llvm::FixedVectorType::get(
Int64Ty, 2);
6140 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2i64");
6141 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
6142 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
6143 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
6144 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
6146 return Builder.CreateAdd(Op0, Op1,
"vpaddd");
6148 case NEON::BI__builtin_neon_vpaddd_f64: {
6149 auto *Ty = llvm::FixedVectorType::get(
DoubleTy, 2);
6151 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2f64");
6152 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
6153 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
6154 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
6155 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
6157 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
6159 case NEON::BI__builtin_neon_vpadds_f32: {
6160 auto *Ty = llvm::FixedVectorType::get(
FloatTy, 2);
6162 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty,
"v2f32");
6163 llvm::Value *Idx0 = llvm::ConstantInt::get(
SizeTy, 0);
6164 llvm::Value *Idx1 = llvm::ConstantInt::get(
SizeTy, 1);
6165 Value *Op0 =
Builder.CreateExtractElement(Ops[0], Idx0,
"lane0");
6166 Value *Op1 =
Builder.CreateExtractElement(Ops[0], Idx1,
"lane1");
6168 return Builder.CreateFAdd(Op0, Op1,
"vpaddd");
6170 case NEON::BI__builtin_neon_vceqzd_s64:
6173 ICmpInst::ICMP_EQ,
"vceqz");
6174 case NEON::BI__builtin_neon_vceqzd_f64:
6175 case NEON::BI__builtin_neon_vceqzs_f32:
6176 case NEON::BI__builtin_neon_vceqzh_f16:
6179 ICmpInst::FCMP_OEQ,
"vceqz");
6180 case NEON::BI__builtin_neon_vcgezd_s64:
6183 ICmpInst::ICMP_SGE,
"vcgez");
6184 case NEON::BI__builtin_neon_vcgezd_f64:
6185 case NEON::BI__builtin_neon_vcgezs_f32:
6186 case NEON::BI__builtin_neon_vcgezh_f16:
6189 ICmpInst::FCMP_OGE,
"vcgez");
6190 case NEON::BI__builtin_neon_vclezd_s64:
6193 ICmpInst::ICMP_SLE,
"vclez");
6194 case NEON::BI__builtin_neon_vclezd_f64:
6195 case NEON::BI__builtin_neon_vclezs_f32:
6196 case NEON::BI__builtin_neon_vclezh_f16:
6199 ICmpInst::FCMP_OLE,
"vclez");
6200 case NEON::BI__builtin_neon_vcgtzd_s64:
6203 ICmpInst::ICMP_SGT,
"vcgtz");
6204 case NEON::BI__builtin_neon_vcgtzd_f64:
6205 case NEON::BI__builtin_neon_vcgtzs_f32:
6206 case NEON::BI__builtin_neon_vcgtzh_f16:
6209 ICmpInst::FCMP_OGT,
"vcgtz");
6210 case NEON::BI__builtin_neon_vcltzd_s64:
6213 ICmpInst::ICMP_SLT,
"vcltz");
6215 case NEON::BI__builtin_neon_vcltzd_f64:
6216 case NEON::BI__builtin_neon_vcltzs_f32:
6217 case NEON::BI__builtin_neon_vcltzh_f16:
6220 ICmpInst::FCMP_OLT,
"vcltz");
6222 case NEON::BI__builtin_neon_vceqzd_u64: {
6225 ICmpInst::ICMP_EQ,
"vceqzd");
6227 case NEON::BI__builtin_neon_vceqd_f64:
6228 case NEON::BI__builtin_neon_vcled_f64:
6229 case NEON::BI__builtin_neon_vcltd_f64:
6230 case NEON::BI__builtin_neon_vcged_f64:
6231 case NEON::BI__builtin_neon_vcgtd_f64: {
6232 llvm::CmpInst::Predicate P;
6233 switch (BuiltinID) {
6234 default: llvm_unreachable(
"missing builtin ID in switch!");
6235 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ;
break;
6236 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE;
break;
6237 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT;
break;
6238 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE;
break;
6239 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT;
break;
6243 if (P == llvm::FCmpInst::FCMP_OEQ)
6244 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
6246 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6249 case NEON::BI__builtin_neon_vceqs_f32:
6250 case NEON::BI__builtin_neon_vcles_f32:
6251 case NEON::BI__builtin_neon_vclts_f32:
6252 case NEON::BI__builtin_neon_vcges_f32:
6253 case NEON::BI__builtin_neon_vcgts_f32: {
6254 llvm::CmpInst::Predicate P;
6255 switch (BuiltinID) {
6256 default: llvm_unreachable(
"missing builtin ID in switch!");
6257 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ;
break;
6258 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE;
break;
6259 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT;
break;
6260 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE;
break;
6261 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT;
break;
6265 if (P == llvm::FCmpInst::FCMP_OEQ)
6266 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
6268 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6271 case NEON::BI__builtin_neon_vceqh_f16:
6272 case NEON::BI__builtin_neon_vcleh_f16:
6273 case NEON::BI__builtin_neon_vclth_f16:
6274 case NEON::BI__builtin_neon_vcgeh_f16:
6275 case NEON::BI__builtin_neon_vcgth_f16: {
6276 llvm::CmpInst::Predicate P;
6277 switch (BuiltinID) {
6278 default: llvm_unreachable(
"missing builtin ID in switch!");
6279 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ;
break;
6280 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE;
break;
6281 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT;
break;
6282 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE;
break;
6283 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT;
break;
6287 if (P == llvm::FCmpInst::FCMP_OEQ)
6288 Ops[0] =
Builder.CreateFCmp(P, Ops[0], Ops[1]);
6290 Ops[0] =
Builder.CreateFCmpS(P, Ops[0], Ops[1]);
6293 case NEON::BI__builtin_neon_vceqd_s64:
6294 case NEON::BI__builtin_neon_vceqd_u64:
6295 case NEON::BI__builtin_neon_vcgtd_s64:
6296 case NEON::BI__builtin_neon_vcgtd_u64:
6297 case NEON::BI__builtin_neon_vcltd_s64:
6298 case NEON::BI__builtin_neon_vcltd_u64:
6299 case NEON::BI__builtin_neon_vcged_u64:
6300 case NEON::BI__builtin_neon_vcged_s64:
6301 case NEON::BI__builtin_neon_vcled_u64:
6302 case NEON::BI__builtin_neon_vcled_s64: {
6303 llvm::CmpInst::Predicate P;
6304 switch (BuiltinID) {
6305 default: llvm_unreachable(
"missing builtin ID in switch!");
6306 case NEON::BI__builtin_neon_vceqd_s64:
6307 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;
break;
6308 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;
break;
6309 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;
break;
6310 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;
break;
6311 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;
break;
6312 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;
break;
6313 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;
break;
6314 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;
break;
6315 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;
break;
6319 Ops[0] =
Builder.CreateICmp(P, Ops[0], Ops[1]);
6322 case NEON::BI__builtin_neon_vnegd_s64:
6323 return Builder.CreateNeg(Ops[0],
"vnegd");
6324 case NEON::BI__builtin_neon_vnegh_f16:
6325 return Builder.CreateFNeg(Ops[0],
"vnegh");
6326 case NEON::BI__builtin_neon_vtstd_s64:
6327 case NEON::BI__builtin_neon_vtstd_u64: {
6330 Ops[0] =
Builder.CreateAnd(Ops[0], Ops[1]);
6331 Ops[0] =
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6332 llvm::Constant::getNullValue(
Int64Ty));
6335 case NEON::BI__builtin_neon_vset_lane_i8:
6336 case NEON::BI__builtin_neon_vset_lane_i16:
6337 case NEON::BI__builtin_neon_vset_lane_i32:
6338 case NEON::BI__builtin_neon_vset_lane_i64:
6339 case NEON::BI__builtin_neon_vset_lane_bf16:
6340 case NEON::BI__builtin_neon_vset_lane_f32:
6341 case NEON::BI__builtin_neon_vsetq_lane_i8:
6342 case NEON::BI__builtin_neon_vsetq_lane_i16:
6343 case NEON::BI__builtin_neon_vsetq_lane_i32:
6344 case NEON::BI__builtin_neon_vsetq_lane_i64:
6345 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6346 case NEON::BI__builtin_neon_vsetq_lane_f32:
6347 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
6348 case NEON::BI__builtin_neon_vset_lane_f64:
6351 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 1));
6352 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
6353 case NEON::BI__builtin_neon_vset_lane_mf8:
6354 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6358 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
6359 case NEON::BI__builtin_neon_vsetq_lane_f64:
6362 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(
DoubleTy, 2));
6363 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vset_lane");
6365 case NEON::BI__builtin_neon_vget_lane_i8:
6366 case NEON::BI__builtin_neon_vdupb_lane_i8:
6368 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int8Ty, 8));
6369 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
6370 case NEON::BI__builtin_neon_vgetq_lane_i8:
6371 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6373 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int8Ty, 16));
6374 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
6375 case NEON::BI__builtin_neon_vget_lane_mf8:
6376 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6377 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6378 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6379 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
6380 case NEON::BI__builtin_neon_vget_lane_i16:
6381 case NEON::BI__builtin_neon_vduph_lane_i16:
6383 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int16Ty, 4));
6384 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
6385 case NEON::BI__builtin_neon_vgetq_lane_i16:
6386 case NEON::BI__builtin_neon_vduph_laneq_i16:
6388 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int16Ty, 8));
6389 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
6390 case NEON::BI__builtin_neon_vget_lane_i32:
6391 case NEON::BI__builtin_neon_vdups_lane_i32:
6393 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int32Ty, 2));
6394 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
6395 case NEON::BI__builtin_neon_vdups_lane_f32:
6397 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 2));
6398 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vdups_lane");
6399 case NEON::BI__builtin_neon_vgetq_lane_i32:
6400 case NEON::BI__builtin_neon_vdups_laneq_i32:
6402 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int32Ty, 4));
6403 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
6404 case NEON::BI__builtin_neon_vget_lane_i64:
6405 case NEON::BI__builtin_neon_vdupd_lane_i64:
6407 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int64Ty, 1));
6408 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
6409 case NEON::BI__builtin_neon_vdupd_lane_f64:
6411 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 1));
6412 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vdupd_lane");
6413 case NEON::BI__builtin_neon_vgetq_lane_i64:
6414 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6416 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
Int64Ty, 2));
6417 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
6418 case NEON::BI__builtin_neon_vget_lane_f32:
6420 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 2));
6421 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
6422 case NEON::BI__builtin_neon_vget_lane_f64:
6424 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 1));
6425 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
6426 case NEON::BI__builtin_neon_vgetq_lane_f32:
6427 case NEON::BI__builtin_neon_vdups_laneq_f32:
6429 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
FloatTy, 4));
6430 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
6431 case NEON::BI__builtin_neon_vgetq_lane_f64:
6432 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6434 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(
DoubleTy, 2));
6435 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
6436 case NEON::BI__builtin_neon_vaddh_f16:
6437 return Builder.CreateFAdd(Ops[0], Ops[1],
"vaddh");
6438 case NEON::BI__builtin_neon_vsubh_f16:
6439 return Builder.CreateFSub(Ops[0], Ops[1],
"vsubh");
6440 case NEON::BI__builtin_neon_vmulh_f16:
6441 return Builder.CreateFMul(Ops[0], Ops[1],
"vmulh");
6442 case NEON::BI__builtin_neon_vdivh_f16:
6443 return Builder.CreateFDiv(Ops[0], Ops[1],
"vdivh");
6444 case NEON::BI__builtin_neon_vfmah_f16:
6447 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
6448 {Ops[1], Ops[2], Ops[0]});
6449 case NEON::BI__builtin_neon_vfmsh_f16: {
6454 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
HalfTy,
6455 {Neg, Ops[2], Ops[0]});
6457 case NEON::BI__builtin_neon_vaddd_s64:
6458 case NEON::BI__builtin_neon_vaddd_u64:
6459 return Builder.CreateAdd(Ops[0], Ops[1],
"vaddd");
6460 case NEON::BI__builtin_neon_vsubd_s64:
6461 case NEON::BI__builtin_neon_vsubd_u64:
6462 return Builder.CreateSub(Ops[0], Ops[1],
"vsubd");
6463 case NEON::BI__builtin_neon_vqdmlalh_s16:
6464 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6468 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
6469 Ops[1] =
EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6470 ProductOps,
"vqdmlXl");
6471 Constant *CI = ConstantInt::get(
SizeTy, 0);
6472 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
6474 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6475 ? Intrinsic::aarch64_neon_sqadd
6476 : Intrinsic::aarch64_neon_sqsub;
6481 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6486 case NEON::BI__builtin_neon_vqshld_n_u64:
6487 case NEON::BI__builtin_neon_vqshld_n_s64: {
6488 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6489 ? Intrinsic::aarch64_neon_uqshl
6490 : Intrinsic::aarch64_neon_sqshl;
6494 case NEON::BI__builtin_neon_vrshrd_n_u64:
6495 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6496 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6497 ? Intrinsic::aarch64_neon_urshl
6498 : Intrinsic::aarch64_neon_srshl;
6500 Ops[1] = ConstantInt::get(
Int64Ty, -SV);
6503 case NEON::BI__builtin_neon_vrsrad_n_u64:
6504 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6505 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6506 ? Intrinsic::aarch64_neon_urshl
6507 : Intrinsic::aarch64_neon_srshl;
6509 Ops[2] =
Builder.CreateNeg(Ops[2]);
6511 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6514 case NEON::BI__builtin_neon_vshld_n_s64:
6515 case NEON::BI__builtin_neon_vshld_n_u64: {
6518 Ops[0], ConstantInt::get(
Int64Ty, Amt->getZExtValue()),
"shld_n");
6520 case NEON::BI__builtin_neon_vshrd_n_s64: {
6523 Ops[0], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t
>(63),
6524 Amt->getZExtValue())),
6527 case NEON::BI__builtin_neon_vshrd_n_u64: {
6529 uint64_t ShiftAmt = Amt->getZExtValue();
6532 return ConstantInt::get(
Int64Ty, 0);
6533 return Builder.CreateLShr(Ops[0], ConstantInt::get(
Int64Ty, ShiftAmt),
6536 case NEON::BI__builtin_neon_vsrad_n_s64: {
6539 Ops[1], ConstantInt::get(
Int64Ty, std::min(
static_cast<uint64_t
>(63),
6540 Amt->getZExtValue())),
6542 return Builder.CreateAdd(Ops[0], Ops[1]);
6544 case NEON::BI__builtin_neon_vsrad_n_u64: {
6546 uint64_t ShiftAmt = Amt->getZExtValue();
6551 Ops[1] =
Builder.CreateLShr(Ops[1], ConstantInt::get(
Int64Ty, ShiftAmt),
6553 return Builder.CreateAdd(Ops[0], Ops[1]);
6555 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6556 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6557 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6558 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6559 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"lane");
6563 auto *VTy = llvm::FixedVectorType::get(
Int32Ty, 4);
6564 Ops[1] =
EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6565 ProductOps,
"vqdmlXl");
6566 Constant *CI = ConstantInt::get(
SizeTy, 0);
6567 Ops[1] =
Builder.CreateExtractElement(Ops[1], CI,
"lane0");
6572 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6573 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6574 ? Intrinsic::aarch64_neon_sqadd
6575 : Intrinsic::aarch64_neon_sqsub;
6578 case NEON::BI__builtin_neon_vqdmlals_s32:
6579 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6581 ProductOps.push_back(Ops[1]);
6582 ProductOps.push_back(Ops[2]);
6584 EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6585 ProductOps,
"vqdmlXl");
6587 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6588 ? Intrinsic::aarch64_neon_sqadd
6589 : Intrinsic::aarch64_neon_sqsub;
6594 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6595 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6596 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6597 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6598 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"lane");
6600 ProductOps.push_back(Ops[1]);
6601 ProductOps.push_back(Ops[2]);
6603 EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6604 ProductOps,
"vqdmlXl");
6609 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6610 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6611 ? Intrinsic::aarch64_neon_sqadd
6612 : Intrinsic::aarch64_neon_sqsub;
6615 case NEON::BI__builtin_neon_vget_lane_bf16:
6616 case NEON::BI__builtin_neon_vduph_lane_bf16:
6617 case NEON::BI__builtin_neon_vduph_lane_f16: {
6618 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vget_lane");
6620 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6621 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6622 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6623 return Builder.CreateExtractElement(Ops[0], Ops[1],
"vgetq_lane");
6625 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6626 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6627 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
6628 return Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6630 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6632 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6633 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6634 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
6635 llvm::Value *Trunc =
6636 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
6637 return Builder.CreateShuffleVector(
6638 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
6640 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6642 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
6644 std::iota(LoMask.begin(), LoMask.end(), 0);
6645 llvm::Type *V4F32 = FixedVectorType::get(
Builder.getFloatTy(), 4);
6646 llvm::Type *V4BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 4);
6647 llvm::Type *V8BF16 = FixedVectorType::get(
Builder.getBFloatTy(), 8);
6648 llvm::Value *Inactive =
Builder.CreateShuffleVector(
6649 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
6650 llvm::Value *Trunc =
6651 Builder.CreateFPTrunc(
Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
6652 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
6655 case clang::AArch64::BI_InterlockedAdd:
6656 case clang::AArch64::BI_InterlockedAdd_acq:
6657 case clang::AArch64::BI_InterlockedAdd_rel:
6658 case clang::AArch64::BI_InterlockedAdd_nf:
6659 case clang::AArch64::BI_InterlockedAdd64:
6660 case clang::AArch64::BI_InterlockedAdd64_acq:
6661 case clang::AArch64::BI_InterlockedAdd64_rel:
6662 case clang::AArch64::BI_InterlockedAdd64_nf: {
6664 Value *Val = Ops[1];
6665 llvm::AtomicOrdering Ordering;
6666 switch (BuiltinID) {
6667 case clang::AArch64::BI_InterlockedAdd:
6668 case clang::AArch64::BI_InterlockedAdd64:
6669 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6671 case clang::AArch64::BI_InterlockedAdd_acq:
6672 case clang::AArch64::BI_InterlockedAdd64_acq:
6673 Ordering = llvm::AtomicOrdering::Acquire;
6675 case clang::AArch64::BI_InterlockedAdd_rel:
6676 case clang::AArch64::BI_InterlockedAdd64_rel:
6677 Ordering = llvm::AtomicOrdering::Release;
6679 case clang::AArch64::BI_InterlockedAdd_nf:
6680 case clang::AArch64::BI_InterlockedAdd64_nf:
6681 Ordering = llvm::AtomicOrdering::Monotonic;
6684 llvm_unreachable(
"missing builtin ID in switch!");
6686 AtomicRMWInst *RMWI =
6687 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
6688 return Builder.CreateAdd(RMWI, Val);
6693 llvm::Type *Ty = VTy;
6697 bool ExtractLow =
false;
6698 bool ExtendLaneArg =
false;
6699 switch (BuiltinID) {
6700 default:
return nullptr;
6701 case NEON::BI__builtin_neon_vbsl_v:
6702 case NEON::BI__builtin_neon_vbslq_v: {
6703 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6704 Ops[0] =
Builder.CreateBitCast(Ops[0], BitTy,
"vbsl");
6705 Ops[1] =
Builder.CreateBitCast(Ops[1], BitTy,
"vbsl");
6706 Ops[2] =
Builder.CreateBitCast(Ops[2], BitTy,
"vbsl");
6708 Ops[1] =
Builder.CreateAnd(Ops[0], Ops[1],
"vbsl");
6709 Ops[2] =
Builder.CreateAnd(
Builder.CreateNot(Ops[0]), Ops[2],
"vbsl");
6710 Ops[0] =
Builder.CreateOr(Ops[1], Ops[2],
"vbsl");
6711 return Builder.CreateBitCast(Ops[0], Ty);
6713 case NEON::BI__builtin_neon_vfma_lane_v:
6714 case NEON::BI__builtin_neon_vfmaq_lane_v: {
6717 Value *Addend = Ops[0];
6718 Value *Multiplicand = Ops[1];
6719 Value *LaneSource = Ops[2];
6720 Ops[0] = Multiplicand;
6721 Ops[1] = LaneSource;
6725 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6726 ? llvm::FixedVectorType::get(VTy->getElementType(),
6727 VTy->getNumElements() / 2)
6730 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
6731 Ops[1] =
Builder.CreateBitCast(Ops[1], SourceTy);
6732 Ops[1] =
Builder.CreateShuffleVector(Ops[1], Ops[1], SV,
"lane");
6735 Int =
Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6739 case NEON::BI__builtin_neon_vfma_laneq_v: {
6742 if (VTy && VTy->getElementType() ==
DoubleTy) {
6745 llvm::FixedVectorType *VTy =
6747 Ops[2] =
Builder.CreateBitCast(Ops[2], VTy);
6748 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
6751 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
6752 DoubleTy, {Ops[1], Ops[2], Ops[0]});
6755 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6756 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6758 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
6759 VTy->getNumElements() * 2);
6760 Ops[2] =
Builder.CreateBitCast(Ops[2], STy);
6761 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
6763 Ops[2] =
Builder.CreateShuffleVector(Ops[2], Ops[2], SV,
"lane");
6766 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6767 {Ops[2], Ops[1], Ops[0]});
6769 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6770 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
6771 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
6773 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
6776 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6777 {Ops[2], Ops[1], Ops[0]});
6779 case NEON::BI__builtin_neon_vfmah_lane_f16:
6780 case NEON::BI__builtin_neon_vfmas_lane_f32:
6781 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6782 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6783 case NEON::BI__builtin_neon_vfmad_lane_f64:
6784 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6786 Ops[2] =
Builder.CreateExtractElement(Ops[2], Ops[3],
"extract");
6788 *
this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
6789 {Ops[1], Ops[2], Ops[0]});
6791 case NEON::BI__builtin_neon_vmull_v:
6793 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6794 if (
Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6796 case NEON::BI__builtin_neon_vmax_v:
6797 case NEON::BI__builtin_neon_vmaxq_v:
6799 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6800 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6802 case NEON::BI__builtin_neon_vmaxh_f16: {
6803 Int = Intrinsic::aarch64_neon_fmax;
6806 case NEON::BI__builtin_neon_vmin_v:
6807 case NEON::BI__builtin_neon_vminq_v:
6809 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6810 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6812 case NEON::BI__builtin_neon_vminh_f16: {
6813 Int = Intrinsic::aarch64_neon_fmin;
6816 case NEON::BI__builtin_neon_vabd_v:
6817 case NEON::BI__builtin_neon_vabdq_v:
6819 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6820 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6822 case NEON::BI__builtin_neon_vpadal_v:
6823 case NEON::BI__builtin_neon_vpadalq_v: {
6824 unsigned ArgElts = VTy->getNumElements();
6826 unsigned BitWidth = EltTy->getBitWidth();
6827 auto *ArgTy = llvm::FixedVectorType::get(
6828 llvm::IntegerType::get(
getLLVMContext(), BitWidth / 2), 2 * ArgElts);
6829 llvm::Type* Tys[2] = { VTy, ArgTy };
6830 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6832 TmpOps.push_back(Ops[1]);
6835 llvm::Value *addend =
Builder.CreateBitCast(Ops[0], tmp->getType());
6836 return Builder.CreateAdd(tmp, addend);
6838 case NEON::BI__builtin_neon_vpmin_v:
6839 case NEON::BI__builtin_neon_vpminq_v:
6841 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6842 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6844 case NEON::BI__builtin_neon_vpmax_v:
6845 case NEON::BI__builtin_neon_vpmaxq_v:
6847 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6848 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6850 case NEON::BI__builtin_neon_vminnm_v:
6851 case NEON::BI__builtin_neon_vminnmq_v:
6852 Int = Intrinsic::aarch64_neon_fminnm;
6854 case NEON::BI__builtin_neon_vminnmh_f16:
6855 Int = Intrinsic::aarch64_neon_fminnm;
6857 case NEON::BI__builtin_neon_vmaxnm_v:
6858 case NEON::BI__builtin_neon_vmaxnmq_v:
6859 Int = Intrinsic::aarch64_neon_fmaxnm;
6861 case NEON::BI__builtin_neon_vmaxnmh_f16:
6862 Int = Intrinsic::aarch64_neon_fmaxnm;
6864 case NEON::BI__builtin_neon_vrecpss_f32: {
6868 case NEON::BI__builtin_neon_vrecpsd_f64:
6871 case NEON::BI__builtin_neon_vrecpsh_f16:
6874 case NEON::BI__builtin_neon_vqshrun_n_v:
6875 Int = Intrinsic::aarch64_neon_sqshrun;
6877 case NEON::BI__builtin_neon_vqrshrun_n_v:
6878 Int = Intrinsic::aarch64_neon_sqrshrun;
6880 case NEON::BI__builtin_neon_vqshrn_n_v:
6881 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6883 case NEON::BI__builtin_neon_vrshrn_n_v:
6884 Int = Intrinsic::aarch64_neon_rshrn;
6886 case NEON::BI__builtin_neon_vqrshrn_n_v:
6887 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6889 case NEON::BI__builtin_neon_vrndah_f16: {
6890 Int =
Builder.getIsFPConstrained()
6891 ? Intrinsic::experimental_constrained_round
6895 case NEON::BI__builtin_neon_vrnda_v:
6896 case NEON::BI__builtin_neon_vrndaq_v: {
6897 Int =
Builder.getIsFPConstrained()
6898 ? Intrinsic::experimental_constrained_round
6902 case NEON::BI__builtin_neon_vrndih_f16: {
6903 Int =
Builder.getIsFPConstrained()
6904 ? Intrinsic::experimental_constrained_nearbyint
6905 : Intrinsic::nearbyint;
6908 case NEON::BI__builtin_neon_vrndmh_f16: {
6909 Int =
Builder.getIsFPConstrained()
6910 ? Intrinsic::experimental_constrained_floor
6914 case NEON::BI__builtin_neon_vrndm_v:
6915 case NEON::BI__builtin_neon_vrndmq_v: {
6916 Int =
Builder.getIsFPConstrained()
6917 ? Intrinsic::experimental_constrained_floor
6921 case NEON::BI__builtin_neon_vrndnh_f16: {
6922 Int =
Builder.getIsFPConstrained()
6923 ? Intrinsic::experimental_constrained_roundeven
6924 : Intrinsic::roundeven;
6927 case NEON::BI__builtin_neon_vrndn_v:
6928 case NEON::BI__builtin_neon_vrndnq_v: {
6929 Int =
Builder.getIsFPConstrained()
6930 ? Intrinsic::experimental_constrained_roundeven
6931 : Intrinsic::roundeven;
6934 case NEON::BI__builtin_neon_vrndns_f32: {
6935 Int =
Builder.getIsFPConstrained()
6936 ? Intrinsic::experimental_constrained_roundeven
6937 : Intrinsic::roundeven;
6940 case NEON::BI__builtin_neon_vrndph_f16: {
6941 Int =
Builder.getIsFPConstrained()
6942 ? Intrinsic::experimental_constrained_ceil
6946 case NEON::BI__builtin_neon_vrndp_v:
6947 case NEON::BI__builtin_neon_vrndpq_v: {
6948 Int =
Builder.getIsFPConstrained()
6949 ? Intrinsic::experimental_constrained_ceil
6953 case NEON::BI__builtin_neon_vrndxh_f16: {
6954 Int =
Builder.getIsFPConstrained()
6955 ? Intrinsic::experimental_constrained_rint
6959 case NEON::BI__builtin_neon_vrndx_v:
6960 case NEON::BI__builtin_neon_vrndxq_v: {
6961 Int =
Builder.getIsFPConstrained()
6962 ? Intrinsic::experimental_constrained_rint
6966 case NEON::BI__builtin_neon_vrndh_f16: {
6967 Int =
Builder.getIsFPConstrained()
6968 ? Intrinsic::experimental_constrained_trunc
6972 case NEON::BI__builtin_neon_vrnd32x_f32:
6973 case NEON::BI__builtin_neon_vrnd32xq_f32:
6974 case NEON::BI__builtin_neon_vrnd32x_f64:
6975 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6976 Int = Intrinsic::aarch64_neon_frint32x;
6979 case NEON::BI__builtin_neon_vrnd32z_f32:
6980 case NEON::BI__builtin_neon_vrnd32zq_f32:
6981 case NEON::BI__builtin_neon_vrnd32z_f64:
6982 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6983 Int = Intrinsic::aarch64_neon_frint32z;
6986 case NEON::BI__builtin_neon_vrnd64x_f32:
6987 case NEON::BI__builtin_neon_vrnd64xq_f32:
6988 case NEON::BI__builtin_neon_vrnd64x_f64:
6989 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6990 Int = Intrinsic::aarch64_neon_frint64x;
6993 case NEON::BI__builtin_neon_vrnd64z_f32:
6994 case NEON::BI__builtin_neon_vrnd64zq_f32:
6995 case NEON::BI__builtin_neon_vrnd64z_f64:
6996 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6997 Int = Intrinsic::aarch64_neon_frint64z;
7000 case NEON::BI__builtin_neon_vrnd_v:
7001 case NEON::BI__builtin_neon_vrndq_v: {
7002 Int =
Builder.getIsFPConstrained()
7003 ? Intrinsic::experimental_constrained_trunc
7007 case NEON::BI__builtin_neon_vcvt_f64_v:
7008 case NEON::BI__builtin_neon_vcvtq_f64_v:
7009 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
7011 return usgn ?
Builder.CreateUIToFP(Ops[0], Ty,
"vcvt")
7012 :
Builder.CreateSIToFP(Ops[0], Ty,
"vcvt");
7013 case NEON::BI__builtin_neon_vcvt_f64_f32: {
7015 "unexpected vcvt_f64_f32 builtin");
7019 return Builder.CreateFPExt(Ops[0], Ty,
"vcvt");
7021 case NEON::BI__builtin_neon_vcvt_f32_f64: {
7023 "unexpected vcvt_f32_f64 builtin");
7027 return Builder.CreateFPTrunc(Ops[0], Ty,
"vcvt");
7029 case NEON::BI__builtin_neon_vcvt_s32_v:
7030 case NEON::BI__builtin_neon_vcvt_u32_v:
7031 case NEON::BI__builtin_neon_vcvt_s64_v:
7032 case NEON::BI__builtin_neon_vcvt_u64_v:
7033 case NEON::BI__builtin_neon_vcvt_s16_f16:
7034 case NEON::BI__builtin_neon_vcvt_u16_f16:
7035 case NEON::BI__builtin_neon_vcvtq_s32_v:
7036 case NEON::BI__builtin_neon_vcvtq_u32_v:
7037 case NEON::BI__builtin_neon_vcvtq_s64_v:
7038 case NEON::BI__builtin_neon_vcvtq_u64_v:
7039 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7040 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7042 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
7046 case NEON::BI__builtin_neon_vcvta_s16_f16:
7047 case NEON::BI__builtin_neon_vcvta_u16_f16:
7048 case NEON::BI__builtin_neon_vcvta_s32_v:
7049 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7050 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7051 case NEON::BI__builtin_neon_vcvta_u32_v:
7052 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7053 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7054 case NEON::BI__builtin_neon_vcvta_s64_v:
7055 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7056 case NEON::BI__builtin_neon_vcvta_u64_v:
7057 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
7058 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
7062 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7063 case NEON::BI__builtin_neon_vcvtm_s32_v:
7064 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7065 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7066 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7067 case NEON::BI__builtin_neon_vcvtm_u32_v:
7068 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7069 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7070 case NEON::BI__builtin_neon_vcvtm_s64_v:
7071 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7072 case NEON::BI__builtin_neon_vcvtm_u64_v:
7073 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7074 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
7078 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7079 case NEON::BI__builtin_neon_vcvtn_s32_v:
7080 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7081 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7082 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7083 case NEON::BI__builtin_neon_vcvtn_u32_v:
7084 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7085 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7086 case NEON::BI__builtin_neon_vcvtn_s64_v:
7087 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7088 case NEON::BI__builtin_neon_vcvtn_u64_v:
7089 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
7090 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
7094 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7095 case NEON::BI__builtin_neon_vcvtp_s32_v:
7096 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7097 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7098 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7099 case NEON::BI__builtin_neon_vcvtp_u32_v:
7100 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7101 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7102 case NEON::BI__builtin_neon_vcvtp_s64_v:
7103 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7104 case NEON::BI__builtin_neon_vcvtp_u64_v:
7105 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
7106 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
7110 case NEON::BI__builtin_neon_vmulx_v:
7111 case NEON::BI__builtin_neon_vmulxq_v: {
7112 Int = Intrinsic::aarch64_neon_fmulx;
7115 case NEON::BI__builtin_neon_vmulxh_lane_f16:
7116 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
7119 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2],
"extract");
7121 Int = Intrinsic::aarch64_neon_fmulx;
7124 case NEON::BI__builtin_neon_vmul_lane_v:
7125 case NEON::BI__builtin_neon_vmul_laneq_v: {
7128 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7131 llvm::FixedVectorType *VTy =
7133 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
7134 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2],
"extract");
7138 case NEON::BI__builtin_neon_vpmaxnm_v:
7139 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7140 Int = Intrinsic::aarch64_neon_fmaxnmp;
7143 case NEON::BI__builtin_neon_vpminnm_v:
7144 case NEON::BI__builtin_neon_vpminnmq_v: {
7145 Int = Intrinsic::aarch64_neon_fminnmp;
7148 case NEON::BI__builtin_neon_vsqrth_f16: {
7149 Int =
Builder.getIsFPConstrained()
7150 ? Intrinsic::experimental_constrained_sqrt
7154 case NEON::BI__builtin_neon_vsqrt_v:
7155 case NEON::BI__builtin_neon_vsqrtq_v: {
7156 Int =
Builder.getIsFPConstrained()
7157 ? Intrinsic::experimental_constrained_sqrt
7159 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
7162 case NEON::BI__builtin_neon_vrbit_v:
7163 case NEON::BI__builtin_neon_vrbitq_v: {
7164 Int = Intrinsic::bitreverse;
7167 case NEON::BI__builtin_neon_vmaxv_f16: {
7168 Int = Intrinsic::aarch64_neon_fmaxv;
7170 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
7171 llvm::Type *Tys[2] = {Ty, VTy};
7175 case NEON::BI__builtin_neon_vmaxvq_f16: {
7176 Int = Intrinsic::aarch64_neon_fmaxv;
7178 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
7179 llvm::Type *Tys[2] = {Ty, VTy};
7183 case NEON::BI__builtin_neon_vminv_f16: {
7184 Int = Intrinsic::aarch64_neon_fminv;
7186 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
7187 llvm::Type *Tys[2] = {Ty, VTy};
7191 case NEON::BI__builtin_neon_vminvq_f16: {
7192 Int = Intrinsic::aarch64_neon_fminv;
7194 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
7195 llvm::Type *Tys[2] = {Ty, VTy};
7199 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7200 Int = Intrinsic::aarch64_neon_fmaxnmv;
7202 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
7203 llvm::Type *Tys[2] = {Ty, VTy};
7207 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7208 Int = Intrinsic::aarch64_neon_fmaxnmv;
7210 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
7211 llvm::Type *Tys[2] = {Ty, VTy};
7215 case NEON::BI__builtin_neon_vminnmv_f16: {
7216 Int = Intrinsic::aarch64_neon_fminnmv;
7218 VTy = llvm::FixedVectorType::get(
HalfTy, 4);
7219 llvm::Type *Tys[2] = {Ty, VTy};
7223 case NEON::BI__builtin_neon_vminnmvq_f16: {
7224 Int = Intrinsic::aarch64_neon_fminnmv;
7226 VTy = llvm::FixedVectorType::get(
HalfTy, 8);
7227 llvm::Type *Tys[2] = {Ty, VTy};
7231 case NEON::BI__builtin_neon_vmul_n_f64: {
7234 return Builder.CreateFMul(Ops[0], RHS);
7236 case NEON::BI__builtin_neon_vaddlv_u8: {
7237 Int = Intrinsic::aarch64_neon_uaddlv;
7239 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
7240 llvm::Type *Tys[2] = {Ty, VTy};
7244 case NEON::BI__builtin_neon_vaddlv_u16: {
7245 Int = Intrinsic::aarch64_neon_uaddlv;
7247 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
7248 llvm::Type *Tys[2] = {Ty, VTy};
7251 case NEON::BI__builtin_neon_vaddlvq_u8: {
7252 Int = Intrinsic::aarch64_neon_uaddlv;
7254 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
7255 llvm::Type *Tys[2] = {Ty, VTy};
7259 case NEON::BI__builtin_neon_vaddlvq_u16: {
7260 Int = Intrinsic::aarch64_neon_uaddlv;
7262 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
7263 llvm::Type *Tys[2] = {Ty, VTy};
7266 case NEON::BI__builtin_neon_vaddlv_s8: {
7267 Int = Intrinsic::aarch64_neon_saddlv;
7269 VTy = llvm::FixedVectorType::get(
Int8Ty, 8);
7270 llvm::Type *Tys[2] = {Ty, VTy};
7274 case NEON::BI__builtin_neon_vaddlv_s16: {
7275 Int = Intrinsic::aarch64_neon_saddlv;
7277 VTy = llvm::FixedVectorType::get(
Int16Ty, 4);
7278 llvm::Type *Tys[2] = {Ty, VTy};
7281 case NEON::BI__builtin_neon_vaddlvq_s8: {
7282 Int = Intrinsic::aarch64_neon_saddlv;
7284 VTy = llvm::FixedVectorType::get(
Int8Ty, 16);
7285 llvm::Type *Tys[2] = {Ty, VTy};
7289 case NEON::BI__builtin_neon_vaddlvq_s16: {
7290 Int = Intrinsic::aarch64_neon_saddlv;
7292 VTy = llvm::FixedVectorType::get(
Int16Ty, 8);
7293 llvm::Type *Tys[2] = {Ty, VTy};
7296 case NEON::BI__builtin_neon_vsri_n_v:
7297 case NEON::BI__builtin_neon_vsriq_n_v: {
7298 Int = Intrinsic::aarch64_neon_vsri;
7299 llvm::Function *Intrin =
CGM.getIntrinsic(Int, Ty);
7302 case NEON::BI__builtin_neon_vsli_n_v:
7303 case NEON::BI__builtin_neon_vsliq_n_v: {
7304 Int = Intrinsic::aarch64_neon_vsli;
7305 llvm::Function *Intrin =
CGM.getIntrinsic(Int, Ty);
7308 case NEON::BI__builtin_neon_vsra_n_v:
7309 case NEON::BI__builtin_neon_vsraq_n_v:
7310 Ops[0] =
Builder.CreateBitCast(Ops[0], Ty);
7312 return Builder.CreateAdd(Ops[0], Ops[1]);
7313 case NEON::BI__builtin_neon_vrsra_n_v:
7314 case NEON::BI__builtin_neon_vrsraq_n_v: {
7315 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7317 TmpOps.push_back(Ops[1]);
7318 TmpOps.push_back(Ops[2]);
7320 llvm::Value *tmp =
EmitNeonCall(F, TmpOps,
"vrshr_n", 1,
true);
7321 Ops[0] =
Builder.CreateBitCast(Ops[0], VTy);
7322 return Builder.CreateAdd(Ops[0], tmp);
7324 case NEON::BI__builtin_neon_vld1_v:
7325 case NEON::BI__builtin_neon_vld1q_v: {
7328 case NEON::BI__builtin_neon_vst1_v:
7329 case NEON::BI__builtin_neon_vst1q_v:
7330 Ops[1] =
Builder.CreateBitCast(Ops[1], VTy);
7332 case NEON::BI__builtin_neon_vld1_lane_v:
7333 case NEON::BI__builtin_neon_vld1q_lane_v: {
7334 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
7335 Ops[0] =
Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7337 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vld1_lane");
7339 case NEON::BI__builtin_neon_vldap1_lane_s64:
7340 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7341 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
7342 llvm::LoadInst *LI =
Builder.CreateAlignedLoad(
7344 LI->setAtomic(llvm::AtomicOrdering::Acquire);
7346 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2],
"vldap1_lane");
7348 case NEON::BI__builtin_neon_vld1_dup_v:
7349 case NEON::BI__builtin_neon_vld1q_dup_v: {
7350 Value *
V = PoisonValue::get(Ty);
7351 Ops[0] =
Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
7353 llvm::Constant *CI = ConstantInt::get(
Int32Ty, 0);
7354 Ops[0] =
Builder.CreateInsertElement(
V, Ops[0], CI);
7357 case NEON::BI__builtin_neon_vst1_lane_v:
7358 case NEON::BI__builtin_neon_vst1q_lane_v:
7359 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
7360 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
7362 case NEON::BI__builtin_neon_vstl1_lane_s64:
7363 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7364 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
7365 Ops[1] =
Builder.CreateExtractElement(Ops[1], Ops[2]);
7366 llvm::StoreInst *SI =
7368 SI->setAtomic(llvm::AtomicOrdering::Release);
7371 case NEON::BI__builtin_neon_vld2_v:
7372 case NEON::BI__builtin_neon_vld2q_v: {
7374 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7375 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
7376 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7378 case NEON::BI__builtin_neon_vld3_v:
7379 case NEON::BI__builtin_neon_vld3q_v: {
7381 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7382 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
7383 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7385 case NEON::BI__builtin_neon_vld4_v:
7386 case NEON::BI__builtin_neon_vld4q_v: {
7388 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7389 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
7390 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7392 case NEON::BI__builtin_neon_vld2_dup_v:
7393 case NEON::BI__builtin_neon_vld2q_dup_v: {
7395 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7396 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
7397 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7399 case NEON::BI__builtin_neon_vld3_dup_v:
7400 case NEON::BI__builtin_neon_vld3q_dup_v: {
7402 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7403 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
7404 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7406 case NEON::BI__builtin_neon_vld4_dup_v:
7407 case NEON::BI__builtin_neon_vld4q_dup_v: {
7409 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7410 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
7411 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7413 case NEON::BI__builtin_neon_vld2_lane_v:
7414 case NEON::BI__builtin_neon_vld2q_lane_v: {
7415 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7416 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7417 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7418 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
7419 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
7422 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7424 case NEON::BI__builtin_neon_vld3_lane_v:
7425 case NEON::BI__builtin_neon_vld3q_lane_v: {
7426 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7427 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7428 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7429 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
7430 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
7431 Ops[3] =
Builder.CreateBitCast(Ops[3], Ty);
7434 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7436 case NEON::BI__builtin_neon_vld4_lane_v:
7437 case NEON::BI__builtin_neon_vld4q_lane_v: {
7438 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7439 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7440 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
7441 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
7442 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
7443 Ops[3] =
Builder.CreateBitCast(Ops[3], Ty);
7444 Ops[4] =
Builder.CreateBitCast(Ops[4], Ty);
7447 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7449 case NEON::BI__builtin_neon_vst2_v:
7450 case NEON::BI__builtin_neon_vst2q_v: {
7451 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7452 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7453 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7456 case NEON::BI__builtin_neon_vst2_lane_v:
7457 case NEON::BI__builtin_neon_vst2q_lane_v: {
7458 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7460 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7461 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7464 case NEON::BI__builtin_neon_vst3_v:
7465 case NEON::BI__builtin_neon_vst3q_v: {
7466 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7467 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7468 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7471 case NEON::BI__builtin_neon_vst3_lane_v:
7472 case NEON::BI__builtin_neon_vst3q_lane_v: {
7473 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7475 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7476 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7479 case NEON::BI__builtin_neon_vst4_v:
7480 case NEON::BI__builtin_neon_vst4q_v: {
7481 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7482 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7483 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7486 case NEON::BI__builtin_neon_vst4_lane_v:
7487 case NEON::BI__builtin_neon_vst4q_lane_v: {
7488 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7490 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7491 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7494 case NEON::BI__builtin_neon_vtrn_v:
7495 case NEON::BI__builtin_neon_vtrnq_v: {
7496 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
7497 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
7498 Value *SV =
nullptr;
7500 for (
unsigned vi = 0; vi != 2; ++vi) {
7502 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7503 Indices.push_back(i+vi);
7504 Indices.push_back(i+e+vi);
7507 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vtrn");
7512 case NEON::BI__builtin_neon_vuzp_v:
7513 case NEON::BI__builtin_neon_vuzpq_v: {
7514 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
7515 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
7516 Value *SV =
nullptr;
7518 for (
unsigned vi = 0; vi != 2; ++vi) {
7520 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7521 Indices.push_back(2*i+vi);
7524 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vuzp");
7529 case NEON::BI__builtin_neon_vzip_v:
7530 case NEON::BI__builtin_neon_vzipq_v: {
7531 Ops[1] =
Builder.CreateBitCast(Ops[1], Ty);
7532 Ops[2] =
Builder.CreateBitCast(Ops[2], Ty);
7533 Value *SV =
nullptr;
7535 for (
unsigned vi = 0; vi != 2; ++vi) {
7537 for (
unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7538 Indices.push_back((i + vi*e) >> 1);
7539 Indices.push_back(((i + vi*e) >> 1)+e);
7542 SV =
Builder.CreateShuffleVector(Ops[1], Ops[2], Indices,
"vzip");
7547 case NEON::BI__builtin_neon_vqtbl1q_v: {
7548 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7551 case NEON::BI__builtin_neon_vqtbl2q_v: {
7552 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7555 case NEON::BI__builtin_neon_vqtbl3q_v: {
7556 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7559 case NEON::BI__builtin_neon_vqtbl4q_v: {
7560 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7563 case NEON::BI__builtin_neon_vqtbx1q_v: {
7564 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7567 case NEON::BI__builtin_neon_vqtbx2q_v: {
7568 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7571 case NEON::BI__builtin_neon_vqtbx3q_v: {
7572 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7575 case NEON::BI__builtin_neon_vqtbx4q_v: {
7576 return EmitNeonCall(
CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7579 case NEON::BI__builtin_neon_vsqadd_v:
7580 case NEON::BI__builtin_neon_vsqaddq_v: {
7581 Int = Intrinsic::aarch64_neon_usqadd;
7584 case NEON::BI__builtin_neon_vuqadd_v:
7585 case NEON::BI__builtin_neon_vuqaddq_v: {
7586 Int = Intrinsic::aarch64_neon_suqadd;
7590 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7591 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7592 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7593 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7594 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7595 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7596 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7597 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7598 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7599 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7606 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7607 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7608 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7609 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7610 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7611 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7612 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7613 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7614 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7615 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7622 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7623 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7624 case NEON::BI__builtin_neon_vluti2_lane_f16:
7625 case NEON::BI__builtin_neon_vluti2_lane_p16:
7626 case NEON::BI__builtin_neon_vluti2_lane_p8:
7627 case NEON::BI__builtin_neon_vluti2_lane_s16:
7628 case NEON::BI__builtin_neon_vluti2_lane_s8:
7629 case NEON::BI__builtin_neon_vluti2_lane_u16:
7630 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7631 Int = Intrinsic::aarch64_neon_vluti2_lane;
7638 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7639 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7640 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7641 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7642 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7643 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7644 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7645 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7646 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7647 Int = Intrinsic::aarch64_neon_vluti2_lane;
7654 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7655 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7656 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7657 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7658 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7661 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7662 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7663 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7664 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7665 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7668 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7669 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7670 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7671 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7672 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7673 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7674 return EmitNeonCall(
CGM.getIntrinsic(Int, Ty), Ops,
"vluti4q_lane_x2");
7676 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7677 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7678 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7679 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7680 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7681 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7682 return EmitNeonCall(
CGM.getIntrinsic(Int, Ty), Ops,
"vluti4q_laneq_x2");
7684 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
7686 {llvm::FixedVectorType::get(
HalfTy, 8),
7687 llvm::FixedVectorType::get(
Int8Ty, 16)},
7689 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
7691 {llvm::FixedVectorType::get(
FloatTy, 4),
7692 llvm::FixedVectorType::get(
Int8Ty, 16)},
7694 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7697 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7698 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7700 llvm::FixedVectorType::get(
BFloatTy, 8),
7701 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt1");
7702 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7705 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7706 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7708 llvm::FixedVectorType::get(
BFloatTy, 8),
7709 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt2");
7710 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7713 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7714 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7716 llvm::FixedVectorType::get(
HalfTy, 8),
7717 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt1");
7718 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7721 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7722 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7724 llvm::FixedVectorType::get(
HalfTy, 8),
7725 Ops[0]->
getType(), ExtractLow, Ops, E,
"vbfcvt2");
7726 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7728 llvm::FixedVectorType::get(
Int8Ty, 8),
7729 Ops[0]->
getType(),
false, Ops, E,
"vfcvtn");
7730 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7732 llvm::FixedVectorType::get(
Int8Ty, 8),
7733 llvm::FixedVectorType::get(
HalfTy, 4),
false, Ops,
7735 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7737 llvm::FixedVectorType::get(
Int8Ty, 16),
7738 llvm::FixedVectorType::get(
HalfTy, 8),
false, Ops,
7740 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7741 llvm::Type *Ty = llvm::FixedVectorType::get(
Int8Ty, 16);
7742 Ops[0] =
Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
7745 Ops[1]->
getType(),
false, Ops, E,
"vfcvtn2");
7748 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7749 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7752 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7753 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7754 ExtendLaneArg =
true;
7756 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7757 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7759 ExtendLaneArg,
HalfTy, Ops, E,
"fdot2_lane");
7760 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7761 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7764 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7765 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7766 ExtendLaneArg =
true;
7768 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7769 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7771 ExtendLaneArg,
FloatTy, Ops, E,
"fdot4_lane");
7773 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7775 {llvm::FixedVectorType::get(
HalfTy, 8)}, Ops, E,
7777 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7779 {llvm::FixedVectorType::get(
HalfTy, 8)}, Ops, E,
7781 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7783 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7785 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7787 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7789 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7791 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7793 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7795 {llvm::FixedVectorType::get(
FloatTy, 4)}, Ops, E,
7797 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7798 ExtendLaneArg =
true;
7800 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7802 ExtendLaneArg,
HalfTy, Ops, E,
"vmlal_lane");
7803 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7804 ExtendLaneArg =
true;
7806 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7808 ExtendLaneArg,
HalfTy, Ops, E,
"vmlal_lane");
7809 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7810 ExtendLaneArg =
true;
7812 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7814 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7815 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7816 ExtendLaneArg =
true;
7818 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7820 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7821 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7822 ExtendLaneArg =
true;
7824 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7826 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7827 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7828 ExtendLaneArg =
true;
7830 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7832 ExtendLaneArg,
FloatTy, Ops, E,
"vmlall_lane");
7833 case NEON::BI__builtin_neon_vamin_f16:
7834 case NEON::BI__builtin_neon_vaminq_f16:
7835 case NEON::BI__builtin_neon_vamin_f32:
7836 case NEON::BI__builtin_neon_vaminq_f32:
7837 case NEON::BI__builtin_neon_vaminq_f64: {
7838 Int = Intrinsic::aarch64_neon_famin;
7841 case NEON::BI__builtin_neon_vamax_f16:
7842 case NEON::BI__builtin_neon_vamaxq_f16:
7843 case NEON::BI__builtin_neon_vamax_f32:
7844 case NEON::BI__builtin_neon_vamaxq_f32:
7845 case NEON::BI__builtin_neon_vamaxq_f64: {
7846 Int = Intrinsic::aarch64_neon_famax;
7849 case NEON::BI__builtin_neon_vscale_f16:
7850 case NEON::BI__builtin_neon_vscaleq_f16:
7851 case NEON::BI__builtin_neon_vscale_f32:
7852 case NEON::BI__builtin_neon_vscaleq_f32:
7853 case NEON::BI__builtin_neon_vscaleq_f64: {
7854 Int = Intrinsic::aarch64_neon_fp8_fscale;