  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
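  // vcale/vcalt (absolute compare less-than[-or-equal]) deliberately reuse
  // the vacge/vacgt intrinsics: the common emitter swaps the operands for
  // the vcale*/vcalt* builtins, turning |a| <= |b| into |b| >= |a|.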
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
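  // The vcvt{a,m,n,p} families below encode the FP-to-integer rounding
  // mode: 'a' = to nearest, ties away from zero; 'm' = toward minus
  // infinity; 'n' = to nearest, ties to even; 'p' = toward plus infinity.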
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
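  // For orientation, a minimal sketch of what the NEONMAP1/NEONMAP2 table
  // macros are assumed to expand to (field order is an assumption; see the
  // real macro definitions that accompany these tables):
  //
  //   #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)            \
  //     { #NameBase, NEON::BI__builtin_neon_##NameBase,                  \
  //       Intrinsic::LLVMIntrinsic, 0, TypeModifier }
  //
  //   #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic,        \
  //                    TypeModifier)                                     \
  //     { #NameBase, NEON::BI__builtin_neon_##NameBase,                  \
  //       Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic,         \
  //       TypeModifier }
  //
  // NEONMAP2 entries carry an alternate intrinsic so the emitter can pick
  // per operand type (e.g. unsigned vs. signed fixed-point converts above).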
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
  NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
  NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
  NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
  NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
  NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
  NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
  NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
  NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
  NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
  NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
  NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
  NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
  NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
  NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
  NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
  NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
  NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
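// A minimal sketch, assuming the usual lookup helper in this file, of how
// the tables above are consulted (helper and flag names are assumptions):
//
//   const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
//       AArch64SIMDIntrinsicMap, BuiltinID, AArch64SIMDIntrinsicsProvenSorted);
//
// Entries are kept sorted by builtin ID so the lookup can binary-search;
// the matched record then feeds BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic,
// NameHint and Modifier into the common NEON emitter that follows.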
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    llvm::Triple::ArchType Arch) {
  std::optional<llvm::APSInt> NeonTypeConst =
  const bool Usgn = Type.isUnsigned();
  const bool Quad = Type.isQuad();
  const bool Floating = Type.isFloatingPoint();
  const bool AllowBFloatArgsAndRet =
  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
  llvm::Type *Ty = VTy;
  auto getAlignmentValue32 = [&](Address addr) -> Value * {
    return Builder.getInt32(addr.getAlignment().getQuantity());
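  // getAlignmentValue32 materializes the pointer's known alignment as an
  // i32; the arm_neon load/store intrinsics used below take the alignment
  // as an explicit trailing operand.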
  unsigned Int = LLVMIntrinsic;
    Int = AltLLVMIntrinsic;
  switch (BuiltinID) {
  case NEON::BI__builtin_neon_splat_lane_v:
  case NEON::BI__builtin_neon_splat_laneq_v:
  case NEON::BI__builtin_neon_splatq_lane_v:
  case NEON::BI__builtin_neon_splatq_laneq_v: {
    auto NumElements = VTy->getElementCount();
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
      NumElements = NumElements * 2;
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
      NumElements = NumElements.divideCoefficientBy(2);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
  case NEON::BI__builtin_neon_vpadd_v:
  case NEON::BI__builtin_neon_vpaddq_v:
    if (VTy->getElementType()->isFloatingPointTy() &&
        Int == Intrinsic::aarch64_neon_addp)
      Int = Intrinsic::aarch64_neon_faddp;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vadd_v:
  case NEON::BI__builtin_neon_vaddq_v: {
    llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Ops[0], Ty);
  case NEON::BI__builtin_neon_vaddhn_v: {
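    // Add-halving-narrow: per lane, compute trunc((a + b) >> (SrcBits/2));
    // e.g. vaddhn_s32 adds i32 lanes, shifts right by 16, and truncates
    // the sums down to i16.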
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    switch (VTy->getScalarSizeInBits()) {
    default: llvm_unreachable("unexpected type");
    auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
    llvm::Type *Tys[] = {VTy, VecFlt};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ,
        "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f16_s16:
  case NEON::BI__builtin_neon_vcvt_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_f16_u16:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f16_s16:
  case NEON::BI__builtin_neon_vcvt_n_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
  case NEON::BI__builtin_neon_vcvt_n_s16_f16:
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u16_f16:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_f16:
  case NEON::BI__builtin_neon_vcvt_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  case NEON::BI__builtin_neon_vcvtx_f32_v: {
    llvm::Type *Tys[2] = {VTy->getTruncatedElementVectorType(VTy), Ty};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  case NEON::BI__builtin_neon_vld1_x2_v:
  case NEON::BI__builtin_neon_vld1q_x2_v:
  case NEON::BI__builtin_neon_vld1_x3_v:
  case NEON::BI__builtin_neon_vld1q_x3_v:
  case NEON::BI__builtin_neon_vld1_x4_v:
  case NEON::BI__builtin_neon_vld1q_x4_v: {
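    // The x2/x3/x4 variants return a struct of N vectors: call the
    // intrinsic on the source pointer, then store the whole aggregate
    // through the destination pointer in Ops[0].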
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v:
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v:
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v:
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = PoisonValue::get(Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  case NEON::BI__builtin_neon_vmovl_v: {
    llvm::FixedVectorType *DTy =
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  case NEON::BI__builtin_neon_vmovn_v: {
    llvm::FixedVectorType *QTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  case NEON::BI__builtin_neon_vmull_v:
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
        llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = {Ty, NarrowTy};
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
        llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = {Ty, NarrowTy};
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vqdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqdmulh_lane_v:
  case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
    if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
        BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
      RTy = llvm::FixedVectorType::get(RTy->getElementType(),
                                       RTy->getNumElements() * 2);
    llvm::Type *Tys[2] = {
  case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqdmulh_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
    llvm::Type *Tys[2] = {
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
  case NEON::BI__builtin_neon_vrndi_v:
  case NEON::BI__builtin_neon_vrndiq_v:
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_nearbyint
              : Intrinsic::nearbyint;
  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
  case NEON::BI__builtin_neon_vsha512hq_u64:
  case NEON::BI__builtin_neon_vsha512h2q_u64:
  case NEON::BI__builtin_neon_vsha512su0q_u64:
  case NEON::BI__builtin_neon_vsha512su1q_u64: {
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
  case NEON::BI__builtin_neon_vshll_n_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  case NEON::BI__builtin_neon_vshrn_n_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    Ops.push_back(getAlignmentValue32(PtrOp0));
  case NEON::BI__builtin_neon_vsm3partw1q_u32:
  case NEON::BI__builtin_neon_vsm3partw2q_u32:
  case NEON::BI__builtin_neon_vsm3ss1q_u32:
  case NEON::BI__builtin_neon_vsm4ekeyq_u32:
  case NEON::BI__builtin_neon_vsm4eq_u32: {
  case NEON::BI__builtin_neon_vsm3tt1aq_u32:
  case NEON::BI__builtin_neon_vsm3tt1bq_u32:
  case NEON::BI__builtin_neon_vsm3tt2aq_u32:
  case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
  case NEON::BI__builtin_neon_vst1_x2_v:
  case NEON::BI__builtin_neon_vst1q_x2_v:
  case NEON::BI__builtin_neon_vst1_x3_v:
  case NEON::BI__builtin_neon_vst1q_x3_v:
  case NEON::BI__builtin_neon_vst1_x4_v:
  case NEON::BI__builtin_neon_vst1q_x4_v: {
    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
        Arch == llvm::Triple::aarch64_32) {
      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
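    // vtst: AND the operands, compare the result against zero, and
    // sign-extend the i1 mask back to the lane width, yielding all-ones
    // in any lane where the operands share a set bit.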
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
  case NEON::BI__builtin_neon_vxarq_u64: {
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
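    // vzip interleaves the two sources: vi = 0 builds the low half
    // ({a0, b0, a1, b1, ...}) and vi = 1 the high half, via the
    // (i + vi*e) >> 1 index arithmetic below.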
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
  case NEON::BI__builtin_neon_vdot_s32:
  case NEON::BI__builtin_neon_vdot_u32:
  case NEON::BI__builtin_neon_vdotq_s32:
  case NEON::BI__builtin_neon_vdotq_u32: {
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = {Ty, InputTy};
  case NEON::BI__builtin_neon_vfmlal_low_f16:
  case NEON::BI__builtin_neon_vfmlalq_low_f16: {
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = {Ty, InputTy};
  case NEON::BI__builtin_neon_vfmlsl_low_f16:
  case NEON::BI__builtin_neon_vfmlslq_low_f16: {
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = {Ty, InputTy};
  case NEON::BI__builtin_neon_vfmlal_high_f16:
  case NEON::BI__builtin_neon_vfmlalq_high_f16: {
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = {Ty, InputTy};
  case NEON::BI__builtin_neon_vfmlsl_high_f16:
  case NEON::BI__builtin_neon_vfmlslq_high_f16: {
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = {Ty, InputTy};
  case NEON::BI__builtin_neon_vmmlaq_s32:
  case NEON::BI__builtin_neon_vmmlaq_u32: {
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = {Ty, InputTy};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
  case NEON::BI__builtin_neon_vusmmlaq_s32: {
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = {Ty, InputTy};
  case NEON::BI__builtin_neon_vusdot_s32:
  case NEON::BI__builtin_neon_vusdotq_s32: {
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = {Ty, InputTy};
  case NEON::BI__builtin_neon_vbfdot_f32:
  case NEON::BI__builtin_neon_vbfdotq_f32: {
    llvm::Type *InputTy =
        llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = {Ty, InputTy};
  case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
    llvm::Type *Tys[1] = {Ty};
  assert(Int && "Expected valid intrinsic number");
    llvm::Triple::ArchType Arch) {
  if (auto Hint = GetValueForARMHint(BuiltinID))
  if (BuiltinID == clang::ARM::BI__emit) {
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
      llvm_unreachable("Sema will ensure that the parameter is constant");
    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
    llvm::InlineAsm *Emit =
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
    return Builder.CreateCall(Emit);
  if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
  if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
  if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
      BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
    if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
  if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
  if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
  if (BuiltinID == clang::ARM::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    for (unsigned i = 0; i < 2; i++)
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    StringRef Name = FD->getName();
  if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
      BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_mcrr:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
    case clang::ARM::BI__builtin_arm_mcrr2:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
  if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
      BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_mrrc:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
    case clang::ARM::BI__builtin_arm_mrrc2:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
    Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
    RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
      ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
      BuiltinID == clang::ARM::BI__ldrexd) {
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_ldaex:
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
    case clang::ARM::BI__builtin_arm_ldrexd:
    case clang::ARM::BI__builtin_arm_ldrex:
    case clang::ARM::BI__ldrexd:
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true);
    Val = Builder.CreateOr(Val, Val1);
  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
      BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
                                                       : Intrinsic::arm_ldrex,
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);
    llvm::Type *IntResTy = llvm::IntegerType::get(
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
  if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
      ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
        BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
                                                       : Intrinsic::arm_strexd);
    Builder.CreateStore(Val, Tmp);
    Val = Builder.CreateLoad(LdPtr);
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
  if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
      BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
    llvm::Type *StoreTy =
    if (StoreVal->getType()->isPointerTy())
      llvm::Type *IntTy = llvm::IntegerType::get(
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
                                                       : Intrinsic::arm_strex,
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
  if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::ARM::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::arm_crc32b;
    break;
  case clang::ARM::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::arm_crc32cb;
    break;
  case clang::ARM::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::arm_crc32h;
    break;
  case clang::ARM::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::arm_crc32ch;
    break;
  case clang::ARM::BI__builtin_arm_crc32w:
  case clang::ARM::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::arm_crc32w;
    break;
  case clang::ARM::BI__builtin_arm_crc32cw:
  case clang::ARM::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::arm_crc32cw;
    break;
  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
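    // 32-bit ARM has no 64-bit CRC instruction, so crc32d/crc32cd are
    // lowered as two chained 32-bit CRC calls over the low and high words
    // of the 64-bit operand.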
    if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
        BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
      return Builder.CreateCall(F, {Res, Arg1b});
    return Builder.CreateCall(F, {Arg0, Arg1});
  if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
      BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
    if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
        BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
    bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
    bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
                   BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
    llvm::Type *ValueType;
    if (IsPointerBuiltin) {
    } else if (Is64Bit) {
  if (BuiltinID == ARM::BI__builtin_sponentry) {
        return P.first == BuiltinID;
    BuiltinID = It->second;
  unsigned ICEArguments = 0;
  auto getAlignmentValue32 = [&](Address addr) -> Value * {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld1_v:
    case NEON::BI__builtin_neon_vld1q_v:
    case NEON::BI__builtin_neon_vld1q_lane_v:
    case NEON::BI__builtin_neon_vld1_lane_v:
    case NEON::BI__builtin_neon_vld1_dup_v:
    case NEON::BI__builtin_neon_vld1q_dup_v:
    case NEON::BI__builtin_neon_vst1_v:
    case NEON::BI__builtin_neon_vst1q_v:
    case NEON::BI__builtin_neon_vst1q_lane_v:
    case NEON::BI__builtin_neon_vst1_lane_v:
    case NEON::BI__builtin_neon_vst2_v:
    case NEON::BI__builtin_neon_vst2q_v:
    case NEON::BI__builtin_neon_vst2_lane_v:
    case NEON::BI__builtin_neon_vst2q_lane_v:
    case NEON::BI__builtin_neon_vst3_v:
    case NEON::BI__builtin_neon_vst3q_v:
    case NEON::BI__builtin_neon_vst3_lane_v:
    case NEON::BI__builtin_neon_vst3q_lane_v:
    case NEON::BI__builtin_neon_vst4_v:
    case NEON::BI__builtin_neon_vst4q_v:
    case NEON::BI__builtin_neon_vst4_lane_v:
    case NEON::BI__builtin_neon_vst4q_lane_v:
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld2_v:
    case NEON::BI__builtin_neon_vld2q_v:
    case NEON::BI__builtin_neon_vld3_v:
    case NEON::BI__builtin_neon_vld3q_v:
    case NEON::BI__builtin_neon_vld4_v:
    case NEON::BI__builtin_neon_vld4q_v:
    case NEON::BI__builtin_neon_vld2_lane_v:
    case NEON::BI__builtin_neon_vld2q_lane_v:
    case NEON::BI__builtin_neon_vld3_lane_v:
    case NEON::BI__builtin_neon_vld3q_lane_v:
    case NEON::BI__builtin_neon_vld4_lane_v:
    case NEON::BI__builtin_neon_vld4q_lane_v:
    case NEON::BI__builtin_neon_vld2_dup_v:
    case NEON::BI__builtin_neon_vld2q_dup_v:
    case NEON::BI__builtin_neon_vld3_dup_v:
    case NEON::BI__builtin_neon_vld3q_dup_v:
    case NEON::BI__builtin_neon_vld4_dup_v:
    case NEON::BI__builtin_neon_vld4q_dup_v:
  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  case NEON::BI__builtin_neon_vrndns_f32: {
    llvm::Type *Tys[] = {Arg->getType()};
    Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
    return Builder.CreateCall(F, {Arg}, "vrndn"); }
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vsha1h_u32:
  case NEON::BI__builtin_neon_vsha1cq_u32:
  case NEON::BI__builtin_neon_vsha1pq_u32:
  case NEON::BI__builtin_neon_vsha1mq_u32:
  case NEON::BI__builtin_neon_vcvth_bf16_f32: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
  case clang::ARM::BI_MoveToCoprocessor:
  case clang::ARM::BI_MoveToCoprocessor2: {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
                             ? Intrinsic::arm_mcr
                             : Intrinsic::arm_mcr2);
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
                                  Ops[3], Ops[4], Ops[5]});
  assert(HasExtraArg);
  std::optional<llvm::APSInt> Result =
  if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
    if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
    bool usgn = Result->getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
    return Builder.CreateCall(F, Ops, "vcvtr");
  bool usgn = Type.isUnsigned();
  bool rightShift = false;
  llvm::FixedVectorType *VTy =
  llvm::Type *Ty = VTy;
  switch (BuiltinID) {
  default:
    return nullptr;
  case NEON::BI__builtin_neon_vld1q_lane_v:
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
      Value *Align = getAlignmentValue32(PtrOp0);
      int Indices[] = {1 - Lane, Lane};
      return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
  case NEON::BI__builtin_neon_vld1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  case NEON::BI__builtin_neon_vqrshrn_n_v:
        usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
  case NEON::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v:
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vst1q_lane_v:
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      Ops[2] = getAlignmentValue32(PtrOp0);
      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
  case NEON::BI__builtin_neon_vst1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    return Builder.CreateStore(Ops[1],
  case NEON::BI__builtin_neon_vtbl1_v:
  case NEON::BI__builtin_neon_vtbl2_v:
  case NEON::BI__builtin_neon_vtbl3_v:
  case NEON::BI__builtin_neon_vtbl4_v:
  case NEON::BI__builtin_neon_vtbx1_v:
  case NEON::BI__builtin_neon_vtbx2_v:
  case NEON::BI__builtin_neon_vtbx3_v:
  case NEON::BI__builtin_neon_vtbx4_v:
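// The AArch64 path mirrors the ARM one: CPU-feature queries, hints,
// bit-manipulation, rounding, exclusive load/store, and CRC builtins are
// handled directly; NEON builtins again share the common emitter.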
5104 llvm::Triple::ArchType
Arch) {
5113 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
5114 return EmitAArch64CpuSupports(E);
5116 unsigned HintID =
static_cast<unsigned>(-1);
5117 switch (BuiltinID) {
5119 case clang::AArch64::BI__builtin_arm_nop:
5122 case clang::AArch64::BI__builtin_arm_yield:
5123 case clang::AArch64::BI__yield:
5126 case clang::AArch64::BI__builtin_arm_wfe:
5127 case clang::AArch64::BI__wfe:
5130 case clang::AArch64::BI__builtin_arm_wfi:
5131 case clang::AArch64::BI__wfi:
5134 case clang::AArch64::BI__builtin_arm_sev:
5135 case clang::AArch64::BI__sev:
5138 case clang::AArch64::BI__builtin_arm_sevl:
5139 case clang::AArch64::BI__sevl:
5144 if (HintID !=
static_cast<unsigned>(-1)) {
5145 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_hint);
5146 return Builder.CreateCall(F, llvm::ConstantInt::get(
Int32Ty, HintID));
5149 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5150 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_break);
5155 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5158 llvm::FunctionType::get(StructType::get(
CGM.Int64Ty,
CGM.Int64Ty), {},
5160 "__arm_sme_state"));
5162 "aarch64_pstate_sm_compatible");
5163 CI->setAttributes(Attrs);
5166 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5173 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5175 "rbit of unusual size!");
5178 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
5180 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5182 "rbit of unusual size!");
5185 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg,
"rbit");
5188 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5189 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5191 Function *F =
CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
5193 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5198 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5200 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
5203 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5205 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
5209 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5210 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5212 llvm::Type *Ty = Arg->getType();
5213 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
5217 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5218 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5220 llvm::Type *Ty = Arg->getType();
5221 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
5225 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5226 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5228 llvm::Type *Ty = Arg->getType();
5229 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
5233 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5234 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5236 llvm::Type *Ty = Arg->getType();
5237 return Builder.CreateCall(
CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
5241 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5243 "__jcvt of unusual size!");
5246 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
5249 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5250 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5251 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5252 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5256 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5259 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
5260 llvm::Value *Val =
Builder.CreateCall(F, MemAddr);
5262 for (
size_t i = 0; i < 8; i++) {
5263 llvm::Value *ValOffsetPtr =
5274 Args.push_back(MemAddr);
5275 for (
size_t i = 0; i < 8; i++) {
5276 llvm::Value *ValOffsetPtr =
5283 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5284 ? Intrinsic::aarch64_st64b
5285 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5286 ? Intrinsic::aarch64_st64bv
5287 : Intrinsic::aarch64_st64bv0);
5289 return Builder.CreateCall(F, Args);
5293 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5294 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5296 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5297 ? Intrinsic::aarch64_rndr
5298 : Intrinsic::aarch64_rndrrs);
5300 llvm::Value *Val =
Builder.CreateCall(F);
5301 Value *RandomValue =
Builder.CreateExtractValue(Val, 0);
5305 Builder.CreateStore(RandomValue, MemAddress);
5310 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5311 assert(E->
getNumArgs() == 2 &&
"__clear_cache takes 2 arguments");
5314 for (
unsigned i = 0; i < 2; i++)
5316 llvm::Type *Ty =
CGM.getTypes().ConvertType(FD->
getType());
5318 StringRef Name = FD->
getName();
  if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
      getContext().getTypeSize(E->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxp
                             : Intrinsic::aarch64_ldxp);

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");

    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
    Val1 = Builder.CreateZExt(Val1, Int128Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", /*HasNUW=*/true);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
             BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxr
                             : Intrinsic::aarch64_ldxr,
                         LoadAddr->getType());
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
    Val->addParamAttr(
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);

    llvm::Type *IntResTy = llvm::IntegerType::get(
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                 RealResTy);
  }
  if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxp
                             : Intrinsic::aarch64_stxp);
    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);

    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);

    Tmp = Tmp.withElementType(STy);
    llvm::Value *Val = Builder.CreateLoad(Tmp);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
      BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
    else {
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
    }

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxr
                             : Intrinsic::aarch64_stxr,
                         StoreAddr->getType());
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
    return CI;
  }
  if (BuiltinID == clang::AArch64::BI__getReg) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    LLVMContext &Context = CGM.getLLVMContext();
    std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);

    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
    return Builder.CreateCall(F, Metadata);
  }
  if (BuiltinID == clang::AArch64::BI__break) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
    return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
    return Builder.CreateCall(F);
  }
  if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);
  // CRC32
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
  case clang::AArch64::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
  case clang::AArch64::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
  case clang::AArch64::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
  case clang::AArch64::BI__builtin_arm_crc32w:
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
  case clang::AArch64::BI__builtin_arm_crc32cw:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
  case clang::AArch64::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
  case clang::AArch64::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);

    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);

    return Builder.CreateCall(F, {Arg0, Arg1});
  }
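  // Illustrative usage (sketch): uint32_t c = __builtin_arm_crc32b(crc, byte);
  // The data operand is zero-extended above to the width the intrinsic
  // expects, so the narrow b/h variants share the same emission path.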
  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
    Value *Dst = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *Size = EmitScalarExpr(E->getArg(2));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
  }

  if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
      BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x) {
    // (The range-prefetch lowering is elided in this excerpt.)
  }
  // Memory Tagging Extensions (MTE) Intrinsics
  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_irg:
    MTEIntrinsicID = Intrinsic::aarch64_irg; break;
  case clang::AArch64::BI__builtin_arm_addg:
    MTEIntrinsicID = Intrinsic::aarch64_addg; break;
  case clang::AArch64::BI__builtin_arm_gmi:
    MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
  case clang::AArch64::BI__builtin_arm_ldg:
    MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
  case clang::AArch64::BI__builtin_arm_stg:
    MTEIntrinsicID = Intrinsic::aarch64_stg; break;
  case clang::AArch64::BI__builtin_arm_subp:
    MTEIntrinsicID = Intrinsic::aarch64_subp; break;
  }

  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
    if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *Mask = EmitScalarExpr(E->getArg(1));
      Mask = Builder.CreateZExt(Mask, Int64Ty);
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, Mask});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *TagOffset = EmitScalarExpr(E->getArg(1));
      TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, TagOffset});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
      ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {TagAddress, TagAddress});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {TagAddress, TagAddress});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
      Value *PointerA = EmitScalarExpr(E->getArg(0));
      Value *PointerB = EmitScalarExpr(E->getArg(1));
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
    }
  }
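  // These lower one-to-one onto the MTE instructions (IRG, ADDG, GMI, LDG,
  // STG, SUBP); only the operand plumbing differs between the builtins.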
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
    SpecialRegisterAccessKind AccessKind = Write;
    if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
      AccessKind = VolatileRead;

    bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;

    bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
                   BuiltinID == clang::AArch64::BI__builtin_arm_wsr;

    bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
                    BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;

    llvm::Type *ValueType;
    llvm::Type *RegisterType = Int64Ty;
    if (Is32Bit) {
      ValueType = Int32Ty;
    } else if (Is128Bit) {
      llvm::Type *Int128Ty =
          llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
      ValueType = Int128Ty;
      RegisterType = Int128Ty;
    } else if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
    } else {
      ValueType = Int64Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
                                      AccessKind);
  }
  if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
      BuiltinID == clang::AArch64::BI_WriteStatusReg ||
      BuiltinID == clang::AArch64::BI__sys) {
    LLVMContext &Context = CGM.getLLVMContext();

    unsigned SysReg =
        E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();

    std::string SysRegStr;
    unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
                          BuiltinID == clang::AArch64::BI_WriteStatusReg)
                             ? ((1 << 1) | ((SysReg >> 14) & 1))
                             : 1;
    llvm::raw_string_ostream(SysRegStr)
        << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
        << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
        << (SysReg & 7);

    llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Type *RegisterType = Int64Ty;
    llvm::Type *Types[] = { RegisterType };

    if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
      llvm::Function *F =
          CGM.getIntrinsic(Intrinsic::read_register, Types);
      return Builder.CreateCall(F, Metadata);
    }

    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::write_register, Types);
    llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
    llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
    if (BuiltinID == clang::AArch64::BI__sys) {
      // (The remainder of the __sys handling is elided in this excerpt.)
    }
    return Result;
  }
  if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }
  if (BuiltinID == clang::AArch64::BI__mulh ||
      BuiltinID == clang::AArch64::BI__umulh) {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
    Value *LHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
    Value *RHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    return HigherBits;
  }
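  // Illustrative usage (sketch): __mulh(a, b) yields the high 64 bits of the
  // signed 128-bit product of two int64_t values; __umulh is the unsigned
  // analogue. The widening to i128 above makes that explicit.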
  if (BuiltinID == AArch64::BI__writex18byte ||
      BuiltinID == AArch64::BI__writex18word ||
      BuiltinID == AArch64::BI__writex18dword ||
      BuiltinID == AArch64::BI__writex18qword) {
    // Read x18, add the byte offset, and store the data argument at the
    // resulting address. (The emission sequence is elided in this excerpt.)
  }

  if (BuiltinID == AArch64::BI__readx18byte ||
      BuiltinID == AArch64::BI__readx18word ||
      BuiltinID == AArch64::BI__readx18dword ||
      BuiltinID == AArch64::BI__readx18qword) {
    // Read x18, add the byte offset, and load a value of the requested
    // width. (The emission sequence is elided in this excerpt.)
  }

  if (BuiltinID == AArch64::BI__addx18byte ||
      BuiltinID == AArch64::BI__addx18word ||
      BuiltinID == AArch64::BI__addx18dword ||
      BuiltinID == AArch64::BI__addx18qword ||
      BuiltinID == AArch64::BI__incx18byte ||
      BuiltinID == AArch64::BI__incx18word ||
      BuiltinID == AArch64::BI__incx18dword ||
      BuiltinID == AArch64::BI__incx18qword) {
    llvm::Type *IntTy;
    bool isIncrement = true;
    switch (BuiltinID) {
    case AArch64::BI__incx18byte:
      IntTy = Int8Ty;
      break;
    case AArch64::BI__incx18word:
      IntTy = Int16Ty;
      break;
    case AArch64::BI__incx18dword:
      IntTy = Int32Ty;
      break;
    case AArch64::BI__incx18qword:
      IntTy = Int64Ty;
      break;
    default:
      // The __addx18* forms take an explicit addend.
      isIncrement = false;
      break;
    }
    // (The read-modify-write of [x18 + offset] is elided in this excerpt.)
  }
  if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
      BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
      BuiltinID == AArch64::BI_CopyInt32FromFloat ||
      BuiltinID == AArch64::BI_CopyInt64FromDouble) {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *RetTy = ConvertType(E->getType());
    return Builder.CreateBitCast(Arg, RetTy);
  }
  if (BuiltinID == AArch64::BI_CountLeadingOnes ||
      BuiltinID == AArch64::BI_CountLeadingOnes64 ||
      BuiltinID == AArch64::BI_CountLeadingZeros ||
      BuiltinID == AArch64::BI_CountLeadingZeros64) {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = Arg->getType();

    if (BuiltinID == AArch64::BI_CountLeadingOnes ||
        BuiltinID == AArch64::BI_CountLeadingOnes64)
      Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));

    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
    Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});

    if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
        BuiltinID == AArch64::BI_CountLeadingZeros64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }
  if (BuiltinID == AArch64::BI_CountLeadingSigns ||
      BuiltinID == AArch64::BI_CountLeadingSigns64) {
    Value *Arg = EmitScalarExpr(E->getArg(0));

    Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
                      ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
                      : CGM.getIntrinsic(Intrinsic::aarch64_cls64);

    Value *Result = Builder.CreateCall(F, Arg, "cls");
    if (BuiltinID == AArch64::BI_CountLeadingSigns64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }
  if (BuiltinID == AArch64::BI_CountOneBits ||
      BuiltinID == AArch64::BI_CountOneBits64) {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    Value *Result = Builder.CreateCall(F, ArgValue);
    if (BuiltinID == AArch64::BI_CountOneBits64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }
  if (BuiltinID == AArch64::BI__prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
    Value *Locality = ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
  }

  if (BuiltinID == AArch64::BI__hlt) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
    Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});

    // Return 0 for convenience, even though the __hlt intrinsic
    // doesn't really return a value.
    return ConstantInt::get(Builder.getInt32Ty(), 0);
  }
  if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
    return Builder.CreateFPTrunc(EmitScalarExpr(E->getArg(0)),
                                 Builder.getBFloatTy());

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId =
          translateAarch64ToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;
  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  llvm::SmallVector<Value *, 4> Ops;
  Address PtrOp0 = Address::invalid();
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vldap1_lane_s64:
      case NEON::BI__builtin_neon_vldap1q_lane_s64:
      case NEON::BI__builtin_neon_vstl1_lane_s64:
      case NEON::BI__builtin_neon_vstl1q_lane_s64:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.emitRawPointer(*this));
        continue;
      }
    }
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
  }
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
      AArch64SISDIntrinsicMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
  if (Builtin) {
    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
    assert(Result && "SISD intrinsic should have been handled");
    return Result;
  }

  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  NeonTypeFlags Type(0);
  if (std::optional<llvm::APSInt> Result =
          Arg->getIntegerConstantExpr(getContext()))
    // Determine the type of this overloaded NEON intrinsic.
    Type = NeonTypeFlags(Result->getZExtValue());

  bool usgn = Type.isUnsigned();
  bool quad = Type.isQuad();
  // Handle non-overloaded intrinsics first.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabsh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddq_p128: {
    llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    return Builder.CreateBitCast(Ops[0], Int128Ty);
  }
  case NEON::BI__builtin_neon_vldrq_p128: {
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateAlignedLoad(Int128Ty, Ptr,
                                     CharUnits::fromQuantity(16));
  }
  case NEON::BI__builtin_neon_vstrq_p128: {
    Value *Ptr = Ops[0];
    return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)),
                                             Ptr);
  }
  case NEON::BI__builtin_neon_vcvts_f32_u32:
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvts_f32_s32:
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvth_f16_u16:
  case NEON::BI__builtin_neon_vcvth_f16_u32:
  case NEON::BI__builtin_neon_vcvth_f16_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvth_f16_s16:
  case NEON::BI__builtin_neon_vcvth_f16_s32:
  case NEON::BI__builtin_neon_vcvth_f16_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy;
    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
      InTy = Int64Ty;
    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
      InTy = Int32Ty;
    else
      InTy = Int16Ty;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvtah_u16_f16:
  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  case NEON::BI__builtin_neon_vcvtph_u16_f16:
  case NEON::BI__builtin_neon_vcvth_u16_f16:
  case NEON::BI__builtin_neon_vcvtah_s16_f16:
  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  case NEON::BI__builtin_neon_vcvtph_s16_f16:
  case NEON::BI__builtin_neon_vcvth_s16_f16: {
    unsigned Int;
    llvm::Type* InTy = Int32Ty;
    llvm::Type* FTy  = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvtah_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtau; break;
    case NEON::BI__builtin_neon_vcvtmh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtmu; break;
    case NEON::BI__builtin_neon_vcvtnh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtnu; break;
    case NEON::BI__builtin_neon_vcvtph_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtpu; break;
    case NEON::BI__builtin_neon_vcvth_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzu; break;
    case NEON::BI__builtin_neon_vcvtah_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtas; break;
    case NEON::BI__builtin_neon_vcvtmh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtms; break;
    case NEON::BI__builtin_neon_vcvtnh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtns; break;
    case NEON::BI__builtin_neon_vcvtph_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtps; break;
    case NEON::BI__builtin_neon_vcvth_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzs; break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcaleh_f16:
  case NEON::BI__builtin_neon_vcalth_f16:
  case NEON::BI__builtin_neon_vcageh_f16:
  case NEON::BI__builtin_neon_vcagth_f16: {
    unsigned Int;
    llvm::Type* InTy = Int32Ty;
    llvm::Type* FTy  = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcageh_f16:
      Int = Intrinsic::aarch64_neon_facge; break;
    case NEON::BI__builtin_neon_vcagth_f16:
      Int = Intrinsic::aarch64_neon_facgt; break;
    case NEON::BI__builtin_neon_vcaleh_f16:
      Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
    case NEON::BI__builtin_neon_vcalth_f16:
      Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
    unsigned Int;
    llvm::Type* InTy = Int32Ty;
    llvm::Type* FTy  = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_s16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxs;
      break;
    case NEON::BI__builtin_neon_vcvth_n_u16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxu;
      break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
    unsigned Int;
    llvm::Type* FTy  = HalfTy;
    llvm::Type* InTy = Int32Ty;
    llvm::Type *Tys[2] = {FTy, InTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_f16_s16:
      Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
      Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
      break;
    case NEON::BI__builtin_neon_vcvth_n_f16_u16:
      Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
      Ops[0] = Builder.CreateZExt(Ops[0], InTy);
      break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  }
  case NEON::BI__builtin_neon_vpaddd_s64: {
    auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpaddd_f64: {
    auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f64 into a scalar f64.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpadds_f32: {
    auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f32, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f32 into a scalar f32.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vceqzd_s64:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vceqzd_f64:
  case NEON::BI__builtin_neon_vceqzs_f32:
  case NEON::BI__builtin_neon_vceqzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OEQ, "vceqz");
  case NEON::BI__builtin_neon_vcgezd_s64:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vcgezd_f64:
  case NEON::BI__builtin_neon_vcgezs_f32:
  case NEON::BI__builtin_neon_vcgezh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGE, "vcgez");
  case NEON::BI__builtin_neon_vclezd_s64:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vclezd_f64:
  case NEON::BI__builtin_neon_vclezs_f32:
  case NEON::BI__builtin_neon_vclezh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLE, "vclez");
  case NEON::BI__builtin_neon_vcgtzd_s64:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcgtzd_f64:
  case NEON::BI__builtin_neon_vcgtzs_f32:
  case NEON::BI__builtin_neon_vcgtzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltzd_s64:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SLT, "vcltz");
  case NEON::BI__builtin_neon_vcltzd_f64:
  case NEON::BI__builtin_neon_vcltzs_f32:
  case NEON::BI__builtin_neon_vcltzh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLT, "vcltz");
  case NEON::BI__builtin_neon_vceqzd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[0] =
        Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
  }
  case NEON::BI__builtin_neon_vceqd_f64:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcged_f64:
  case NEON::BI__builtin_neon_vcgtd_f64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqs_f32:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcges_f32:
  case NEON::BI__builtin_neon_vcgts_f32: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqh_f16:
  case NEON::BI__builtin_neon_vcleh_f16:
  case NEON::BI__builtin_neon_vclth_f16:
  case NEON::BI__builtin_neon_vcgeh_f16:
  case NEON::BI__builtin_neon_vcgth_f16: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqd_s64:
  case NEON::BI__builtin_neon_vceqd_u64:
  case NEON::BI__builtin_neon_vcgtd_s64:
  case NEON::BI__builtin_neon_vcgtd_u64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vcged_u64:
  case NEON::BI__builtin_neon_vcged_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcled_s64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_s64:
    case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ; break;
    case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
    case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
    case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
    case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
    case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
    case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
    case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
    case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  }
  case NEON::BI__builtin_neon_vtstd_s64:
  case NEON::BI__builtin_neon_vtstd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  }
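  // vtst ("test bits"): the result is all-ones iff (a & b) != 0, hence the
  // and / icmp-ne / sext sequence above.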
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_f64:
    // The vector type needs a cast for the v1f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_mf8:
  case NEON::BI__builtin_neon_vsetq_lane_mf8:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    // The input vector type needs a cast to scalar type.
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vsetq_lane_f64:
    // The vector type needs a cast for the v2f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vdupb_lane_i8:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vdupb_laneq_i8:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_mf8:
  case NEON::BI__builtin_neon_vdupb_lane_mf8:
  case NEON::BI__builtin_neon_vgetq_lane_mf8:
  case NEON::BI__builtin_neon_vdupb_laneq_mf8:
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vduph_lane_i16:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vduph_laneq_i16:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vdups_lane_i32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdups_lane_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdups_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vdups_laneq_i32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vdupd_lane_i64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdupd_lane_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdupd_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vdupd_laneq_i64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vdups_laneq_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f64:
  case NEON::BI__builtin_neon_vdupd_laneq_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vaddh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
  case NEON::BI__builtin_neon_vsubh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
  case NEON::BI__builtin_neon_vmulh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
  case NEON::BI__builtin_neon_vdivh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
  case NEON::BI__builtin_neon_vfmah_f16:
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
        {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
  case NEON::BI__builtin_neon_vfmsh_f16: {
    Value *Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
        {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
  }
  case NEON::BI__builtin_neon_vaddd_s64:
  case NEON::BI__builtin_neon_vaddd_u64:
    return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
  case NEON::BI__builtin_neon_vsubd_s64:
  case NEON::BI__builtin_neon_vsubd_u64:
    return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
  case NEON::BI__builtin_neon_vqdmlalh_s16:
  case NEON::BI__builtin_neon_vqdmlslh_s16: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                     ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(
        CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
        Ops, "vqshlu_n");
  }
  case NEON::BI__builtin_neon_vqshld_n_u64:
  case NEON::BI__builtin_neon_vqshld_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
                       ? Intrinsic::aarch64_neon_uqshl
                       : Intrinsic::aarch64_neon_sqshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  }
  case NEON::BI__builtin_neon_vrshrd_n_u64:
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
                       ? Intrinsic::aarch64_neon_urshl
                       : Intrinsic::aarch64_neon_srshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  }
  case NEON::BI__builtin_neon_vrsrad_n_u64:
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
                       ? Intrinsic::aarch64_neon_urshl
                       : Intrinsic::aarch64_neon_srshl;
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  }
  case NEON::BI__builtin_neon_vshld_n_s64:
  case NEON::BI__builtin_neon_vshld_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateShl(
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateAShr(
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    if (ShiftAmt == 64)
      return ConstantInt::get(Int64Ty, 0);
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
                              "shrd_n");
  }
  case NEON::BI__builtin_neon_vsrad_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateAShr(
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vsrad_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0, and adding 0
    // to the accumulator is a no-op, so return Ops[0] directly.
    if (ShiftAmt == 64)
      return Ops[0];
    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
                                "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
                                          "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                     ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
    Ops.pop_back();

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_s32:
  case NEON::BI__builtin_neon_vqdmlsls_s32: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
                                          "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(Ops[2]);
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");
    Ops.pop_back();

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_lane_f16: {
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  }
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_f16: {
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  }
  case NEON::BI__builtin_neon_vcvt_bf16_f32: {
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
  }
  case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
    SmallVector<int, 16> ConcatMask(8);
    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    llvm::Value *Trunc =
        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
    return Builder.CreateShuffleVector(
        Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
  }
  case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
    SmallVector<int, 16> ConcatMask(8);
    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
    SmallVector<int, 16> LoMask(4);
    std::iota(LoMask.begin(), LoMask.end(), 0);
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
    llvm::Value *Inactive = Builder.CreateShuffleVector(
        Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
    llvm::Value *Trunc =
        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
    return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
  }
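  // vcvtq_low/high_bf16_f32 truncate four f32 lanes to bf16, then shuffle the
  // narrowed lanes into the low or high half of the v8bf16 result; the other
  // half is zero for the low form, or taken from Ops[0] for the high form.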
  case clang::AArch64::BI_InterlockedAdd:
  case clang::AArch64::BI_InterlockedAdd_acq:
  case clang::AArch64::BI_InterlockedAdd_rel:
  case clang::AArch64::BI_InterlockedAdd_nf:
  case clang::AArch64::BI_InterlockedAdd64:
  case clang::AArch64::BI_InterlockedAdd64_acq:
  case clang::AArch64::BI_InterlockedAdd64_rel:
  case clang::AArch64::BI_InterlockedAdd64_nf: {
    Address DestAddr = CheckAtomicAlignment(*this, E);
    Value *Val = EmitScalarExpr(E->getArg(1));
    llvm::AtomicOrdering Ordering;
    switch (BuiltinID) {
    case clang::AArch64::BI_InterlockedAdd:
    case clang::AArch64::BI_InterlockedAdd64:
      Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
      break;
    case clang::AArch64::BI_InterlockedAdd_acq:
    case clang::AArch64::BI_InterlockedAdd64_acq:
      Ordering = llvm::AtomicOrdering::Acquire;
      break;
    case clang::AArch64::BI_InterlockedAdd_rel:
    case clang::AArch64::BI_InterlockedAdd64_rel:
      Ordering = llvm::AtomicOrdering::Release;
      break;
    case clang::AArch64::BI_InterlockedAdd_nf:
    case clang::AArch64::BI_InterlockedAdd64_nf:
      Ordering = llvm::AtomicOrdering::Monotonic;
      break;
    default:
      llvm_unreachable("missing builtin ID in switch!");
    }
    AtomicRMWInst *RMWI =
        Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
    return Builder.CreateAdd(RMWI, Val);
  }
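  // atomicrmw add returns the value the memory held *before* the operation,
  // while _InterlockedAdd is defined to return the *new* value -- hence the
  // extra CreateAdd of Val on the result.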
  }

  llvm::FixedVectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  unsigned Int;
  bool ExtractLow = false;
  bool ExtendLaneArg = false;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vbsl_v:
  case NEON::BI__builtin_neon_vbslq_v: {
    llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
    Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
    Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");

    Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
    Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
    Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
    return Builder.CreateBitCast(Ops[0], Ty);
  }
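  // vbsl (bitwise select) needs no target intrinsic: it is expanded as
  // (mask & a) | (~mask & b) on the integer view of the vectors.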
  case NEON::BI__builtin_neon_vfma_lane_v:
  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
    // The ARM builtins (and instructions) have the addend as the first
    // operand, but the 'fma' intrinsics have it last. Swap it around here.
    Value *Addend = Ops[0];
    Value *Multiplicand = Ops[1];
    Value *LaneSource = Ops[2];
    Ops[0] = Multiplicand;
    Ops[1] = LaneSource;
    Ops[2] = Addend;

    // Now adjust things to handle the lane access.
    auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
                         ? llvm::FixedVectorType::get(VTy->getElementType(),
                                                      VTy->getNumElements() / 2)
                         : VTy;
    llvm::Constant *cst = cast<Constant>(Ops[3]);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
    Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
    Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");

    Ops.pop_back();
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_fma
              : Intrinsic::fma;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
  }
  case NEON::BI__builtin_neon_vfma_laneq_v: {
    auto *VTy = cast<llvm::FixedVectorType>(Ty);
    // v1f64 fma should be mapped to Neon scalar f64 fma
    if (VTy && VTy->getElementType() == DoubleTy) {
      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
      llvm::FixedVectorType *VTy =
          GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
      Value *Result = emitCallMaybeConstrainedFPBuiltin(
          *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
          DoubleTy, {Ops[1], Ops[2], Ops[0]});
      return Builder.CreateBitCast(Result, Ty);
    }
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
                                           VTy->getNumElements() * 2);
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
                                               cast<ConstantInt>(Ops[3]));
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");

    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmah_lane_f16:
  case NEON::BI__builtin_neon_vfmas_lane_f32:
  case NEON::BI__builtin_neon_vfmah_laneq_f16:
  case NEON::BI__builtin_neon_vfmas_laneq_f32:
  case NEON::BI__builtin_neon_vfmad_lane_f64:
  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(3)));
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vmull_v:
    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
    if (Type.isPoly())
      Int = Intrinsic::aarch64_neon_pmull;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vmax_v:
  case NEON::BI__builtin_neon_vmaxq_v:
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
    if (Ty->isFPOrFPVectorTy())
      Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case NEON::BI__builtin_neon_vmaxh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
  }
  case NEON::BI__builtin_neon_vmin_v:
  case NEON::BI__builtin_neon_vminq_v:
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
    if (Ty->isFPOrFPVectorTy())
      Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case NEON::BI__builtin_neon_vminh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
  }
  case NEON::BI__builtin_neon_vabd_v:
  case NEON::BI__builtin_neon_vabdq_v:
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
    if (Ty->isFPOrFPVectorTy())
      Int = Intrinsic::aarch64_neon_fabd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    unsigned ArgElts = VTy->getNumElements();
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
    unsigned BitWidth = EltTy->getBitWidth();
    auto *ArgTy = llvm::FixedVectorType::get(
        llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
    llvm::Type* Tys[2] = { VTy, ArgTy };
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp
               : Intrinsic::aarch64_neon_saddlp;
    SmallVector<llvm::Value*, 1> TmpOps;
    TmpOps.push_back(Ops[1]);
    Function *F = CGM.getIntrinsic(Int, Tys);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
    return Builder.CreateAdd(tmp, addend);
  }
  case NEON::BI__builtin_neon_vpmin_v:
  case NEON::BI__builtin_neon_vpminq_v:
    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
    if (Ty->isFPOrFPVectorTy())
      Int = Intrinsic::aarch64_neon_fminp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  case NEON::BI__builtin_neon_vpmax_v:
  case NEON::BI__builtin_neon_vpmaxq_v:
    Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
    if (Ty->isFPOrFPVectorTy())
      Int = Intrinsic::aarch64_neon_fmaxp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  case NEON::BI__builtin_neon_vminnm_v:
  case NEON::BI__builtin_neon_vminnmq_v:
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  case NEON::BI__builtin_neon_vminnmh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
  case NEON::BI__builtin_neon_vmaxnm_v:
  case NEON::BI__builtin_neon_vmaxnmq_v:
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vmaxnmh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vrecpss_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(
        CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), Ops,
        "vrecps");
  }
  case NEON::BI__builtin_neon_vrecpsd_f64:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(
        CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), Ops,
        "vrecps");
  case NEON::BI__builtin_neon_vrecpsh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(
        CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy), Ops,
        "vrecps");
  case NEON::BI__builtin_neon_vqshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqrshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqshrn
               : Intrinsic::aarch64_neon_sqshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    Int = Intrinsic::aarch64_neon_rshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn
               : Intrinsic::aarch64_neon_sqrshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
  case NEON::BI__builtin_neon_vrndah_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_round
              : Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrnda_v:
  case NEON::BI__builtin_neon_vrndaq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_round
              : Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrndih_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_nearbyint
              : Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
  }
  case NEON::BI__builtin_neon_vrndmh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_floor
              : Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndm_v:
  case NEON::BI__builtin_neon_vrndmq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_floor
              : Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndnh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndn_v:
  case NEON::BI__builtin_neon_vrndnq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndns_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndph_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_ceil
              : Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndp_v:
  case NEON::BI__builtin_neon_vrndpq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_ceil
              : Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndxh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_rint
              : Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrndx_v:
  case NEON::BI__builtin_neon_vrndxq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_rint
              : Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrndh_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_trunc
              : Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vrnd32x_f32:
  case NEON::BI__builtin_neon_vrnd32xq_f32:
  case NEON::BI__builtin_neon_vrnd32x_f64:
  case NEON::BI__builtin_neon_vrnd32xq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint32x;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
  }
  case NEON::BI__builtin_neon_vrnd32z_f32:
  case NEON::BI__builtin_neon_vrnd32zq_f32:
  case NEON::BI__builtin_neon_vrnd32z_f64:
  case NEON::BI__builtin_neon_vrnd32zq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint32z;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
  }
  case NEON::BI__builtin_neon_vrnd64x_f32:
  case NEON::BI__builtin_neon_vrnd64xq_f32:
  case NEON::BI__builtin_neon_vrnd64x_f64:
  case NEON::BI__builtin_neon_vrnd64xq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint64x;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
  }
  case NEON::BI__builtin_neon_vrnd64z_f32:
  case NEON::BI__builtin_neon_vrnd64zq_f32:
  case NEON::BI__builtin_neon_vrnd64z_f64:
  case NEON::BI__builtin_neon_vrnd64zq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Intrinsic::aarch64_neon_frint64z;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
  }
  case NEON::BI__builtin_neon_vrnd_v:
  case NEON::BI__builtin_neon_vrndq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_trunc
              : Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vcvt_f64_v:
  case NEON::BI__builtin_neon_vcvtq_f64_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f64_f32: {
    assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
           "unexpected vcvt_f64_f32 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvt_f32_f64: {
    assert(Type.getEltType() == NeonTypeFlags::Float32 &&
           "unexpected vcvt_f32_f64 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_f16:
  case NEON::BI__builtin_neon_vcvt_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
    Int =
        usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
    llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
  }
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtau
               : Intrinsic::aarch64_neon_fcvtas;
    llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
  }
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu
               : Intrinsic::aarch64_neon_fcvtms;
    llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
  }
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu
               : Intrinsic::aarch64_neon_fcvtns;
    llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
  }
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu
               : Intrinsic::aarch64_neon_fcvtps;
    llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
  }
  case NEON::BI__builtin_neon_vmulx_v:
  case NEON::BI__builtin_neon_vmulxq_v: {
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmulxh_lane_f16:
  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
    // vmulx_lane should be mapped to Neon scalar mulx after
    // extracting the scalar element
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Ops.pop_back();
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmul_lane_v:
  case NEON::BI__builtin_neon_vmul_laneq_v: {
    // v1f64 vmul_lane should be mapped to Neon scalar mul lane
    bool Quad = false;
    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
      Quad = true;
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    llvm::FixedVectorType *VTy =
        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Result, Ty);
  }
  case NEON::BI__builtin_neon_vnegd_s64:
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  case NEON::BI__builtin_neon_vnegh_f16:
    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
  case NEON::BI__builtin_neon_vpmaxnm_v:
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
    Int = Intrinsic::aarch64_neon_fmaxnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  }
  case NEON::BI__builtin_neon_vpminnm_v:
  case NEON::BI__builtin_neon_vpminnmq_v: {
    Int = Intrinsic::aarch64_neon_fminnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  }
  case NEON::BI__builtin_neon_vsqrth_f16: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_sqrt
              : Intrinsic::sqrt;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vsqrt_v:
  case NEON::BI__builtin_neon_vsqrtq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_sqrt
              : Intrinsic::sqrt;
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vrbit_v:
  case NEON::BI__builtin_neon_vrbitq_v: {
    Int = Intrinsic::bitreverse;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
  }
  case NEON::BI__builtin_neon_vmaxv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  }
  case NEON::BI__builtin_neon_vmaxvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  }
  case NEON::BI__builtin_neon_vminv_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  }
  case NEON::BI__builtin_neon_vminvq_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  }
  case NEON::BI__builtin_neon_vmaxnmv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
  }
  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
  }
  case NEON::BI__builtin_neon_vminnmv_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
  }
  case NEON::BI__builtin_neon_vminnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
  }
  case NEON::BI__builtin_neon_vmul_n_f64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
    return Builder.CreateFMul(Ops[0], RHS);
  }
  case NEON::BI__builtin_neon_vaddlv_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v: {
    Int = Intrinsic::aarch64_neon_vsri;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsri_n");
  }
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v: {
    Int = Intrinsic::aarch64_neon_vsli;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsli_n");
  }
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v: {
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
    SmallVector<llvm::Value*,2> TmpOps;
    TmpOps.push_back(Ops[1]);
    TmpOps.push_back(Ops[2]);
    Function* F = CGM.getIntrinsic(Int, Ty);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return Builder.CreateAdd(Ops[0], tmp);
  }
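  // vrsra_n is a rounding shift right (urshl/srshl with a negated shift
  // amount -- the 'true' argument to EmitNeonCall requests that negation)
  // followed by an ordinary vector add of the accumulator.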
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
  }
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vld1_lane_v:
  case NEON::BI__builtin_neon_vld1q_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vldap1_lane_s64:
  case NEON::BI__builtin_neon_vldap1q_lane_s64: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    llvm::LoadInst *LI = Builder.CreateAlignedLoad(
        VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
    LI->setAtomic(llvm::AtomicOrdering::Acquire);
    Ops[0] = LI;
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = PoisonValue::get(Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vst1_lane_v:
  case NEON::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vstl1_lane_s64:
  case NEON::BI__builtin_neon_vstl1q_lane_s64: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    llvm::StoreInst *SI =
        Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
    SI->setAtomic(llvm::AtomicOrdering::Release);
    return SI;
  }
7422 case NEON::BI__builtin_neon_vld2_v:
7423 case NEON::BI__builtin_neon_vld2q_v: {
7425 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7426 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld2");
7427 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7429 case NEON::BI__builtin_neon_vld3_v:
7430 case NEON::BI__builtin_neon_vld3q_v: {
7432 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7433 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld3");
7434 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7436 case NEON::BI__builtin_neon_vld4_v:
7437 case NEON::BI__builtin_neon_vld4q_v: {
7439 Function *F =
CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7440 Ops[1] =
Builder.CreateCall(F, Ops[1],
"vld4");
7441 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v: {
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v: {
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
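  // For the lane variants the operands are rotated so the tuple vectors
  // come first and the source pointer last, matching the ldNlane intrinsic
  // signature; the lane index is zero-extended to the i64 the intrinsic
  // expects.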
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                        Ops, "");
  }
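  // The stN stores rotate the destination pointer from the front of the
  // operand list to the back, since the aarch64.neon.stN intrinsics take
  // the data vectors first and the address last.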
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vqtbl1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
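  // TBL zeroes a result byte when its index is out of range, while TBX
  // leaves the corresponding byte of the destination unchanged; that extra
  // destination operand is why the vqtbx builtins carry one more argument.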
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
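  // The crossed mapping is intentional: vsqadd (signed addend into an
  // unsigned accumulator) is the USQADD instruction, and vuqadd (unsigned
  // addend into a signed accumulator) is SUQADD.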
  case NEON::BI__builtin_neon_vluti2_laneq_mf8:
  case NEON::BI__builtin_neon_vluti2_laneq_bf16:
  case NEON::BI__builtin_neon_vluti2_laneq_f16:
  case NEON::BI__builtin_neon_vluti2_laneq_p16:
  case NEON::BI__builtin_neon_vluti2_laneq_p8:
  case NEON::BI__builtin_neon_vluti2_laneq_s16:
  case NEON::BI__builtin_neon_vluti2_laneq_s8:
  case NEON::BI__builtin_neon_vluti2_laneq_u16:
  case NEON::BI__builtin_neon_vluti2_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_laneq;
    // ...
  }
  case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
  case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
  case NEON::BI__builtin_neon_vluti2q_laneq_f16:
  case NEON::BI__builtin_neon_vluti2q_laneq_p16:
  case NEON::BI__builtin_neon_vluti2q_laneq_p8:
  case NEON::BI__builtin_neon_vluti2q_laneq_s16:
  case NEON::BI__builtin_neon_vluti2q_laneq_s8:
  case NEON::BI__builtin_neon_vluti2q_laneq_u16:
  case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_laneq;
    // ...
  }
  case NEON::BI__builtin_neon_vluti2_lane_mf8:
  case NEON::BI__builtin_neon_vluti2_lane_bf16:
  case NEON::BI__builtin_neon_vluti2_lane_f16:
  case NEON::BI__builtin_neon_vluti2_lane_p16:
  case NEON::BI__builtin_neon_vluti2_lane_p8:
  case NEON::BI__builtin_neon_vluti2_lane_s16:
  case NEON::BI__builtin_neon_vluti2_lane_s8:
  case NEON::BI__builtin_neon_vluti2_lane_u16:
  case NEON::BI__builtin_neon_vluti2_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_lane;
    // ...
  }
  case NEON::BI__builtin_neon_vluti2q_lane_mf8:
  case NEON::BI__builtin_neon_vluti2q_lane_bf16:
  case NEON::BI__builtin_neon_vluti2q_lane_f16:
  case NEON::BI__builtin_neon_vluti2q_lane_p16:
  case NEON::BI__builtin_neon_vluti2q_lane_p8:
  case NEON::BI__builtin_neon_vluti2q_lane_s16:
  case NEON::BI__builtin_neon_vluti2q_lane_s8:
  case NEON::BI__builtin_neon_vluti2q_lane_u16:
  case NEON::BI__builtin_neon_vluti2q_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_lane;
    // ...
  }
  case NEON::BI__builtin_neon_vluti4q_lane_mf8:
  case NEON::BI__builtin_neon_vluti4q_lane_p8:
  case NEON::BI__builtin_neon_vluti4q_lane_s8:
  case NEON::BI__builtin_neon_vluti4q_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti4q_lane;
    // ...
  }
  case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
  case NEON::BI__builtin_neon_vluti4q_laneq_p8:
  case NEON::BI__builtin_neon_vluti4q_laneq_s8:
  case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti4q_laneq;
    // ...
  }
  case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
    Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
  }
  case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
    Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
  }
  case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
                           {llvm::FixedVectorType::get(HalfTy, 8),
                            llvm::FixedVectorType::get(Int8Ty, 16)},
                           Ops, E, "vmmla");
  case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
                           {llvm::FixedVectorType::get(FloatTy, 4),
                            llvm::FixedVectorType::get(Int8Ty, 16)},
                           Ops, E, "vmmla");
  case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
                              llvm::FixedVectorType::get(BFloatTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
  case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
                              llvm::FixedVectorType::get(BFloatTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
  case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
                              llvm::FixedVectorType::get(HalfTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
  case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
                              llvm::FixedVectorType::get(HalfTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
  case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 8),
                              Ops[0]->getType(), false, Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 8),
                              llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
                              E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 16),
                              llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
                              E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
    llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
                                        Builder.getInt64(0));
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
                              Ops[1]->getType(), false, Ops, E, "vfcvtn2");
  }
  case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false,
                               HalfTy, Ops, E, "fdot2");
  case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
  case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
                               FloatTy, Ops, E, "fdot4");
  case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
  case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
                           {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
                           "vmlal");
  case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
                           {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
                           "vmlal");
  case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
  case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
  case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vamin_f16:
  case NEON::BI__builtin_neon_vaminq_f16:
  case NEON::BI__builtin_neon_vamin_f32:
  case NEON::BI__builtin_neon_vaminq_f32:
  case NEON::BI__builtin_neon_vaminq_f64: {
    Int = Intrinsic::aarch64_neon_famin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
  }
  case NEON::BI__builtin_neon_vamax_f16:
  case NEON::BI__builtin_neon_vamaxq_f16:
  case NEON::BI__builtin_neon_vamax_f32:
  case NEON::BI__builtin_neon_vamaxq_f32:
  case NEON::BI__builtin_neon_vamaxq_f64: {
    Int = Intrinsic::aarch64_neon_famax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
  }
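  // FAMIN/FAMAX (FEAT_FAMINMAX) compare absolute values, computing roughly
  // min(|a|, |b|) / max(|a|, |b|) per lane.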
  case NEON::BI__builtin_neon_vscale_f16:
  case NEON::BI__builtin_neon_vscaleq_f16:
  case NEON::BI__builtin_neon_vscale_f32:
  case NEON::BI__builtin_neon_vscaleq_f32:
  case NEON::BI__builtin_neon_vscaleq_f64: {
    Int = Intrinsic::aarch64_neon_fp8_fscale;