#ifndef __IMMINTRIN_H
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VLDQINTRIN_H
#define __AVX512VLDQINTRIN_H

#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512dq,no-evex512"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512dq,no-evex512"), \
                 __min_vector_width__(256)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi64(__m256i __A, __m256i __B)
{ return (__m256i)((__v4du)__A * (__v4du)__B); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)__W); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
{ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mullo_epi64(__m128i __A, __m128i __B)
{ return (__m128i)((__v2du)__A * (__v2du)__B); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)__W); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
{ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); }
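/* Usage sketch (standalone example, not part of the header itself): the
 * mask form keeps unselected lanes from __W, the maskz form zeroes them.
 * Assumes an AVX512VL+AVX512DQ target, e.g. cc -mavx512vl -mavx512dq. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i a = _mm256_set_epi64x(4, 3, 2, 1); /* lanes: {1, 2, 3, 4} */
  __m256i b = _mm256_set1_epi64x(10);
  /* Multiply only lanes 0 and 2 (mask 0b0101); lanes 1 and 3 keep a. */
  __m256i r = _mm256_mask_mullo_epi64(a, 0x5, a, b);
  long long out[4];
  _mm256_storeu_si256((__m256i *)out, r);
  printf("%lld %lld %lld %lld\n", out[0], out[1], out[2], out[3]); /* 10 2 30 4 */
  return 0;
}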
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)__W); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)__W); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)_mm_setzero_pd()); }

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)__W); }

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)__W); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)_mm_setzero_ps()); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)__W); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)__W); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)_mm_setzero_pd()); }

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)__W); }

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)__W); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)_mm_setzero_ps()); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)__W); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)__W); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_xor_pd(__mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)_mm_setzero_pd()); }

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)__W); }

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)__W); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)_mm_setzero_ps()); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)__W); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)__W); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)_mm_setzero_pd()); }

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)__W); }

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)__W); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)_mm_setzero_ps()); }
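/* Usage sketch (standalone example): the masked logical ops blend the plain
 * AVX result with the destination per mask bit. Clearing the sign bit with
 * andnot(-0.0, x) is a common fabs idiom. Assumes AVX512VL+AVX512DQ. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256d x = _mm256_set_pd(-4.0, -3.0, 2.0, 1.0); /* lanes: {1, 2, -3, -4} */
  __m256d sign = _mm256_set1_pd(-0.0);
  /* fabs in lanes 2 and 3 only (mask 0b1100); other lanes become 0. */
  __m256d r = _mm256_maskz_andnot_pd(0xC, sign, x);
  double out[4];
  _mm256_storeu_pd(out, r);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 0 0 3 4 */
  return 0;
}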
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epi64(__m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2qq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi64(__m128i __W, __mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2qq128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi64(__mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2qq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epi64(__m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2qq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi64(__m256i __W, __mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2qq256_mask((__v4df)__A, (__v4di)__W, (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi64(__mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2qq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu64(__m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2uqq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu64(__m128i __W, __mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2uqq128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu64(__mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2uqq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu64(__m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2uqq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu64(__m256i __W, __mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2uqq256_mask((__v4df)__A, (__v4di)__W, (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu64(__mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2uqq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epi64(__m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2qq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi64(__m128i __W, __mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2qq128_mask((__v4sf)__A, (__v2di)__W, (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi64(__mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2qq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epi64(__m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2qq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi64(__m256i __W, __mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2qq256_mask((__v4sf)__A, (__v4di)__W, (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi64(__mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2qq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu64(__m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2uqq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu64(__m128i __W, __mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2uqq128_mask((__v4sf)__A, (__v2di)__W, (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu64(__mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2uqq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu64(__m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2uqq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu64(__m256i __W, __mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2uqq256_mask((__v4sf)__A, (__v4di)__W, (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu64(__mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2uqq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }
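/* Usage sketch (standalone example): these conversions use the current
 * MXCSR rounding mode (round-to-nearest-even by default), unlike the cvtt*
 * variants further down, which always truncate toward zero. Assumes
 * AVX512VL+AVX512DQ. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128d d = _mm_set_pd(2.5, 1.5); /* lanes: {1.5, 2.5} */
  long long q[2];
  _mm_storeu_si128((__m128i *)q, _mm_cvtpd_epi64(d));
  printf("%lld %lld\n", q[0], q[1]); /* 2 2: both round to nearest even */
  return 0;
}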
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepi64_pd(__m128i __A)
{ return (__m128d)__builtin_convertvector((__v2di)__A, __v2df); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)__W); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)_mm_setzero_pd()); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_pd(__m256i __A)
{ return (__m256d)__builtin_convertvector((__v4di)__A, __v4df); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)__W); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ps(__m128i __A)
{ return (__m128)__builtin_ia32_cvtqq2ps128_mask((__v2di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)-1); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m128i __A)
{ return (__m128)__builtin_ia32_cvtqq2ps128_mask((__v2di)__A, (__v4sf)__W, (__mmask8)__U); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ps(__mmask8 __U, __m128i __A)
{ return (__m128)__builtin_ia32_cvtqq2ps128_mask((__v2di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }

static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ps(__m256i __A)
{ return (__m128)__builtin_convertvector((__v4di)__A, __v4sf); }

static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepi64_ps(__A), (__v4sf)__W); }

static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepi64_ps(__A), (__v4sf)_mm_setzero_ps()); }
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epi64(__m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2qq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi64(__m128i __W, __mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2qq128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi64(__mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2qq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epi64(__m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2qq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi64(__m256i __W, __mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2qq256_mask((__v4df)__A, (__v4di)__W, (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi64(__mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2qq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu64(__m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2uqq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu64(__m128i __W, __mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2uqq128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu64(__mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2uqq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu64(__m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2uqq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu64(__m256i __W, __mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2uqq256_mask((__v4df)__A, (__v4di)__W, (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu64(__mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2uqq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epi64(__m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2qq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi64(__m128i __W, __mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2qq128_mask((__v4sf)__A, (__v2di)__W, (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi64(__mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2qq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epi64(__m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2qq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi64(__m256i __W, __mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2qq256_mask((__v4sf)__A, (__v4di)__W, (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi64(__mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2qq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu64(__m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2uqq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu64(__m128i __W, __mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2uqq128_mask((__v4sf)__A, (__v2di)__W, (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu64(__mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2uqq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu64(__m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2uqq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu64(__m256i __W, __mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2uqq256_mask((__v4sf)__A, (__v4di)__W, (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu64(__mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2uqq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu64_pd(__m128i __A)
{ return (__m128d)__builtin_convertvector((__v2du)__A, __v2df); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)__W); }

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)_mm_setzero_pd()); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_pd(__m256i __A)
{ return (__m256d)__builtin_convertvector((__v4du)__A, __v4df); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)__W); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ps(__m128i __A)
{ return (__m128)__builtin_ia32_cvtuqq2ps128_mask((__v2di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)-1); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m128i __A)
{ return (__m128)__builtin_ia32_cvtuqq2ps128_mask((__v2di)__A, (__v4sf)__W, (__mmask8)__U); }

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ps(__mmask8 __U, __m128i __A)
{ return (__m128)__builtin_ia32_cvtuqq2ps128_mask((__v2di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }

static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ps(__m256i __A)
{ return (__m128)__builtin_convertvector((__v4du)__A, __v4sf); }

static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepu64_ps(__A), (__v4sf)__W); }

static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepu64_ps(__A), (__v4sf)_mm_setzero_ps()); }
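/* Usage sketch (standalone example): the same bit pattern converts very
 * differently as signed vs. unsigned; all-ones is -1 as epi64 but 2^64-1 as
 * epu64. Assumes AVX512VL+AVX512DQ. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128i v = _mm_set1_epi64x(-1);
  double s[2], u[2];
  _mm_storeu_pd(s, _mm_cvtepi64_pd(v));
  _mm_storeu_pd(u, _mm_cvtepu64_pd(v));
  printf("%g %g\n", s[0], u[0]); /* -1 1.84467e+19 */
  return 0;
}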
#define _mm_range_pd(A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1))

#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U)))

#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U)))

#define _mm256_range_pd(A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1))

#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U)))

#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U)))

#define _mm_range_ps(A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1))

#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U)))

#define _mm256_range_ps(A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1))

#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U)))
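/* Usage sketch (standalone example): as I read the Intel SDM, the VRANGE
 * imm8 uses bits [1:0] to pick min/max/abs-min/abs-max and bits [3:2] to
 * control the sign of the result; 0x0B selects max of the absolute values
 * with the sign forced to 0. Assumes AVX512VL+AVX512DQ. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128d a = _mm_set_pd(-8.0, 1.0);
  __m128d b = _mm_set_pd(3.0, -2.0);
  double r[2];
  _mm_storeu_pd(r, _mm_range_pd(a, b, 0x0B)); /* max(|a|, |b|), sign cleared */
  printf("%g %g\n", r[0], r[1]); /* 2 8 */
  return 0;
}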
#define _mm_reduce_pd(A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1))

#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U)))

#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U)))

#define _mm256_reduce_pd(A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1))

#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)))

#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)))

#define _mm_reduce_ps(A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1))

#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)))

#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)))

#define _mm256_reduce_ps(A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1))

#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)))

#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)))
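/* Usage sketch (standalone example): VREDUCE returns the "reduced argument"
 * src - round(src), where imm8[7:4] is the number of fraction bits kept and
 * imm8[1:0] the rounding mode (as I read the SDM; 01b rounds down). With
 * imm = 1 this extracts the non-negative fractional part. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128d x = _mm_set_pd(-1.25, 1.75);
  double r[2];
  _mm_storeu_pd(r, _mm_reduce_pd(x, 1)); /* x - floor(x) */
  printf("%g %g\n", r[0], r[1]); /* 0.75 0.75 */
  return 0;
}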
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi32_mask(__m128i __A)
{ return (__mmask8)__builtin_ia32_cvtd2mask128((__v4si)__A); }

static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi32_mask(__m256i __A)
{ return (__mmask8)__builtin_ia32_cvtd2mask256((__v8si)__A); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi32(__mmask8 __A)
{ return (__m128i)__builtin_ia32_cvtmask2d128(__A); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi32(__mmask8 __A)
{ return (__m256i)__builtin_ia32_cvtmask2d256(__A); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi64(__mmask8 __A)
{ return (__m128i)__builtin_ia32_cvtmask2q128(__A); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi64(__mmask8 __A)
{ return (__m256i)__builtin_ia32_cvtmask2q256(__A); }

static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi64_mask(__m128i __A)
{ return (__mmask8)__builtin_ia32_cvtq2mask128((__v2di)__A); }

static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi64_mask(__m256i __A)
{ return (__mmask8)__builtin_ia32_cvtq2mask256((__v4di)__A); }
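/* Usage sketch (standalone example): movepi*_mask packs each lane's sign
 * bit into a __mmask8 (VPMOVQ2M); movm_epi* goes the other way, expanding
 * each mask bit to an all-ones or all-zeros lane (VPMOVM2Q). */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128i v = _mm_set_epi64x(-5, 7); /* lane 1 negative, lane 0 positive */
  __mmask8 m = _mm_movepi64_mask(v);
  printf("mask = %#x\n", (unsigned)m); /* mask = 0x2 */
  long long e[2];
  _mm_storeu_si128((__m128i *)e, _mm_movm_epi64(m));
  printf("%lld %lld\n", e[0], e[1]); /* 0 -1 */
  return 0;
}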
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x2(__m128 __A)
{ return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 0, 1, 0, 1, 0, 1); }

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)__O); }

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcast_f64x2(__m128d __A)
{ return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)__O); }

static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcast_i32x2(__m128i __A)
{ return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A)
{ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)__O); }

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A)
{ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)_mm_setzero_si128()); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x2(__m128i __A)
{ return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1, 0, 1, 0, 1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A)
{ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)__O); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A)
{ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i64x2(__m128i __A)
{ return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
{ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)__O); }

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
{ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)_mm256_setzero_si256()); }
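/* Usage sketch (standalone example): the *x2 broadcasts repeat a 2-element
 * block across every 128-bit position of the destination; being plain
 * shuffles, the compiler can fold them into surrounding code. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128d lo = _mm_set_pd(2.0, 1.0); /* {1, 2} */
  double out[4];
  _mm256_storeu_pd(out, _mm256_broadcast_f64x2(lo)); /* {1, 2, 1, 2} */
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}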
#define _mm256_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)-1))

#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U)))

#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U)))

#define _mm256_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_undefined_si128(), \
                                                 (__mmask8)-1))

#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)(__m128i)(W), \
                                                 (__mmask8)(U)))

#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
#define _mm256_insertf64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
                                           (__v2df)(__m128d)(B), (int)(imm)))

#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                        (__v4df)(__m256d)(W)))

#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                        (__v4df)_mm256_setzero_pd()))

#define _mm256_inserti64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
                                           (__v2di)(__m128i)(B), (int)(imm)))

#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                       (__v4di)(__m256i)(W)))

#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                       (__v4di)_mm256_setzero_si256()))
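/* Usage sketch (standalone example): imm selects which 128-bit half is read
 * or replaced (0 = low, 1 = high); it must be a compile-time constant, which
 * is why these are macros rather than inline functions. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256d v = _mm256_set_pd(4.0, 3.0, 2.0, 1.0); /* {1, 2, 3, 4} */
  __m128d hi = _mm256_extractf64x2_pd(v, 1);     /* {3, 4} */
  __m256d w = _mm256_insertf64x2(v, hi, 0);      /* {3, 4, 3, 4} */
  double out[4];
  _mm256_storeu_pd(out, w);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}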
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                              (__mmask8)(U)))

#define _mm_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                              (__mmask8)-1))

#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                              (__mmask8)(U)))

#define _mm256_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                              (__mmask8)-1))

#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__mmask8)(U)))

#define _mm_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__mmask8)-1))

#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)(U)))

#define _mm256_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)-1))
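/* Usage sketch (standalone example): the fpclass imm is a category bitmask;
 * per the SDM: 0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +inf, 0x10 -inf, 0x20
 * denormal, 0x40 finite negative, 0x80 SNaN. A result bit is set for each
 * lane matching any selected category. */
#include <immintrin.h>
#include <math.h>
#include <stdio.h>

int main(void) {
  __m128d v = _mm_set_pd(INFINITY, NAN); /* lane 0 NaN, lane 1 +inf */
  __mmask8 special = _mm_fpclass_pd_mask(v, 0x01 | 0x08 | 0x10 | 0x80);
  printf("%#x\n", (unsigned)special); /* 0x3: both lanes match */
  return 0;
}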
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif /* __AVX512VLDQINTRIN_H */
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_xor_pd(__m256d __a, __m256d __b)
Performs a bitwise XOR of two 256-bit vectors of [4 x double].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_xor_ps(__m256 __a, __m256 __b)
Performs a bitwise XOR of two 256-bit vectors of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_and_ps(__m256 __a, __m256 __b)
Performs a bitwise AND of two 256-bit vectors of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_or_ps(__m256 __a, __m256 __b)
Performs a bitwise OR of two 256-bit vectors of [8 x float].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_andnot_pd(__m256d __a, __m256d __b)
Performs a bitwise AND of two 256-bit vectors of [4 x double], using the one's complement of the values contained in the first source operand.
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_or_pd(__m256d __a, __m256d __b)
Performs a bitwise OR of two 256-bit vectors of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_and_pd(__m256d __a, __m256d __b)
Performs a bitwise AND of two 256-bit vectors of [4 x double].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_andnot_ps(__m256 __a, __m256 __b)
Performs a bitwise AND of two 256-bit vectors of [8 x float], using the one's complement of the values contained in the first source operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_pd(__m128d __a, __m128d __b)
Performs a bitwise XOR of two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_or_pd(__m128d __a, __m128d __b)
Performs a bitwise OR of two 128-bit vectors of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_andnot_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double], using the one's complement of the values contained in the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_ps(__m128 __a, __m128 __b)
Performs a bitwise exclusive OR of two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_andnot_ps(__m128 __a, __m128 __b)
Performs a bitwise AND of two 128-bit vectors of [4 x float], using the one's complement of the values contained in the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_or_ps(__m128 __a, __m128 __b)
Performs a bitwise OR of two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_ps(__m128 __a, __m128 __b)
Performs a bitwise AND of two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.