#ifndef __IMMINTRIN_H
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLDQINTRIN_H
#define __AVX512VLDQINTRIN_H
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
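
/* Illustrative note (not part of the original header): every intrinsic below
 * is declared through one of these macros, e.g.
 *
 *   static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 *   _mm_mullo_epi64(__m128i __A, __m128i __B);
 *
 * which forces inlining, gates the function on the avx512vl and avx512dq
 * target features, and (in C++11 and later) allows constant evaluation. */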
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mullo_epi64(__m256i __A, __m256i __B) {
  return (__m256i)((__v4du)__A * (__v4du)__B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectq_256(
      (__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectq_256(
      (__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B),
      (__v4di)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mullo_epi64(__m128i __A, __m128i __B) {
  return (__m128i)((__v2du)__A * (__v2du)__B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectq_128(
      (__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectq_128(
      (__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B),
      (__v2di)_mm_setzero_si128());
}
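
/* Usage sketch (illustrative, not from the original header; assumes
 * <immintrin.h> and -mavx512vl -mavx512dq):
 *
 *   __m256i a = _mm256_set1_epi64x(6), b = _mm256_set1_epi64x(7);
 *   __m256i all  = _mm256_mullo_epi64(a, b);             // {42, 42, 42, 42}
 *   __m256i some = _mm256_maskz_mullo_epi64(0x5, a, b);  // {42,  0, 42,  0}
 */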
  /* _mm256_mask_andnot_pd, _mm256_maskz_andnot_pd, _mm_mask_andnot_pd,
     _mm_maskz_andnot_pd (each selects, per mask bit, between the plain
     result and the passthrough or zero vector): */
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

  /* _mm256_mask_andnot_ps, _mm256_maskz_andnot_ps, _mm_mask_andnot_ps,
     _mm_maskz_andnot_ps: */
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

  /* _mm256_mask_and_pd, _mm256_maskz_and_pd, _mm_mask_and_pd,
     _mm_maskz_and_pd: */
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

  /* _mm256_mask_and_ps, _mm256_maskz_and_ps, _mm_mask_and_ps,
     _mm_maskz_and_ps: */
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

  /* _mm256_mask_xor_pd, _mm256_maskz_xor_pd, _mm_mask_xor_pd,
     _mm_maskz_xor_pd: */
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

  /* _mm256_mask_xor_ps, _mm256_maskz_xor_ps, _mm_mask_xor_ps,
     _mm_maskz_xor_ps: */
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

  /* _mm256_mask_or_pd, _mm256_maskz_or_pd, _mm_mask_or_pd,
     _mm_maskz_or_pd: */
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

  /* _mm256_mask_or_ps, _mm256_maskz_or_ps, _mm_mask_or_ps,
     _mm_maskz_or_ps: */
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
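
/* Usage sketch (illustrative): each masked logical op keeps the plain AVX
 * result in lanes whose mask bit is set, and the passthrough (mask) or zero
 * (maskz) value elsewhere:
 *
 *   __m256d w = _mm256_set1_pd(9.0);
 *   __m256d a = _mm256_set1_pd(1.0), b = _mm256_set1_pd(-0.0);
 *   __m256d r = _mm256_mask_xor_pd(w, 0x3, a, b);
 *   // lanes 0-1: 1.0 ^ -0.0 = -1.0 (sign bit flipped); lanes 2-3: 9.0
 */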
  /* _mm_cvtpd_epi64, with mask/maskz forms: */
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,

  /* _mm256_cvtpd_epi64, with mask/maskz forms: */
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,

  /* _mm_cvtpd_epu64, with mask/maskz forms: */
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,

  /* _mm256_cvtpd_epu64, with mask/maskz forms: */
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,

  /* _mm_cvtps_epi64, with mask/maskz forms: */
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,

  /* _mm256_cvtps_epi64, with mask/maskz forms: */
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,

  /* _mm_cvtps_epu64, with mask/maskz forms: */
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,

  /* _mm256_cvtps_epu64, with mask/maskz forms: */
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
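
/* Usage sketch (illustrative): the cvt* forms above convert to signed (qq)
 * or unsigned (uqq) 64-bit lanes using the current rounding mode:
 *
 *   __m128d x = _mm_set_pd(2.5, -1.5);
 *   __m128i q = _mm_cvtpd_epi64(x);  // round-to-nearest-even: {-2, 2}
 */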
  /* _mm_cvtepi64_pd, with mask/maskz forms: */
  return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

  /* _mm256_cvtepi64_pd, with mask/maskz forms: */
  return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

  /* _mm_cvtepi64_ps, with mask/maskz forms: */
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,

  /* _mm256_cvtepi64_ps, with mask/maskz forms (the result narrows four
     64-bit lanes into a __m128): */
  return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
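
/* Usage sketch (illustrative): signed 64-bit lanes to floating point; the
 * 256-bit-to-float variant narrows four i64 lanes into a __m128:
 *
 *   __m256i v = _mm256_set1_epi64x(-3);
 *   __m256d d = _mm256_cvtepi64_pd(v);  // {-3.0, -3.0, -3.0, -3.0}
 *   __m128  f = _mm256_cvtepi64_ps(v);  // {-3.0f, -3.0f, -3.0f, -3.0f}
 */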
  /* _mm_cvttpd_epi64, with mask/maskz forms: */
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,

  /* _mm256_cvttpd_epi64, with mask/maskz forms: */
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,

  /* _mm_cvttpd_epu64, with mask/maskz forms: */
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,

  /* _mm256_cvttpd_epu64, with mask/maskz forms: */
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,

  /* _mm_cvttps_epi64, with mask/maskz forms: */
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,

  /* _mm256_cvttps_epi64, with mask/maskz forms: */
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,

  /* _mm_cvttps_epu64, with mask/maskz forms: */
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,

  /* _mm256_cvttps_epu64, with mask/maskz forms: */
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
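
/* Usage sketch (illustrative): the cvtt* forms truncate toward zero rather
 * than honoring the current rounding mode:
 *
 *   __m128d x = _mm_set_pd(2.9, -1.9);
 *   __m128i t = _mm_cvttpd_epi64(x);  // {-1, 2}
 */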
  /* _mm_cvtepu64_pd, with mask/maskz forms: */
  return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

  /* _mm256_cvtepu64_pd, with mask/maskz forms: */
  return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

  /* _mm_cvtepu64_ps, with mask/maskz forms: */
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,

  /* _mm256_cvtepu64_ps, with mask/maskz forms: */
  return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
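
/* Usage sketch (illustrative): the epu64 sources are treated as unsigned,
 * so values above INT64_MAX convert correctly:
 *
 *   __m128i u = _mm_set1_epi64x((long long)(1ULL << 63));
 *   __m128d d = _mm_cvtepu64_pd(u);  // {9.2233720368547758e18, ...}
 */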
#define _mm_range_pd(A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1))

#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U)))

#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U)))

#define _mm256_range_pd(A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1))

#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U)))

#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U)))

#define _mm_range_ps(A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1))

#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U)))

#define _mm256_range_ps(A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1))

#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U)))
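
/* Usage sketch (illustrative; my reading of the VRANGEPD/PS immediate:
 * imm[1:0] picks min/max/abs-min/abs-max, imm[3:2] the sign source, so 0x5
 * is "max, sign taken from the result"):
 *
 *   __m128d a = _mm_set_pd(3.0, -4.0);
 *   __m128d b = _mm_set_pd(2.0, -1.0);
 *   __m128d r = _mm_range_pd(a, b, 0x5);  // lane0 = -1.0, lane1 = 3.0
 */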
#define _mm_reduce_pd(A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1))

#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U)))

#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U)))

#define _mm256_reduce_pd(A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1))

#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)))

#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)))

#define _mm_reduce_ps(A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1))

#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)))

#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)))

#define _mm256_reduce_ps(A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1))

#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)))

#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)))
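
/* Usage sketch (illustrative; my reading of the VREDUCEPD/PS immediate:
 * imm[7:4] gives the number of fraction bits kept and imm[1:0] the rounding
 * used, so imm == 3 truncates to integer and returns the remainder):
 *
 *   __m128d x = _mm_set1_pd(3.75);
 *   __m128d r = _mm_reduce_pd(x, 3);  // 3.75 - trunc(3.75) = 0.75
 */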
  /* _mm_movepi32_mask, _mm256_movepi32_mask: */
  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);

  /* _mm_movm_epi32, _mm256_movm_epi32: */
  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);

  /* _mm_movm_epi64, _mm256_movm_epi64: */
  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);

  /* _mm_movepi64_mask, _mm256_movepi64_mask: */
  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
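
/* Usage sketch (illustrative): movepi*_mask packs each lane's sign bit into
 * a __mmask8, and movm_epi* expands mask bits into all-ones/all-zero lanes:
 *
 *   __m128i v = _mm_set_epi32(-1, 2, -3, 4);  // lanes 0..3: 4, -3, 2, -1
 *   __mmask8 m = _mm_movepi32_mask(v);        // 0b1010 (lanes 1, 3 negative)
 *   __m128i e = _mm_movm_epi32(0x3);          // lanes 0-1 all ones, 2-3 zero
 */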
  /* _mm256_broadcast_f32x2, with mask/maskz forms: */
  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 0, 1, 0, 1, 0, 1);
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,

  /* _mm256_broadcast_f64x2, with mask/maskz forms: */
  return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                          0, 1, 0, 1);
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,

  /* _mm_broadcast_i32x2, with mask/maskz forms: */
  return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 0, 1);
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,

  /* _mm256_broadcast_i32x2, with mask/maskz forms: */
  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,

  /* _mm256_broadcast_i64x2, with mask/maskz forms: */
  return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                          0, 1, 0, 1);
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
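
/* Usage sketch (illustrative): the *_broadcast_*x2 intrinsics repeat the two
 * low lanes of the source across the destination:
 *
 *   __m128 src = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);  // lane0 = 0, lane1 = 1
 *   __m256 r = _mm256_broadcast_f32x2(src);           // {0,1,0,1,0,1,0,1}
 */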
#define _mm256_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)-1))

#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U)))

#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U)))

#define _mm256_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_undefined_si128(), \
                                                 (__mmask8)-1))

#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)(__m128i)(W), \
                                                 (__mmask8)(U)))

#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
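
/* Usage sketch (illustrative): imm selects which 128-bit half is extracted:
 *
 *   __m256d v = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);  // lanes 0..3: 1,2,3,4
 *   __m128d hi = _mm256_extractf64x2_pd(v, 1);      // {3.0, 4.0}
 */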
#define _mm256_insertf64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
                                           (__v2df)(__m128d)(B), (int)(imm)))

#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                        (__v4df)(__m256d)(W)))

#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                        (__v4df)_mm256_setzero_pd()))

#define _mm256_inserti64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
                                           (__v2di)(__m128i)(B), (int)(imm)))

#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                       (__v4di)(__m256i)(W)))

#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                       (__v4di)_mm256_setzero_si256()))
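
/* Usage sketch (illustrative): the insert forms splice a 128-bit vector into
 * the half selected by imm, and the mask/maskz wrappers then blend the whole
 * 256-bit result:
 *
 *   __m256i base = _mm256_setzero_si256();
 *   __m128i part = _mm_set1_epi64x(7);
 *   __m256i r = _mm256_inserti64x2(base, part, 1);  // lanes: {0, 0, 7, 7}
 */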
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                              (__mmask8)(U)))

#define _mm_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                              (__mmask8)-1))

#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                              (__mmask8)(U)))

#define _mm256_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                              (__mmask8)-1))

#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__mmask8)(U)))

#define _mm_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__mmask8)-1))

#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)(U)))

#define _mm256_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)-1))
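
/* Usage sketch (illustrative; category bits as documented for VFPCLASS:
 * 0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +inf, 0x10 -inf, 0x20 denormal,
 * 0x40 finite negative, 0x80 SNaN; assumes <math.h> for INFINITY):
 *
 *   __m128d x = _mm_set_pd(-0.0, INFINITY);            // lane0 = +inf, lane1 = -0
 *   __mmask8 m = _mm_fpclass_pd_mask(x, 0x08 | 0x04);  // 0b11
 */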
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR

#endif /* __AVX512VLDQINTRIN_H */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi32_mask(__m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu64(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epi64(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu64(__m256i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi32(__mmask8 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi64(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu64(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi64(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu64(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcast_f32x2(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi64(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epi64(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu64(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi64(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu64(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_xor_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epi64(__m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu64(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi64(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu64(__m256i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu64(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu64(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi64(__m256i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepu64_pd(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu64(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu64(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mullo_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epi64(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epi64(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcast_f64x2(__m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi64_mask(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi64(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcast_i64x2(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi64(__mmask8 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi64(__m256i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ps(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi64(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu64_ps(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcast_i32x2(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi32_mask(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mullo_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi64_pd(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi32(__mmask8 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi64_ps(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu64(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu64(__m256i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi64(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi64(__m256i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu64(__m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu64(__m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epi64(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ps(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi64(__mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi64(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi64(__mmask8 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu64(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epi64(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi64(__m256i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epi64(__m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu64(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi64(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi64_pd(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcast_i32x2(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu64(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi64(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi64_mask(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu64_pd(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu64(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu64(__m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu64(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu64(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu64(__m256i __W, __mmask8 __U, __m256d __A)
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_andnot_pd(__m256d __a, __m256d __b)
Performs a bitwise AND of two 256-bit vectors of [4 x double], using the one's complement of the values contained in the first source operand.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_andnot_ps(__m256 __a, __m256 __b)
Performs a bitwise AND of two 256-bit vectors of [8 x float], using the one's complement of the values contained in the first source operand.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_ps(__m256 __a, __m256 __b)
Performs a bitwise XOR of two 256-bit vectors of [8 x float].
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_or_pd(__m256d __a, __m256d __b)
Performs a bitwise OR of two 256-bit vectors of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_pd(__m256d __a, __m256d __b)
Performs a bitwise XOR of two 256-bit vectors of [4 x double].
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_or_ps(__m256 __a, __m256 __b)
Performs a bitwise OR of two 256-bit vectors of [8 x float].
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_and_ps(__m256 __a, __m256 __b)
Performs a bitwise AND of two 256-bit vectors of [8 x float].
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_and_pd(__m256d __a, __m256d __b)
Performs a bitwise AND of two 256-bit vectors of [4 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_pd(__m128d __a, __m128d __b)
Performs a bitwise XOR of two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_or_pd(__m128d __a, __m128d __b)
Performs a bitwise OR of two 128-bit vectors of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_andnot_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double], using the one's complement of the values contained in the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_ps(__m128 __a, __m128 __b)
Performs a bitwise exclusive OR of two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_andnot_ps(__m128 __a, __m128 __b)
Performs a bitwise AND of two 128-bit vectors of [4 x float], using the one's complement of the values contained in the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_or_ps(__m128 __a, __m128 __b)
Performs a bitwise OR of two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_ps(__m128 __a, __m128 __b)
Performs a bitwise AND of two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.