10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
/* Unsigned element-wise views of a 512-bit vector. */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Explicitly signed variant for char; 'char' signedness is
 * implementation-defined, so this must not leak into the interface. */
typedef signed char __v64qs __attribute__((__vector_size__(64)));

/* Public 512-bit vector types, naturally (64-byte) aligned. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Unaligned counterparts used by loadu/storeu-style operations. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
/* Rounding-control immediates accepted by the *_round_* intrinsics. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* GE/GT comparison predicates are aliases for not-less-than /
 * not-less-or-equal (the _MM_CMPINT_* enum is defined earlier in this
 * header — NOTE(review): not visible in this chunk; confirm it precedes
 * these aliases). */
#define _MM_CMPINT_GE _MM_CMPINT_NLT
#define _MM_CMPINT_GT _MM_CMPINT_NLE
/* Default function attributes for the intrinsics in this file: always
 * inlined, no debug info, and gated on the avx512f target feature. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

/* Under C++11 and later the intrinsics can additionally be constexpr. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
193 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
/* Legacy alias: zeroing a vector of epi32 is the same as zeroing si512. */
#define _mm512_setzero_epi32 _mm512_setzero_si512
201 return (__m512d)__builtin_ia32_undef512();
207 return (__m512)__builtin_ia32_undef512();
211 return (__m512)__builtin_ia32_undef512();
217 return (__m512i)__builtin_ia32_undef512();
222 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
228 return (__m512i)__builtin_ia32_selectd_512(__M,
235 return (__m512i)__builtin_ia32_selectd_512(__M,
242 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
243 0, 0, 0, 0, 0, 0, 0, 0);
248 return (__m512i)__builtin_ia32_selectq_512(
254 return (__m512i)__builtin_ia32_selectq_512(__M,
260 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
261 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
/* Legacy alias for the untyped zero vector. */
#define _mm512_setzero _mm512_setzero_ps
268 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
274 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
275 __w, __w, __w, __w, __w, __w, __w, __w };
281 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
287 return __extension__ (__m512i)(__v64qi){
288 __w, __w, __w, __w, __w, __w, __w, __w,
289 __w, __w, __w, __w, __w, __w, __w, __w,
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w,
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w };
301 return __extension__ (__m512i)(__v32hi){
302 __w, __w, __w, __w, __w, __w, __w, __w,
303 __w, __w, __w, __w, __w, __w, __w, __w,
304 __w, __w, __w, __w, __w, __w, __w, __w,
305 __w, __w, __w, __w, __w, __w, __w, __w };
311 return __extension__ (__m512i)(__v16si){
312 __s, __s, __s, __s, __s, __s, __s, __s,
313 __s, __s, __s, __s, __s, __s, __s, __s };
318 return (__m512i)__builtin_ia32_selectd_512(__M,
326 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
331 return (__m512i)__builtin_ia32_selectq_512(__M,
338 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
344 return __extension__ (__m512i)(__v16si)
345 {
__D, __C, __B, __A,
__D, __C, __B, __A,
346 __D, __C, __B, __A,
__D, __C, __B, __A };
351 return __extension__ (__m512i) (__v8di)
352 {
__D, __C, __B, __A,
__D, __C, __B, __A };
357 return __extension__ (__m512d)
358 {
__D, __C, __B, __A,
__D, __C, __B, __A };
363 return __extension__ (__m512)
364 {
__D, __C, __B, __A,
__D, __C, __B, __A,
365 __D, __C, __B, __A,
__D, __C, __B, __A };
390 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
391 0, 0, 0, 0, 0, 0, 0, 0);
399 return __builtin_shufflevector(
__a, __builtin_nondeterministic_value(
__a), 0,
400 1, 2, 3, 4, 5, 6, 7);
406 return __builtin_shufflevector(
__a, __builtin_nondeterministic_value(
__a), 0,
407 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
413 return __builtin_shufflevector(
__a,
__a, 0, 1);
419 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
425 return __builtin_shufflevector(
__a,
__a, 0, 1, 2, 3);
430 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
436 return (__m512) (__A);
442 return (__m512i) (__A);
448 __m256d __B = __builtin_nondeterministic_value(__B);
449 return __builtin_shufflevector(
450 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
451 __B, 0, 1, 2, 3, 4, 5, 6, 7);
457 return (__m512d) (__A);
463 return (__m512i) (__A);
469 __m256 __B = __builtin_nondeterministic_value(__B);
470 return __builtin_shufflevector(
471 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
472 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
478 __m256i __B = __builtin_nondeterministic_value(__B);
479 return __builtin_shufflevector(
480 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
481 __B, 0, 1, 2, 3, 4, 5, 6, 7);
487 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
493 return (__m512) (__A);
499 return (__m512d) (__A);
505 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
510 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
538 return __builtin_shufflevector((__v2df)
__a, (__v2df)
_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
556 return __builtin_shufflevector((__v4df)
__a, (__v4df)
_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
573 return __builtin_shufflevector((__v4sf)
__a, (__v4sf)
_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
590 return __builtin_shufflevector((__v8sf)
__a, (__v8sf)
_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
607 return __builtin_shufflevector((__v2di)
__a, (__v2di)
_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
631 return (__m512i)((__v16su)
__a & (__v16su)
__b);
636 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
651 return (__m512i)((__v8du)
__a & (__v8du)
__b);
656 return (__m512i)__builtin_ia32_selectq_512(
670 return (__m512i)(~(__v8du)__A & (__v8du)__B);
676 return (__m512i)(~(__v16su)__A & (__v16su)__B);
682 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
697 return (__m512i)(~(__v8du)__A & (__v8du)__B);
703 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
718 return (__m512i)((__v16su)
__a | (__v16su)
__b);
724 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
738 return (__m512i)((__v8du)
__a | (__v8du)
__b);
744 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__k,
758 return (__m512i)((__v16su)
__a ^ (__v16su)
__b);
764 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
778 return (__m512i)((__v8du)
__a ^ (__v8du)
__b);
784 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__k,
798 return (__m512i)((__v8du)
__a & (__v8du)
__b);
804 return (__m512i)((__v8du)
__a | (__v8du)
__b);
810 return (__m512i)((__v8du)
__a ^ (__v8du)
__b);
817 return (__m512d)((__v8df)
__a + (__v8df)
__b);
822 return (__m512)((__v16sf)
__a + (__v16sf)
__b);
827 return (__m512d)((__v8df)
__a * (__v8df)
__b);
832 return (__m512)((__v16sf)
__a * (__v16sf)
__b);
837 return (__m512d)((__v8df)
__a - (__v8df)
__b);
842 return (__m512)((__v16sf)
__a - (__v16sf)
__b);
847 return (__m512i) ((__v8du) __A + (__v8du) __B);
852 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
859 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
866 return (__m512i) ((__v8du) __A - (__v8du) __B);
871 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
878 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
885 return (__m512i) ((__v16su) __A + (__v16su) __B);
890 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
897 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
904 return (__m512i) ((__v16su) __A - (__v16su) __B);
909 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
916 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
/* Packed double-precision max with explicit rounding control, plus
 * merge-masked (W) and zero-masked variants. */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
935static __inline__ __m512d
937 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
943 return (__m512d)__builtin_ia32_selectpd_512(__U,
950 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* Packed single-precision max with explicit rounding control, plus
 * merge-masked (W) and zero-masked variants. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
969static __inline__ __m512
971 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
977 return (__m512)__builtin_ia32_selectps_512(__U,
984 return (__m512)__builtin_ia32_selectps_512(__U,
991 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1000 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
/* Scalar single-precision max with rounding control; unmasked, merge-masked
 * (W), and zero-masked forms all lower to the same masked builtin. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1027 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1036 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
/* Scalar double-precision max with rounding control; unmasked, merge-masked
 * (W), and zero-masked forms all lower to the same masked builtin. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1061static __inline __m512i
1063 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1068 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1075 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1082 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1087 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1094 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1101 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1106 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1113 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1120 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1125 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1132 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
/* Packed double-precision min with explicit rounding control, plus
 * merge-masked (W) and zero-masked variants. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
1151static __inline__ __m512d
1153 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1159 return (__m512d)__builtin_ia32_selectpd_512(__U,
1166 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* Packed single-precision min with explicit rounding control, plus
 * merge-masked (W) and zero-masked variants. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1185static __inline__ __m512
1187 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1193 return (__m512)__builtin_ia32_selectps_512(__U,
1200 return (__m512)__builtin_ia32_selectps_512(__U,
1207 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1216 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
/* Scalar single-precision min with rounding control; unmasked, merge-masked
 * (W), and zero-masked forms all lower to the same masked builtin. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1243 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1252 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
/* Scalar double-precision min with rounding control; unmasked, merge-masked
 * (W), and zero-masked forms all lower to the same masked builtin. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1277static __inline __m512i
1279 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1284 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1291 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1298 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1303 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1310 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1317 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1322 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1329 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1336 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1341 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1348 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1355 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si)
__Y);
1360 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1367 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1374 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)
__Y);
1379 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1386 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1393 return (__m512i) ((__v16su) __A * (__v16su) __B);
1398 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1405 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1412 return (__m512i) ((__v8du) __A * (__v8du) __B);
1417 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* Packed double-precision square root with explicit rounding control, plus
 * merge-masked (W) and zero-masked variants. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))

#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
1436 return (__m512d)__builtin_elementwise_sqrt((__v8df)__A);
1441 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)
_mm512_sqrt_pd(__A),
1447 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)
_mm512_sqrt_pd(__A),
/* Packed single-precision square root with explicit rounding control, plus
 * merge-masked (W) and zero-masked variants. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))

#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1465 return (__m512)__builtin_elementwise_sqrt((__v16sf)__A);
1470 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)
_mm512_sqrt_ps(__A),
1476 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)
_mm512_sqrt_ps(__A),
1483 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1491 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1499 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1508 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1517 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1525 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1534 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1544 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1553 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1562 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1572 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1581 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1590 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1599 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1607 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1616 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1625 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1633 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1642 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1652 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1661 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1670 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1680 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1689 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1698 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1700 (__v16sf) __A, (
unsigned short)-1,
1707 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1716 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1718 (__v8df) __A, (
unsigned char)-1,
1725 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1734 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1743 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1745 (__v16sf) __A, (
unsigned short)-1,
1752 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1754 (__v8df) __A, (
unsigned char)-1,
1761 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1769 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1774 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1781 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1788 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1793 return (__m512i)__builtin_ia32_selectd_512(__U,
1800 return (__m512i)__builtin_ia32_selectd_512(__U,
1808 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision add with rounding control; unmasked, merge-masked
 * (W), and zero-masked forms all lower to the same masked builtin. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1838 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* Scalar double-precision add with rounding control; unmasked, merge-masked
 * (W), and zero-masked forms all lower to the same masked builtin. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1866 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1873 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1880 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
1887 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* Packed double- and single-precision add with explicit rounding control,
 * each with merge-masked (W) and zero-masked variants. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1923 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision subtract with rounding control; unmasked,
 * merge-masked (W), and zero-masked forms lower to the same masked builtin. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1952 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* Scalar double-precision subtract with rounding control; unmasked,
 * merge-masked (W), and zero-masked forms lower to the same masked builtin. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1981 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1988 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1995 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2002 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* Packed double- and single-precision subtract with explicit rounding
 * control, each with merge-masked (W) and zero-masked variants. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2038 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision multiply with rounding control; unmasked,
 * merge-masked (W), and zero-masked forms lower to the same masked builtin. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2067 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* Scalar double-precision multiply with rounding control; unmasked,
 * merge-masked (W), and zero-masked forms lower to the same masked builtin. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2096 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2103 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2110 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2117 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* Packed double- and single-precision multiply with explicit rounding
 * control, each with merge-masked (W) and zero-masked variants. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2153 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision divide with rounding control; unmasked,
 * merge-masked (W), and zero-masked forms lower to the same masked builtin. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2183 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* Scalar double-precision divide with rounding control; unmasked,
 * merge-masked (W), and zero-masked forms lower to the same masked builtin. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2210static __inline __m512d
2212 return (__m512d)((__v8df)
__a/(__v8df)
__b);
2217 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2224 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2231 return (__m512)((__v16sf)
__a/(__v16sf)
__b);
2236 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2243 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* Packed double- and single-precision divide with explicit rounding
 * control, each with merge-masked (W) and zero-masked variants. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
/* VRNDSCALEPS: round packed floats to a number of fraction bits given by
   the immediate.  The non-_round forms use the current rounding direction.
   NOTE(review): the mask-argument / rounding-argument lines missing from
   three of these macros were restored to match the documented intrinsic
   signatures (all-ones mask for unmasked forms, (int)(R) for _round forms).  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))
/* VRNDSCALEPD: double-precision counterpart of the roundscale_ps family.
   NOTE(review): the mask-argument / rounding-argument lines missing from
   three of these macros were restored to match the documented intrinsic
   signatures (all-ones mask for unmasked forms, (int)(R) for _round forms).  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
/* Double-precision FMA family with static rounding.  All forms funnel
   through the vfmaddpd512 builtins; the fmsub / fnmadd / fnmsub variants
   are expressed by negating the C and/or A operands of the same builtin.
   _mask merges into A, _mask3 merges into C, _maskz zeroes.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

/* fmsub = fmadd with C negated.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

/* fnmadd = fmadd with A negated.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

/* fnmsub = fmadd with both A and C negated.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2425 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2431 return (__m512d)__builtin_ia32_selectpd_512(
2437 return (__m512d)__builtin_ia32_selectpd_512(
2443 return (__m512d)__builtin_ia32_selectpd_512(
2450 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2456 return (__m512d)__builtin_ia32_selectpd_512(
2462 return (__m512d)__builtin_ia32_selectpd_512(
2468 return (__m512d)__builtin_ia32_selectpd_512(
2475 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2481 return (__m512d)__builtin_ia32_selectpd_512(
2487 return (__m512d)__builtin_ia32_selectpd_512(
2493 return (__m512d)__builtin_ia32_selectpd_512(
2500 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2506 return (__m512d)__builtin_ia32_selectpd_512(
2512 return (__m512d)__builtin_ia32_selectpd_512(
2518 return (__m512d)__builtin_ia32_selectpd_512(
/* Single-precision FMA family with static rounding; mirrors the _pd
   family above.  Operand negation placement is preserved exactly as in
   the original definitions (note fnmadd/fnmsub here negate B in the
   unmasked forms but A in the mask3/maskz forms — both encode the same
   mathematical result).  */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2608 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2614 return (__m512)__builtin_ia32_selectps_512(
2620 return (__m512)__builtin_ia32_selectps_512(
2626 return (__m512)__builtin_ia32_selectps_512(
2633 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2639 return (__m512)__builtin_ia32_selectps_512(
2645 return (__m512)__builtin_ia32_selectps_512(
2651 return (__m512)__builtin_ia32_selectps_512(
2658 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2664 return (__m512)__builtin_ia32_selectps_512(
2670 return (__m512)__builtin_ia32_selectps_512(
2676 return (__m512)__builtin_ia32_selectps_512(
2683 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2689 return (__m512)__builtin_ia32_selectps_512(
2695 return (__m512)__builtin_ia32_selectps_512(
2701 return (__m512)__builtin_ia32_selectps_512(
/* Double-precision fused multiply with alternating add/subtract and
   static rounding.  fmsubadd is fmaddsub with C negated.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2758 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2768 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2778 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2788 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2798 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2808 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2818 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
/* Single-precision counterpart of the fmaddsub/fmsubadd _pd family.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
2877 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2887 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2897 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2907 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2917 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2927 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2937 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
/* mask3 forms that merge into C use the dedicated vfmsub* builtins,
   since negating C would destroy the merge source.  */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2966 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
/* Single-precision mask3 fmsubadd with static rounding; merges into C.  */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
2983 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
/* Merge-masked fnmadd/fnmsub forms.  The _mask variants merge into A, so
   B (and C for fnmsub) are negated instead of A; the mask3 variants merge
   into C and therefore route through vfmsub*_mask3 with A negated.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
3032 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3039 return (__m512i)__builtin_ia32_selectd_512(__U,
3047 return (__m512i)__builtin_ia32_selectd_512(__U,
3055 return (__m512i)__builtin_ia32_selectd_512(__U,
3062 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3069 return (__m512i)__builtin_ia32_selectq_512(__U,
3077 return (__m512i)__builtin_ia32_selectq_512(__U,
3085 return (__m512i)__builtin_ia32_selectq_512(__U,
/* VALIGNQ/VALIGND: concatenate A:B and shift right by I elements.
   Masked forms apply the mask via select over the unmasked result.  */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))
/* VEXTRACTF64x4 / VEXTRACTF32x4: extract a 256-bit (4 x double) or
   128-bit (4 x float) lane selected by the immediate.
   NOTE(review): the final mask-argument lines were missing from all six
   macros in this extraction; restored per the documented builtin
   signatures (all-ones mask for unmasked, (U) for mask/maskz — the
   4-element results take an __mmask8 in both families).  */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3153 return (__m512d) __builtin_ia32_selectpd_512 ((
__mmask8) __U,
3160 return (__m512) __builtin_ia32_selectps_512 ((
__mmask16) __U,
3167 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
3174 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
/* Packed single-precision compares producing a 16-bit mask.  The generic
   cmp macros take a predicate P; the named cmpXX macros below are simple
   wrappers supplying the corresponding _CMP_* predicate.  */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
/* Packed double-precision compares producing an 8-bit mask; mirrors the
   _ps compare family above.  */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
/* Truncating float -> unsigned 32-bit conversion with an explicit
   SAE/rounding operand R.  */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
3312 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3322 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3331 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
/* Signed / unsigned 32-bit integer -> float conversion with static
   rounding mode R.  */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3367static __inline__ __m512
3369 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3374 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3381 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3388 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3393 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3400 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3417 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3422 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3429 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3436 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3441 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3448 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
/* Double -> float narrowing conversion with static rounding mode R.
   NOTE(review): the trailing (int)(R) line of the _mask variant was
   missing in this extraction; restored per the builtin's signature
   (it is present in the unmasked and maskz variants).  */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
3478static __inline__ __m256
3480 return (__m256)__builtin_ia32_cvtpd2ps512_mask(
3487 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3495 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3503 return (__m512) __builtin_shufflevector((__v8sf)
_mm512_cvtpd_ps(__A),
3505 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3510 return (__m512) __builtin_shufflevector (
3514 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/* VCVTPS2PH: float -> half conversion; the rounding control is encoded
   in immediate I, so the non-round aliases below are identical macros.
   Parameter order is unusual: in the _mask form U is the passthrough
   vector and W is the mask; in the _maskz form W is the mask.
   NOTE(review): the final mask-argument lines were missing from all
   three macros in this extraction; restored per the builtin signature.  */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(U), \
                                             (__mmask16)(W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)))

#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
/* VCVTPH2PS: half -> float widening conversion with SAE operand R.  */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3555 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3565 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3574 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
/* Truncating double -> signed 32-bit conversion with SAE operand R.  */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3598 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)
__a,
3607 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3616 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
/* Truncating float -> signed 32-bit conversion with SAE operand R.  */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3641 __builtin_ia32_cvttps2dq512_mask((__v16sf)
__a,
3649 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3658 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
/* Float -> signed 32-bit conversion with static rounding mode R.  */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
3682 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3691 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3700 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
/* Double -> signed 32-bit conversion with static rounding mode R.  */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
3725 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3735 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3744 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
/* Float -> unsigned 32-bit conversion with static rounding mode R.  */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3769 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
3779 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3788 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
/* Double -> unsigned 32-bit conversion with static rounding mode R.  */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3813 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3823 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3832 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3855 return (__m512d)__builtin_shufflevector((__v8df)
__a, (__v8df)
__b,
3856 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
3862 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3870 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3877 return (__m512d)__builtin_shufflevector((__v8df)
__a, (__v8df)
__b,
3878 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
3884 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3892 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3899 return (__m512)__builtin_shufflevector((__v16sf)
__a, (__v16sf)
__b,
3901 2+4, 18+4, 3+4, 19+4,
3902 2+8, 18+8, 3+8, 19+8,
3903 2+12, 18+12, 3+12, 19+12);
3909 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
3917 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
3924 return (__m512)__builtin_shufflevector((__v16sf)
__a, (__v16sf)
__b,
3926 0+4, 16+4, 1+4, 17+4,
3927 0+8, 16+8, 1+8, 17+8,
3928 0+12, 16+12, 1+12, 17+12);
3934 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
3942 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
3949 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3951 2+4, 18+4, 3+4, 19+4,
3952 2+8, 18+8, 3+8, 19+8,
3953 2+12, 18+12, 3+12, 19+12);
3959 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
3967 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
3974 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3976 0+4, 16+4, 1+4, 17+4,
3977 0+8, 16+8, 1+8, 17+8,
3978 0+12, 16+12, 1+12, 17+12);
3984 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
3992 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
3999 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4000 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4006 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4014 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4021 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4022 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4028 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4036 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4047 struct __loadu_si512 {
4050 return ((
const struct __loadu_si512*)
__P)->__v;
4056 struct __loadu_epi32 {
4059 return ((
const struct __loadu_epi32*)
__P)->__v;
4065 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)
__P,
4074 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)
__P,
4083 struct __loadu_epi64 {
4086 return ((
const struct __loadu_epi64*)
__P)->__v;
4092 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)
__P,
4100 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)
__P,
4109 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)
__P,
4117 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)
__P,
4126 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)
__P,
4134 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)
__P,
4146 return ((
const struct __loadu_pd*)
__p)->__v;
4155 return ((
const struct __loadu_ps*)
__p)->__v;
4161 return *(
const __m512*)
__p;
4167 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)
__P,
4175 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)
__P,
4184 return *(
const __m512d*)
__p;
4190 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)
__P,
4198 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)
__P,
4207 return *(
const __m512i *)
__P;
4213 return *(
const __m512i *)
__P;
4219 return *(
const __m512i *)
__P;
4227 struct __storeu_epi64 {
4230 ((
struct __storeu_epi64*)
__P)->
__v = __A;
4236 __builtin_ia32_storedqudi512_mask ((
long long *)
__P, (__v8di) __A,
4243 struct __storeu_si512 {
4246 ((
struct __storeu_si512*)
__P)->
__v = __A;
4252 struct __storeu_epi32 {
4255 ((
struct __storeu_epi32*)
__P)->
__v = __A;
4261 __builtin_ia32_storedqusi512_mask ((
int *)
__P, (__v16si) __A,
4268 __builtin_ia32_storeupd512_mask ((
double *)
__P, (__v8df) __A, (
__mmask8) __U);
4274 struct __storeu_pd {
4277 ((
struct __storeu_pd*)
__P)->
__v = __A;
4283 __builtin_ia32_storeups512_mask ((
float *)
__P, (__v16sf) __A,
4290 struct __storeu_ps {
4293 ((
struct __storeu_ps*)
__P)->
__v = __A;
4299 __builtin_ia32_storeapd512_mask ((__v8df *)
__P, (__v8df) __A, (
__mmask8) __U);
4305 *(__m512d*)
__P = __A;
4311 __builtin_ia32_storeaps512_mask ((__v16sf *)
__P, (__v16sf) __A,
4318 *(__m512*)
__P = __A;
4324 *(__m512i *)
__P = __A;
4330 *(__m512i *)
__P = __A;
4336 *(__m512i *)
__P = __A;
4343 return __builtin_ia32_knothi(__M);
/* Predicate-specific comparison aliases for 512-bit 32-bit integers.
 * Each expands to the generic _mm512_cmp_epi32_mask / _mm512_cmp_epu32_mask
 * with a fixed _MM_CMPINT_* predicate; mask_ forms additionally take a
 * write-mask k. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 32-bit variants. */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Predicate-specific comparison aliases for 512-bit 64-bit integers,
 * signed (epi64) and unsigned (epu64); mask_ forms additionally take a
 * write-mask k. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 64-bit variants. */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4452 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4457 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4464 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4473 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4478 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4485 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4492 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4497 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4504 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4511 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4516 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4523 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4530 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4535 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4542 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4549 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4554 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4561 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4568 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4573 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4580 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4587 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4592 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4599 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4606 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4611 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4618 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4625 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4630 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4637 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4645 return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
4651 return (__m512i)__builtin_ia32_selectd_512(__U,
4659 return (__m512i)__builtin_ia32_selectd_512(__U,
4667 return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
4673 return (__m512i)__builtin_ia32_selectq_512(__U,
4681 return (__m512i)__builtin_ia32_selectq_512(__U,
/* Generic 512-bit integer compares; p is an _MM_CMPINT_* predicate.
 * Unmasked forms pass an all-ones mask; masked forms pass the caller's
 * write-mask m.
 * NOTE(review): the final mask-argument continuation lines were truncated in
 * this copy; restored to complete the builtin call (five arguments), matching
 * the masked forms below. */
#define _mm512_cmp_epi32_mask(a, b, p) \
    ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                            (__v16si)(__m512i)(b), (int)(p), \
                                            (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
    ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                             (__v16si)(__m512i)(b), (int)(p), \
                                             (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
    ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                           (__v8di)(__m512i)(b), (int)(p), \
                                           (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
    ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                            (__v8di)(__m512i)(b), (int)(p), \
                                            (__mmask8)-1))

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
    ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                            (__v16si)(__m512i)(b), (int)(p), \
                                            (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
    ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                             (__v16si)(__m512i)(b), (int)(p), \
                                             (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
    ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                           (__v8di)(__m512i)(b), (int)(p), \
                                           (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
    ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                            (__v8di)(__m512i)(b), (int)(p), \
                                            (__mmask8)(m)))
/* Rotate-left by immediate for 32-/64-bit lanes. mask_ forms blend the
 * rotated result with W under write-mask U; maskz_ forms zero inactive
 * lanes. */
#define _mm512_rol_epi32(a, b) \
    ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
    ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                         (__v16si)_mm512_rol_epi32((a), (b)), \
                                         (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
    ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                         (__v16si)_mm512_rol_epi32((a), (b)), \
                                         (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
    ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
    ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                         (__v8di)_mm512_rol_epi64((a), (b)), \
                                         (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
    ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                         (__v8di)_mm512_rol_epi64((a), (b)), \
                                         (__v8di)_mm512_setzero_si512()))
4757 return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
4763 return (__m512i)__builtin_ia32_selectd_512(__U,
4771 return (__m512i)__builtin_ia32_selectd_512(__U,
4779 return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
4785 return (__m512i)__builtin_ia32_selectq_512(__U,
4793 return (__m512i)__builtin_ia32_selectq_512(__U,
/* Rotate-right by immediate for 32-/64-bit lanes; mask_/maskz_ forms follow
 * the same select pattern as the rol macros. */
#define _mm512_ror_epi32(A, B) \
    ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                         (__v16si)_mm512_ror_epi32((A), (B)), \
                                         (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                         (__v16si)_mm512_ror_epi32((A), (B)), \
                                         (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
    ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
    ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                         (__v8di)_mm512_ror_epi64((A), (B)), \
                                         (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
    ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                         (__v8di)_mm512_ror_epi64((A), (B)), \
                                         (__v8di)_mm512_setzero_si512()))
4826 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (
int)__B);
4832 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4839 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4846 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (
int)__B);
4852 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4859 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4866 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (
int)__B);
4872 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4879 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4886 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (
int)__B);
4892 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4899 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4907 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *)
__P,
4915 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *)
__P,
4924 __builtin_ia32_movdqa32store512_mask ((__v16si *)
__P, (__v16si) __A,
4931 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
4939 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
4947 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
4955 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
4963 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *)
__P,
4971 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *)
__P,
4980 __builtin_ia32_movdqa64store512_mask ((__v8di *)
__P, (__v8di) __A,
4987 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
4988 0, 0, 2, 2, 4, 4, 6, 6);
4994 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
5002 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
/* VFIXUPIMMPD: fix up special double values in A/B per lookup table C and
 * immediate imm. _round_ forms take explicit rounding/SAE control R;
 * non-round forms pass _MM_FROUND_CUR_DIRECTION.
 * NOTE(review): truncated mask/rounding continuation lines restored to
 * match the complete _round_ siblings. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
    ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8di)(__m512i)(C), (int)(imm), \
                                                (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
    ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8di)(__m512i)(C), (int)(imm), \
                                                (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
    ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8di)(__m512i)(C), (int)(imm), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
    ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8di)(__m512i)(C), (int)(imm), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
    ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                                 (__v8df)(__m512d)(B), \
                                                 (__v8di)(__m512i)(C), \
                                                 (int)(imm), (__mmask8)(U), \
                                                 (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
    ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                                 (__v8df)(__m512d)(B), \
                                                 (__v8di)(__m512i)(C), \
                                                 (int)(imm), (__mmask8)(U), \
                                                 _MM_FROUND_CUR_DIRECTION))
/* VFIXUPIMMPS: single-precision counterpart of the fixupimm_pd macros
 * (16 lanes, __mmask16).
 * NOTE(review): truncated mask/rounding continuation lines restored to
 * match the complete _round_ siblings. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
    ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16si)(__m512i)(C), (int)(imm), \
                                               (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
    ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16si)(__m512i)(C), (int)(imm), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
    ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16si)(__m512i)(C), (int)(imm), \
                                               (__mmask16)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
    ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16si)(__m512i)(C), (int)(imm), \
                                               (__mmask16)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
    ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                                (__v16sf)(__m512)(B), \
                                                (__v16si)(__m512i)(C), \
                                                (int)(imm), (__mmask16)(U), \
                                                (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
    ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                                (__v16sf)(__m512)(B), \
                                                (__v16si)(__m512i)(C), \
                                                (int)(imm), (__mmask16)(U), \
                                                _MM_FROUND_CUR_DIRECTION))
/* Scalar double fixupimm: operates on the low lane of 128-bit vectors.
 * NOTE(review): truncated mask continuation lines restored to match the
 * complete _round_ siblings. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
    ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
    ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
    ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
    ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
    ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
    ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
/* Scalar float fixupimm: operates on the low lane of 128-bit vectors.
 * NOTE(review): truncated mask continuation lines restored to match the
 * complete _round_ siblings. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
    ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
    ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
    ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
    ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
    ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
    ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
/* Extract the exponent of the low double of B (merged with A's upper lane)
 * under rounding/SAE control R; unmasked (all-ones mask). */
#define _mm_getexp_round_sd(A, B, R) \
    ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                    (__v2df)(__m128d)(B), \
                                                    (__v2df)_mm_setzero_pd(), \
                                                    (__mmask8)-1, (int)(R)))
5175 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5182 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
/* Masked form: inactive low lane takes its value from W. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
    ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                    (__v2df)(__m128d)(B), \
                                                    (__v2df)(__m128d)(W), \
                                                    (__mmask8)(U), (int)(R)))
5198 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
/* Zero-masked scalar-double getexp, and the unmasked scalar-float form. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
    ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                    (__v2df)(__m128d)(B), \
                                                    (__v2df)_mm_setzero_pd(), \
                                                    (__mmask8)(U), (int)(R)))

#define _mm_getexp_round_ss(A, B, R) \
    ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                   (__v4sf)(__m128)(B), \
                                                   (__v4sf)_mm_setzero_ps(), \
                                                   (__mmask8)-1, (int)(R)))
5220 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5227 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
/* Masked scalar-float getexp: inactive low lane takes its value from W. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
    ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                   (__v4sf)(__m128)(B), \
                                                   (__v4sf)(__m128)(W), \
                                                   (__mmask8)(U), (int)(R)))
5243 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
/* Zero-masked scalar-float getexp. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
    ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                   (__v4sf)(__m128)(B), \
                                                   (__v4sf)_mm_setzero_ps(), \
                                                   (__mmask8)(U), (int)(R)))
/* Scalar getmant: extract the normalized mantissa of the low lane of B.
 * C selects the normalization interval, D the sign control; the builtin's
 * immediate packs them as (D << 2) | C. _round_ forms take explicit
 * rounding/SAE control R; non-round forms use _MM_FROUND_CUR_DIRECTION.
 * NOTE(review): truncated mask continuation lines restored to match the
 * complete _round_ siblings. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
    ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (int)(((D)<<2) | (C)), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D) \
    ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (int)(((D)<<2) | (C)), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, \
                                                  _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
    ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (int)(((D)<<2) | (C)), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), \
                                                  _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
    ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (int)(((D)<<2) | (C)), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
    ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (int)(((D)<<2) | (C)), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), \
                                                  _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
    ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (int)(((D)<<2) | (C)), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), (int)(R)))

#define _mm_getmant_round_ss(A, B, C, D, R) \
    ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (int)(((D)<<2) | (C)), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
    ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (int)(((D)<<2) | (C)), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
    ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (int)(((D)<<2) | (C)), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
    ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (int)(((D)<<2) | (C)), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
    ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (int)(((D)<<2) | (C)), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
    ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (int)(((D)<<2) | (C)), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R)))
/* Ordered/unordered scalar compares with predicate P and rounding/SAE
 * control R; the builtin returns the comparison result as an int. */
#define _mm_comi_round_sd(A, B, P, R) \
    ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                                 (int)(P), (int)(R)))

#define _mm_comi_round_ss(A, B, P, R) \
    ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                                 (int)(P), (int)(R)))

/* Convert the low double of A to a signed 64-bit int with rounding R. */
#define _mm_cvt_roundsd_si64(A, R) \
    ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5364static __inline__ __m512i
5366 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5371 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5378 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5385 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5390 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5397 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5404 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)
__Y);
5409 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5416 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5424 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)
__Y);
5430 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5438 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5445 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5450 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5457 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5464 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5469 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5476 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5483 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)
__Y);
5488 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5495 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5503 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)
__Y);
5509 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5517 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5524 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5529 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5536 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5543 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5548 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5555 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5562 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)
__Y);
5567 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5574 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5582 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)
__Y);
5588 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5596 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* VPTERNLOG: per-bit three-input logic; imm is the 8-bit truth table applied
 * to corresponding bits of A, B, C. mask_ merges into A under U; maskz_
 * zeroes inactive lanes. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
    ((__m512i)__builtin_ia32_pternlogd512_mask( \
        (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
        (unsigned char)(imm), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
    ((__m512i)__builtin_ia32_pternlogd512_mask( \
        (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
        (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
    ((__m512i)__builtin_ia32_pternlogd512_maskz( \
        (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
        (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
    ((__m512i)__builtin_ia32_pternlogq512_mask( \
        (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
        (unsigned char)(imm), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
    ((__m512i)__builtin_ia32_pternlogq512_mask( \
        (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
        (unsigned char)(imm), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
    ((__m512i)__builtin_ia32_pternlogq512_maskz( \
        (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
        (unsigned char)(imm), (__mmask8)(U)))
/* Low-double conversions with explicit rounding control R:
 * to signed 64-bit (i64), signed 32-bit (si32/i32), and unsigned 32-bit. */
#define _mm_cvt_roundsd_i64(A, R) \
    ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_si32(A, R) \
    ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_i32(A, R) \
    ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_u32(A, R) \
    ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5657 return (
unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
/* Low double to unsigned 64-bit with rounding control R.
 * NOTE(review): the closing "(int)(R)))" continuation line was truncated in
 * this copy; restored to match the other *_round_* conversion macros. */
#define _mm_cvt_roundsd_u64(A, R) \
    ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                     (int)(R)))
5667_mm_cvtsd_u64 (__m128d __A)
5669 return (
unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
/* Low-float conversions with explicit rounding control R:
 * to signed 32-/64-bit and unsigned 32-bit. */
#define _mm_cvt_roundss_si32(A, R) \
    ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i32(A, R) \
    ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_si64(A, R) \
    ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i64(A, R) \
    ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_u32(A, R) \
    ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5695 return (
unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
/* Low float to unsigned 64-bit with rounding control R.
 * NOTE(review): the closing "(int)(R)))" continuation line was truncated in
 * this copy; restored to match the other *_round_* conversion macros. */
#define _mm_cvt_roundss_u64(A, R) \
    ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                     (int)(R)))
5705_mm_cvtss_u64 (__m128 __A)
5707 return (
unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
/* Truncating (cvtt) low-double to signed 32-bit; R controls SAE. */
#define _mm_cvtt_roundsd_i32(A, R) \
    ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvtt_roundsd_si32(A, R) \
    ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5722 return (
int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
/* Truncating low-double to signed 64-bit; R controls SAE. */
#define _mm_cvtt_roundsd_si64(A, R) \
    ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvtt_roundsd_i64(A, R) \
    ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5734_mm_cvttsd_i64 (__m128d __A)
5736 return (
long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
/* Truncating low-double to unsigned 32-bit; R controls SAE. */
#define _mm_cvtt_roundsd_u32(A, R) \
    ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5747 return (
unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
/* Truncating low-double to unsigned 64-bit; R controls SAE.
 * NOTE(review): the closing "(int)(R)))" continuation line was truncated in
 * this copy; restored to match the other cvtt_round macros. */
#define _mm_cvtt_roundsd_u64(A, R) \
    ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                      (int)(R)))
5757_mm_cvttsd_u64 (__m128d __A)
5759 return (
unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
/* Truncating conversion of the lowest float element of A to a 32-bit
   signed integer; R carries the SAE control.  _i32/_si32 are aliases.  */
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5774 return (
int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
/* Truncating conversion of the lowest float element of A to a 64-bit
   signed integer; R carries the SAE control.  _i64/_si64 are aliases.  */
#define _mm_cvtt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvtt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5786_mm_cvttss_i64 (__m128 __A)
5788 return (
long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
/* Truncating conversion of the lowest float element of A to an unsigned
   32-bit integer; R carries the SAE control.  */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
5799 return (
unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
/* Truncating conversion of the lowest float element of A to an unsigned
   64-bit integer; R carries the SAE control.  */
#define _mm_cvtt_roundss_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                    (int)(R)))
5809_mm_cvttss_u64 (__m128 __A)
5811 return (
unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
/* VPERMILPD/VPERMILPS: shuffle double/float elements within each 128-bit
   lane of X according to immediate C.  The mask_ forms merge unselected
   elements from W; the maskz_ forms zero them.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
5845 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
5850 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
5857 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
5864 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
5869 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
5876 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
5883 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
5890 return (__m512d)__builtin_ia32_selectpd_512(__U,
5898 return (__m512d)__builtin_ia32_selectpd_512(__U,
5900 (__v8df)(__m512d)__I);
5906 return (__m512d)__builtin_ia32_selectpd_512(__U,
5913 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
5920 return (__m512)__builtin_ia32_selectps_512(__U,
5928 return (__m512)__builtin_ia32_selectps_512(__U,
5930 (__v16sf)(__m512)__I);
5936 return (__m512)__builtin_ia32_selectps_512(__U,
/* VCVTTPD2UDQ: truncating convert 8 packed doubles in A to 8 packed
   unsigned 32-bit integers; R carries the SAE control.  Unmasked,
   merge-masked and zero-masked forms.  */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
5959 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5969 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5978 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
/* VRNDSCALESD/VRNDSCALESS: round the low sd/ss element of B to the
   precision selected by the immediate, passing the upper element(s)
   through from A.  _round_ variants take an explicit rounding/SAE
   argument R; the plain forms use _MM_FROUND_CUR_DIRECTION.
   mask_ merges from W under U; maskz_ zeroes under U.  */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
/* VSCALEFPD: scale packed doubles in A by 2^floor(B) elementwise, with
   explicit rounding control R.  Unmasked, merge-masked and zero-masked.  */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
6090 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6101 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6111 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
/* VSCALEFPS: scale packed floats in A by 2^floor(B) elementwise, with
   explicit rounding control R.  Unmasked, merge-masked and zero-masked.  */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
6140 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6151 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6161 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
/* VSCALEFSD: scale the low double of A by 2^floor(low double of B), with
   explicit rounding control R; upper element passes through from A.  */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)))
6178 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6187 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
/* Merge-masked form of _mm_scalef_round_sd: low element taken from W
   when the mask bit in U is clear.  */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))
6203 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
/* Zero-masked scalef for the low double, and the unmasked float
   counterpart; R carries the explicit rounding control.  */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)))
6225 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6234 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
/* Merge-masked form of _mm_scalef_round_ss: low element taken from W
   when the mask bit in U is clear.  */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))
6250 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
/* Zero-masked form of _mm_scalef_round_ss: low element zeroed when the
   mask bit in U is clear.  */
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
6266 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (
int)__B);
6272 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
6279 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
6286 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (
int)__B);
6292 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
6299 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* 128-bit-lane shuffles (VSHUFF32X4/VSHUFF64X2/VSHUFI32X4/VSHUFI64X2) and
   classic element shuffles (VSHUFPD/VSHUFPS) across A and B, controlled by
   the immediate.  mask_ forms merge from W; maskz_ forms zero.  */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
/* VSQRTSD with explicit rounding control R: square root of the low double
   of B, upper element passed through from A.  */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))
6397 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
/* Merge-masked form of _mm_sqrt_round_sd: low element taken from W when
   the mask bit in U is clear.  */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U), (int)(R)))
6413 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
/* Zero-masked sqrt of the low double, and the unmasked float counterpart;
   R carries the explicit rounding control.  */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))
6435 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
/* Merge-masked form of _mm_sqrt_round_ss: low element taken from W when
   the mask bit in U is clear.  */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(W), (__mmask8)(U), \
                                            (int)(R)))
6451 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
/* Zero-masked form of _mm_sqrt_round_ss: low element zeroed when the mask
   bit in U is clear.  */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
6466 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6467 0, 1, 2, 3, 0, 1, 2, 3,
6468 0, 1, 2, 3, 0, 1, 2, 3);
6473 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
6480 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
6487 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6488 0, 1, 2, 3, 0, 1, 2, 3);
6494 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
6502 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
6509 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6510 0, 1, 2, 3, 0, 1, 2, 3,
6511 0, 1, 2, 3, 0, 1, 2, 3);
6516 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
6523 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
6530 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6531 0, 1, 2, 3, 0, 1, 2, 3);
6537 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
6545 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
6552 return (__m512d)__builtin_ia32_selectpd_512(__M,
6559 return (__m512d)__builtin_ia32_selectpd_512(__M,
6566 return (__m512)__builtin_ia32_selectps_512(__M,
6573 return (__m512)__builtin_ia32_selectps_512(__M,
6581 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6589 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6590 (__v16qi) __O, __M);
6596 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6604 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
6610 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6618 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6619 (__v16hi) __O, __M);
6625 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6633 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*)
__P, (__v16si) __A, __M);
6639 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6647 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6648 (__v16qi) __O, __M);
6654 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6662 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
6668 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6676 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6683 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6691 __builtin_ia32_pmovsqd512mem_mask ((__v8si *)
__P, (__v8di) __A, __M);
6697 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6705 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6712 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6720 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *)
__P, (__v8di) __A, __M);
6726 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6734 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6742 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6750 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
6756 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6764 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6772 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6780 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*)
__P, (__v16si) __A, __M);
6786 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6794 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6802 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6810 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
6816 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6824 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6831 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6839 __builtin_ia32_pmovusqd512mem_mask ((__v8si*)
__P, (__v8di) __A, __M);
6845 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6853 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6860 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6868 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*)
__P, (__v8di) __A, __M);
6874 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6882 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6883 (__v16qi) __O, __M);
6889 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6897 __builtin_ia32_pmovdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
6903 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6911 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6912 (__v16hi) __O, __M);
6918 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6926 __builtin_ia32_pmovdw512mem_mask ((__v16hi *)
__P, (__v16si) __A, __M);
6932 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6940 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6941 (__v16qi) __O, __M);
6947 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6955 __builtin_ia32_pmovqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
6961 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6969 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6976 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6984 __builtin_ia32_pmovqd512mem_mask ((__v8si *)
__P, (__v8di) __A, __M);
6990 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6998 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7005 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7013 __builtin_ia32_pmovqw512mem_mask ((__v8hi *)
__P, (__v8di) __A, __M);
/* VEXTRACTI32X4/VEXTRACTI64X4: extract the 128-bit (by 32-bit elements)
   or 256-bit (by 64-bit elements) sub-vector selected by imm.
   mask_ merges unselected elements from W; maskz_ zeroes them.  */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
      (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
/* VINSERTF64X4/VINSERTI64X4/VINSERTF32X4/VINSERTI32X4: insert the 256- or
   128-bit vector B into A at the position selected by imm.  Masked forms
   are built from the unmasked insert plus a vector select.  */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))
/* VGETMANTPD: extract the normalized mantissa of each double in A.
   B selects the interval, C the sign control; the builtin takes them
   combined as (C<<2)|B.  _round_ forms take explicit SAE control R; the
   plain forms use _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))
/* VGETMANTPS: extract the normalized mantissa of each float in A.
   B selects the interval, C the sign control; the builtin takes them
   combined as (C<<2)|B.  _round_ forms take explicit SAE control R; the
   plain forms use _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
/* VGETEXPPD: extract the biased exponent of each double in A as a double;
   R carries the SAE control.  Unmasked, merge-masked and zero-masked.  */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7198 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7207 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7216 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
/* VGETEXPPS: extract the biased exponent of each float in A as a float;
   R carries the SAE control.  Unmasked, merge-masked and zero-masked.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7240 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7249 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7258 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
/* Gather intrinsics (VGATHERQPS/QD/QPD/QQ and VGATHERDPS/DD/DPD/DQ):
   load elements from addr + index*scale.  The unmasked forms pass an
   all-ones mask and an undefined pass-through; the mask_ forms merge
   from v1_old where the mask bit is clear.  scale must be 1, 2, 4 or 8.  */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
/* Scatter intrinsics (VSCATTERQPS/QD/QPD/QQ and VSCATTERDPS):
   store elements of v1 to addr + index*scale.  The unmasked forms pass an
   all-ones mask; the mask_ forms store only where the mask bit is set.
   scale must be 1, 2, 4 or 8.  */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))
7410#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
7411 __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
7412 (__v16si)(__m512i)(index), \
7413 (__v16si)(__m512i)(v1), (int)(scale))
7415#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
7416 __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
7417 (__v16si)(__m512i)(index), \
7418 (__v16si)(__m512i)(v1), (int)(scale))
7420#define _mm512_i32scatter_pd(addr, index, v1, scale) \
7421 __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
7422 (__v8si)(__m256i)(index), \
7423 (__v8df)(__m512d)(v1), (int)(scale))
7425#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
7426 __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
7427 (__v8si)(__m256i)(index), \
7428 (__v8df)(__m512d)(v1), (int)(scale))
7430#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
7431 __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
7432 (__v8si)(__m256i)(index), \
7433 (__v8di)(__m512i)(v1), (int)(scale))
7435#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
7436 __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
7437 (__v8si)(__m256i)(index), \
7438 (__v8di)(__m512i)(v1), (int)(scale))
7443 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7450#define _mm_fmadd_round_ss(A, B, C, R) \
7451 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7452 (__v4sf)(__m128)(B), \
7453 (__v4sf)(__m128)(C), (__mmask8)-1, \
7456#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7457 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7458 (__v4sf)(__m128)(A), \
7459 (__v4sf)(__m128)(B), (__mmask8)(U), \
7465 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7472#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7473 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7474 (__v4sf)(__m128)(B), \
7475 (__v4sf)(__m128)(C), (__mmask8)(U), \
7481 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7488#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7489 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7490 (__v4sf)(__m128)(X), \
7491 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7497 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7504#define _mm_fmsub_round_ss(A, B, C, R) \
7505 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7506 (__v4sf)(__m128)(B), \
7507 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7510#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7511 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7512 (__v4sf)(__m128)(A), \
7513 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7519 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7526#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7527 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7528 (__v4sf)(__m128)(B), \
7529 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7535 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7542#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7543 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7544 (__v4sf)(__m128)(X), \
7545 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7551 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7558#define _mm_fnmadd_round_ss(A, B, C, R) \
7559 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7560 -(__v4sf)(__m128)(B), \
7561 (__v4sf)(__m128)(C), (__mmask8)-1, \
7564#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7565 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7566 -(__v4sf)(__m128)(A), \
7567 (__v4sf)(__m128)(B), (__mmask8)(U), \
7573 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7580#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7581 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7582 -(__v4sf)(__m128)(B), \
7583 (__v4sf)(__m128)(C), (__mmask8)(U), \
7589 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7596#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7597 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7598 -(__v4sf)(__m128)(X), \
7599 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7605 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7612#define _mm_fnmsub_round_ss(A, B, C, R) \
7613 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7614 -(__v4sf)(__m128)(B), \
7615 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7618#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7619 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7620 -(__v4sf)(__m128)(A), \
7621 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7627 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7634#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7635 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7636 -(__v4sf)(__m128)(B), \
7637 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7643 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7650#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7651 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7652 -(__v4sf)(__m128)(X), \
7653 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7659 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7666#define _mm_fmadd_round_sd(A, B, C, R) \
7667 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7668 (__v2df)(__m128d)(B), \
7669 (__v2df)(__m128d)(C), (__mmask8)-1, \
7672#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
7673 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7674 (__v2df)(__m128d)(A), \
7675 (__v2df)(__m128d)(B), (__mmask8)(U), \
7681 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7688#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
7689 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7690 (__v2df)(__m128d)(B), \
7691 (__v2df)(__m128d)(C), (__mmask8)(U), \
7697 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7704#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
7705 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7706 (__v2df)(__m128d)(X), \
7707 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7713 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7720#define _mm_fmsub_round_sd(A, B, C, R) \
7721 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7722 (__v2df)(__m128d)(B), \
7723 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7726#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
7727 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7728 (__v2df)(__m128d)(A), \
7729 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7735 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Scalar double fused multiply-subtract (A*B - C, via negated C operand) with
   rounding control R; zero-masking on the low element via U. */
7742#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
7743 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7744 (__v2df)(__m128d)(B), \
7745 -(__v2df)(__m128d)(C), \
7746 (__mmask8)(U), (int)(R)))
7751 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
/* Scalar double fused multiply-subtract with rounding control R; merge-masking
   into the third operand Y (mask3 form) via U. */
7758#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
7759 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7760 (__v2df)(__m128d)(X), \
7761 (__v2df)(__m128d)(Y), \
7762 (__mmask8)(U), (int)(R)))
7767 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7774#define _mm_fnmadd_round_sd(A, B, C, R) \
7775 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7776 -(__v2df)(__m128d)(B), \
7777 (__v2df)(__m128d)(C), (__mmask8)-1, \
7780#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
7781 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7782 -(__v2df)(__m128d)(A), \
7783 (__v2df)(__m128d)(B), (__mmask8)(U), \
7789 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7796#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
7797 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7798 -(__v2df)(__m128d)(B), \
7799 (__v2df)(__m128d)(C), (__mmask8)(U), \
7805 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7812#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
7813 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7814 -(__v2df)(__m128d)(X), \
7815 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7821 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7828#define _mm_fnmsub_round_sd(A, B, C, R) \
7829 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7830 -(__v2df)(__m128d)(B), \
7831 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7834#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
7835 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7836 -(__v2df)(__m128d)(A), \
7837 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7843 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7850#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
7851 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7852 -(__v2df)(__m128d)(B), \
7853 -(__v2df)(__m128d)(C), \
7860 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
/* Scalar double fused negated multiply-subtract (uses the fmsub builtin with a
   negated X operand) with rounding control R; mask3 merge-masking via U. */
7867#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
7868 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7869 -(__v2df)(__m128d)(X), \
7870 (__v2df)(__m128d)(Y), \
7871 (__mmask8)(U), (int)(R)))
/* Permute doubles within each 256-bit lane of X by immediate control C. */
7873#define _mm512_permutex_pd(X, C) \
7874 ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))
/* Merge-masked variant: unselected lanes keep W. */
7876#define _mm512_mask_permutex_pd(W, U, X, C) \
7877 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7878 (__v8df)_mm512_permutex_pd((X), (C)), \
7879 (__v8df)(__m512d)(W)))
/* Zero-masked variant: unselected lanes are zeroed. */
7881#define _mm512_maskz_permutex_pd(U, X, C) \
7882 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7883 (__v8df)_mm512_permutex_pd((X), (C)), \
7884 (__v8df)_mm512_setzero_pd()))
/* Permute 64-bit ints within each 256-bit lane of X by immediate control C. */
7886#define _mm512_permutex_epi64(X, C) \
7887 ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))
/* Merge-masked variant: unselected lanes keep W. */
7889#define _mm512_mask_permutex_epi64(W, U, X, C) \
7890 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
7891 (__v8di)_mm512_permutex_epi64((X), (C)), \
7892 (__v8di)(__m512i)(W)))
/* Zero-masked variant: unselected lanes are zeroed. */
7894#define _mm512_maskz_permutex_epi64(U, X, C) \
7895 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
7896 (__v8di)_mm512_permutex_epi64((X), (C)), \
7897 (__v8di)_mm512_setzero_si512()))
7901 return (__m512d)__builtin_ia32_permvardf512((__v8df)
__Y, (__v8di) __X);
7907 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
7914 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
7921 return (__m512i)__builtin_ia32_permvardi512((__v8di)
__Y, (__v8di)__X);
7926 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
7934 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
7941 return (__m512)__builtin_ia32_permvarsf512((__v16sf)
__Y, (__v16si)__X);
7946 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
7953 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
7960 return (__m512i)__builtin_ia32_permvarsi512((__v16si)
__Y, (__v16si)__X);
/* Legacy alias kept for source compatibility with the older intrinsic name. */
7963#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
7967 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
7975 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
/* Legacy alias kept for source compatibility with the older intrinsic name. */
7980#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8009 return (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8014 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8019 *__C = (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8020 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
/* Standard _*_mask16 names are aliases of the older _mm512_k* mask ops. */
8038#define _kand_mask16 _mm512_kand
8039#define _kandn_mask16 _mm512_kandn
8040#define _knot_mask16 _mm512_knot
8041#define _kor_mask16 _mm512_kor
8042#define _kxnor_mask16 _mm512_kxnor
8043#define _kxor_mask16 _mm512_kxor
/* Shift a 16-bit mask left by immediate I (bits shifted out are lost). */
8045#define _kshiftli_mask16(A, I) \
8046 ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))
/* Shift a 16-bit mask right by immediate I. */
8048#define _kshiftri_mask16(A, I) \
8049 ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8051static __inline__
unsigned int
8053 return (
unsigned int)__builtin_ia32_kmovw((
__mmask16)__A);
8075 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)
__P);
8082 return (__m512i) __builtin_nontemporal_load((
const __v8di_aligned *)
__P);
8089 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)
__P);
8095 typedef __v16sf __v16sf_aligned
__attribute__((aligned(64)));
8096 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)
__P);
8102 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8110 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8119 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8127 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8136 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8144 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8153 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8161 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
/* Compare the low floats of X and Y by predicate P with rounding/SAE control R;
   result is a 1-bit mask in bit 0 (all lanes enabled). */
8167#define _mm_cmp_round_ss_mask(X, Y, P, R) \
8168 ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8169 (__v4sf)(__m128)(Y), (int)(P), \
8170 (__mmask8)-1, (int)(R)))
/* Masked variant: the comparison result is ANDed with mask M. */
8172#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
8173 ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8174 (__v4sf)(__m128)(Y), (int)(P), \
8175 (__mmask8)(M), (int)(R)))
8177#define _mm_cmp_ss_mask(X, Y, P) \
8178 ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8179 (__v4sf)(__m128)(Y), (int)(P), \
8181 _MM_FROUND_CUR_DIRECTION))
8183#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
8184 ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8185 (__v4sf)(__m128)(Y), (int)(P), \
8187 _MM_FROUND_CUR_DIRECTION))
/* Compare the low doubles of X and Y by predicate P with rounding/SAE control
   R; result is a 1-bit mask in bit 0 (all lanes enabled). */
8189#define _mm_cmp_round_sd_mask(X, Y, P, R) \
8190 ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8191 (__v2df)(__m128d)(Y), (int)(P), \
8192 (__mmask8)-1, (int)(R)))
/* Masked variant: the comparison result is ANDed with mask M. */
8194#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
8195 ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8196 (__v2df)(__m128d)(Y), (int)(P), \
8197 (__mmask8)(M), (int)(R)))
8199#define _mm_cmp_sd_mask(X, Y, P) \
8200 ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8201 (__v2df)(__m128d)(Y), (int)(P), \
8203 _MM_FROUND_CUR_DIRECTION))
8205#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
8206 ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8207 (__v2df)(__m128d)(Y), (int)(P), \
8209 _MM_FROUND_CUR_DIRECTION))
8272 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8273 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8278 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8285 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8293 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8294 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8299 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8306 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8313 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B), __W);
8318 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B),
8324 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B), __W);
8329 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B),
8336 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8342 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8348 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8352 return (__m128) __builtin_ia32_loadss128_mask ((
const __v4sf *) __A, src, __U & 1);
8358 return (__m128)__builtin_ia32_loadss128_mask ((
const __v4sf *) __A,
8366 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8370 return (__m128d) __builtin_ia32_loadsd128_mask ((
const __v2df *) __A, src, __U & 1);
8376 return (__m128d) __builtin_ia32_loadsd128_mask ((
const __v2df *) __A,
/* Shuffle 32-bit elements within each 128-bit lane of A by immediate I. */
8381#define _mm512_shuffle_epi32(A, I) \
8382 ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))
/* Merge-masked variant: unselected lanes keep W. */
8384#define _mm512_mask_shuffle_epi32(W, U, A, I) \
8385 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
8386 (__v16si)_mm512_shuffle_epi32((A), (I)), \
8387 (__v16si)(__m512i)(W)))
/* Zero-masked variant: unselected lanes are zeroed. */
8389#define _mm512_maskz_shuffle_epi32(U, A, I) \
8390 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
8391 (__v16si)_mm512_shuffle_epi32((A), (I)), \
8392 (__v16si)_mm512_setzero_si512()))
8397 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8405 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8413 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8421 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8429 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)
__P,
8437 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)
__P,
8445 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)
__P,
8453 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)
__P,
8461 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)
__P,
8469 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)
__P,
8477 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)
__P,
8485 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)
__P,
8493 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8501 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8509 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8517 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
/* Convert 8 floats to 8 doubles with SAE/rounding control R; unmasked. */
8522#define _mm512_cvt_roundps_pd(A, R) \
8523 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8524 (__v8df)_mm512_undefined_pd(), \
8525 (__mmask8)-1, (int)(R)))
/* Merge-masked variant: unselected lanes keep W. */
8527#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
8528 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8529 (__v8df)(__m512d)(W), \
8530 (__mmask8)(U), (int)(R)))
/* Zero-masked variant: unselected lanes are zeroed. */
8532#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
8533 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8534 (__v8df)_mm512_setzero_pd(), \
8535 (__mmask8)(U), (int)(R)))
8537static __inline__ __m512d
8539 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8544 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8551 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8568 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U, (__v8df)__A,
8574 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U, (__v8df)__A,
8580 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U, (__v16sf)__A,
8586 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U, (__v16sf)__A,
8593 __builtin_ia32_compressstoredf512_mask ((__v8df *)
__P, (__v8df) __A,
8600 __builtin_ia32_compressstoredi512_mask ((__v8di *)
__P, (__v8di) __A,
8607 __builtin_ia32_compressstoresf512_mask ((__v16sf *)
__P, (__v16sf) __A,
8614 __builtin_ia32_compressstoresi512_mask ((__v16si *)
__P, (__v16si) __A,
/* Convert the low double of B to a float with rounding control R; the upper
   three floats come from A. Unmasked form. */
8618#define _mm_cvt_roundsd_ss(A, B, R) \
8619 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8620 (__v2df)(__m128d)(B), \
8621 (__v4sf)_mm_undefined_ps(), \
8622 (__mmask8)-1, (int)(R)))
/* Merge-masked: if mask bit 0 is clear, the low result element comes from W. */
8624#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
8625 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8626 (__v2df)(__m128d)(B), \
8627 (__v4sf)(__m128)(W), \
8628 (__mmask8)(U), (int)(R)))
/* Zero-masked: if mask bit 0 is clear, the low result element is zero. */
8630#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
8631 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8632 (__v2df)(__m128d)(B), \
8633 (__v4sf)_mm_setzero_ps(), \
8634 (__mmask8)(U), (int)(R)))
8638 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8646 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
/* The _i32/_i64 conversion names are aliases of the classic _si32/_si64
   intrinsics; 64-bit variants are only meaningful on x86-64. */
8652#define _mm_cvtss_i32 _mm_cvtss_si32
8653#define _mm_cvtsd_i32 _mm_cvtsd_si32
8654#define _mm_cvti32_sd _mm_cvtsi32_sd
8655#define _mm_cvti32_ss _mm_cvtsi32_ss
8657#define _mm_cvtss_i64 _mm_cvtss_si64
8658#define _mm_cvtsd_i64 _mm_cvtsd_si64
8659#define _mm_cvti64_sd _mm_cvtsi64_sd
8660#define _mm_cvti64_ss _mm_cvtsi64_ss
8664#define _mm_cvt_roundi64_sd(A, B, R) \
8665 ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
8668#define _mm_cvt_roundsi64_sd(A, B, R) \
8669 ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
/* Convert a signed 32-bit int B to a float in the low element of A, with
   rounding control R; the _roundi32_ name is the same operation. */
8673#define _mm_cvt_roundsi32_ss(A, B, R) \
8674 ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))
8676#define _mm_cvt_roundi32_ss(A, B, R) \
8677 ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))
8680#define _mm_cvt_roundsi64_ss(A, B, R) \
8681 ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
8684#define _mm_cvt_roundi64_ss(A, B, R) \
8685 ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
/* Convert the low float of B to a double with SAE control R; the upper double
   comes from A. Unmasked form. */
8689#define _mm_cvt_roundss_sd(A, B, R) \
8690 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8691 (__v4sf)(__m128)(B), \
8692 (__v2df)_mm_undefined_pd(), \
8693 (__mmask8)-1, (int)(R)))
/* Merge-masked: if mask bit 0 is clear, the low result element comes from W. */
8695#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
8696 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8697 (__v4sf)(__m128)(B), \
8698 (__v2df)(__m128d)(W), \
8699 (__mmask8)(U), (int)(R)))
/* Zero-masked: if mask bit 0 is clear, the low result element is zero. */
8701#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
8702 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8703 (__v4sf)(__m128)(B), \
8704 (__v2df)_mm_setzero_pd(), \
8705 (__mmask8)(U), (int)(R)))
8710 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8719 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
/* Convert an unsigned 64-bit int B to a double in the low element of A with
   rounding control R (x86-64 only). */
8733#define _mm_cvt_roundu64_sd(A, B, R) \
8734 ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
8735 (unsigned long long)(B), (int)(R)))
8738_mm_cvtu64_sd (__m128d __A,
unsigned long long __B)
8745#define _mm_cvt_roundu32_ss(A, B, R) \
8746 ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
/* Convert an unsigned 64-bit int B to a float in the low element of A with
   rounding control R (x86-64 only). */
8757#define _mm_cvt_roundu64_ss(A, B, R) \
8758 ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
8759 (unsigned long long)(B), (int)(R)))
8762_mm_cvtu64_ss (__m128 __A,
unsigned long long __B)
8771 return (__m512i) __builtin_ia32_selectd_512(__M,
8778 return (__m512i) __builtin_ia32_selectq_512(__M,
8784 char __e63,
char __e62,
char __e61,
char __e60,
char __e59,
char __e58,
8785 char __e57,
char __e56,
char __e55,
char __e54,
char __e53,
char __e52,
8786 char __e51,
char __e50,
char __e49,
char __e48,
char __e47,
char __e46,
8787 char __e45,
char __e44,
char __e43,
char __e42,
char __e41,
char __e40,
8788 char __e39,
char __e38,
char __e37,
char __e36,
char __e35,
char __e34,
8789 char __e33,
char __e32,
char __e31,
char __e30,
char __e29,
char __e28,
8790 char __e27,
char __e26,
char __e25,
char __e24,
char __e23,
char __e22,
8791 char __e21,
char __e20,
char __e19,
char __e18,
char __e17,
char __e16,
8792 char __e15,
char __e14,
char __e13,
char __e12,
char __e11,
char __e10,
8793 char __e9,
char __e8,
char __e7,
char __e6,
char __e5,
char __e4,
char __e3,
8794 char __e2,
char __e1,
char __e0) {
8796 return __extension__ (__m512i)(__v64qi)
8797 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8798 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8799 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8800 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
8801 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
8802 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
8803 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
8804 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
8808 short __e31,
short __e30,
short __e29,
short __e28,
short __e27,
8809 short __e26,
short __e25,
short __e24,
short __e23,
short __e22,
8810 short __e21,
short __e20,
short __e19,
short __e18,
short __e17,
8811 short __e16,
short __e15,
short __e14,
short __e13,
short __e12,
8812 short __e11,
short __e10,
short __e9,
short __e8,
short __e7,
short __e6,
8813 short __e5,
short __e4,
short __e3,
short __e2,
short __e1,
short __e0) {
8814 return __extension__ (__m512i)(__v32hi)
8815 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8816 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8817 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8818 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
8822 int __A,
int __B,
int __C,
int __D,
int __E,
int __F,
int __G,
int __H,
8823 int __I,
int __J,
int __K,
int __L,
int __M,
int __N,
int __O,
int __P) {
8824 return __extension__ (__m512i)(__v16si)
8825 {
__P, __O, __N, __M,
__L, __K, __J, __I,
8826 __H, __G, __F, __E,
__D, __C, __B, __A };
8830 int e0,
int e1,
int e2,
int e3,
int e4,
int e5,
int e6,
int e7,
int e8,
8831 int e9,
int e10,
int e11,
int e12,
int e13,
int e14,
int e15) {
8832 return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,
8838 long long __E,
long long __F,
long long __G,
long long __H) {
8839 return __extension__ (__m512i) (__v8di)
8840 { __H, __G, __F, __E,
__D, __C, __B, __A };
8845 long long e4,
long long e5,
long long e6,
long long e7) {
8851 double __F,
double __G,
double __H) {
8852 return __extension__ (__m512d)
8853 { __H, __G, __F, __E,
__D, __C, __B, __A };
8858 double e6,
double e7) {
8864 float __G,
float __H,
float __I,
float __J,
float __K,
float __L,
8865 float __M,
float __N,
float __O,
float __P) {
8866 return __extension__ (__m512)
8867 {
__P, __O, __N, __M,
__L, __K, __J, __I,
8868 __H, __G, __F, __E,
__D, __C, __B, __A };
8873 float e6,
float e7,
float e8,
float e9,
float e10,
float e11,
8874 float e12,
float e13,
float e14,
float e15) {
8875 return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
8917 return __builtin_reduce_add((__v8di)__W);
8922 return __builtin_reduce_mul((__v8di)__W);
8927 return __builtin_reduce_and((__v8di)__W);
8932 return __builtin_reduce_or((__v8di)__W);
8938 return __builtin_reduce_add((__v8di)__W);
8944 return __builtin_reduce_mul((__v8di)__W);
8950 return __builtin_reduce_and((__v8di)__W);
8956 return __builtin_reduce_or((__v8di)__W);
8963 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8967 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
8973 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8979 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
8984 return __builtin_reduce_add((__v16si)__W);
8989 return __builtin_reduce_mul((__v16si)__W);
8994 return __builtin_reduce_and((__v16si)__W);
8999 return __builtin_reduce_or((__v16si)__W);
9005 return __builtin_reduce_add((__v16si)__W);
9011 return __builtin_reduce_mul((__v16si)__W);
9017 return __builtin_reduce_and((__v16si)__W);
9023 return __builtin_reduce_or((__v16si)__W);
9028 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9033 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9039 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9045 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9050 return __builtin_reduce_max((__v8di)__V);
9055 return __builtin_reduce_max((__v8du)__V);
9060 return __builtin_reduce_min((__v8di)__V);
9065 return __builtin_reduce_min((__v8du)__V);
9071 return __builtin_reduce_max((__v8di)__V);
9077 return __builtin_reduce_max((__v8du)__V);
9083 return __builtin_reduce_min((__v8di)__V);
9089 return __builtin_reduce_min((__v8du)__V);
9093 return __builtin_reduce_max((__v16si)__V);
9098 return __builtin_reduce_max((__v16su)__V);
9103 return __builtin_reduce_min((__v16si)__V);
9108 return __builtin_reduce_min((__v16su)__V);
9114 return __builtin_reduce_max((__v16si)__V);
9120 return __builtin_reduce_max((__v16su)__V);
9126 return __builtin_reduce_min((__v16si)__V);
9132 return __builtin_reduce_min((__v16su)__V);
9137 return __builtin_ia32_reduce_fmax_pd512(__V);
9142 return __builtin_ia32_reduce_fmin_pd512(__V);
9148 return __builtin_ia32_reduce_fmax_pd512(__V);
9154 return __builtin_ia32_reduce_fmin_pd512(__V);
9159 return __builtin_ia32_reduce_fmax_ps512(__V);
9164 return __builtin_ia32_reduce_fmin_ps512(__V);
9170 return __builtin_ia32_reduce_fmax_ps512(__V);
9176 return __builtin_ia32_reduce_fmin_ps512(__V);
9192 __v16si
__b = (__v16si)__A;
/* "lo" gather/scatter wrappers: take a full __m512i index vector but use only
   its low 256 bits (8 x i32), forwarding to the i32 gather/scatter forms. */
9211#define _mm512_i32logather_pd(vindex, base_addr, scale) \
9212 _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
/* Masked lo-gather of 8 doubles; unselected lanes keep src. */
9235#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
9236 _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \
9237 (base_addr), (scale))
/* Lo-gather of 8 64-bit ints using the low 8 x i32 of vindex. */
9254#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
9255 _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
/* Masked lo-gather of 8 64-bit ints; unselected lanes keep src. */
9277#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
9278 _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \
9279 (base_addr), (scale))
/* Lo-scatter of 8 doubles using the low 8 x i32 of vindex. */
9295#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
9296 _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
/* Masked lo-scatter of 8 doubles. */
9316#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
9317 _mm512_mask_i32scatter_pd((base_addr), (mask), \
9318 _mm512_castsi512_si256(vindex), (v1), (scale))
/* Lo-scatter of 8 64-bit ints using the low 8 x i32 of vindex. */
9334#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
9335 _mm512_i32scatter_epi64((base_addr), \
9336 _mm512_castsi512_si256(vindex), (v1), (scale))
/* Masked lo-scatter of 8 64-bit ints. */
9355#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
9356 _mm512_mask_i32scatter_epi64((base_addr), (mask), \
9357 _mm512_castsi512_si256(vindex), (v1), (scale))
9359#undef __DEFAULT_FN_ATTRS512
9360#undef __DEFAULT_FN_ATTRS128
9361#undef __DEFAULT_FN_ATTRS
9362#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
9363#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
9364#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
static __inline__ uint32_t volatile uint32_t * __p
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8 x float]; the upper 256 bits of the result are zeroed.
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _cvtu32_mask16(unsigned int __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2 x double]; the upper 384 bits of the result are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8, int e9, int e10, int e11, int e12, int e13, int e14, int e15)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4 x float]; the upper 384 bits of the result are zeroed.
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si256(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7, float e8, float e9, float e10, float e11, float e12, float e13, float e14, float e15)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m128 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps128(__m512 __a)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5, double e6, double e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps256(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3, long long e4, long long e5, long long e6, long long e7)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m128i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si128(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_int2mask(int __a)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m256d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd256(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_knot(__mmask16 __M)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi32(int e0, int e1, int e2, int e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_pd(double e0, double e1, double e2, double e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m128d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd128(__m512d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kmov(__mmask16 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_si512(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_ps(float e0, float e1, float e2, float e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_ps(__m512d __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ void int __a
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
static __inline__ void short __D
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
__inline unsigned int unsigned int __Y
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...