10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
/* Unsigned element views of a 512-bit vector (64 bytes), used internally to
 * get well-defined wrapping arithmetic and lane reinterpretation. */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Explicitly signed 8-bit lanes; plain 'char' signedness is
 * implementation-defined, so a distinct type is needed. */
typedef signed char __v64qs __attribute__((__vector_size__(64)));

/* Public 512-bit vector types, 64-byte aligned. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Unaligned (align-1) variants for loadu/storeu-style operations. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
/* Rounding-control immediates accepted by the _mm*_round_* intrinsics. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* Integer-compare predicate aliases: GE/GT are expressed as the negated
 * LT/LE predicates of the underlying instruction encoding. */
#define _MM_CMPINT_GE _MM_CMPINT_NLT
#define _MM_CMPINT_GT _MM_CMPINT_NLE
/* Default attributes applied to every intrinsic defined in this file:
 * always inline, emit no debug info, and require the target features each
 * vector width needs (512-bit forms need EVEX512; 128-bit forms do not). */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,no-evex512")))
/* In C++11 and later, the intrinsics may additionally be constexpr; in C (or
 * pre-C++11) the *_CONSTEXPR macros fall back to the plain attribute sets.
 * NOTE(review): the fallback branch previously mapped _CONSTEXPR to the
 * 128-bit attributes and 128_CONSTEXPR to the width-less attributes (swapped),
 * and the #else/#endif were missing — both fixed here. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
192 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
195#define _mm512_setzero_epi32 _mm512_setzero_si512
200 return (__m512d)__builtin_ia32_undef512();
206 return (__m512)__builtin_ia32_undef512();
212 return (__m512)__builtin_ia32_undef512();
218 return (__m512i)__builtin_ia32_undef512();
224 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
231 return (__m512i)__builtin_ia32_selectd_512(__M,
239 return (__m512i)__builtin_ia32_selectd_512(__M,
247 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
248 0, 0, 0, 0, 0, 0, 0, 0);
254 return (__m512i)__builtin_ia32_selectq_512(__M,
263 return (__m512i)__builtin_ia32_selectq_512(__M,
269 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
270 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
273#define _mm512_setzero _mm512_setzero_ps
277 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
283 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
284 __w, __w, __w, __w, __w, __w, __w, __w };
290 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
296 return __extension__ (__m512i)(__v64qi){
297 __w, __w, __w, __w, __w, __w, __w, __w,
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w,
301 __w, __w, __w, __w, __w, __w, __w, __w,
302 __w, __w, __w, __w, __w, __w, __w, __w,
303 __w, __w, __w, __w, __w, __w, __w, __w,
304 __w, __w, __w, __w, __w, __w, __w, __w };
310 return __extension__ (__m512i)(__v32hi){
311 __w, __w, __w, __w, __w, __w, __w, __w,
312 __w, __w, __w, __w, __w, __w, __w, __w,
313 __w, __w, __w, __w, __w, __w, __w, __w,
314 __w, __w, __w, __w, __w, __w, __w, __w };
320 return __extension__ (__m512i)(__v16si){
321 __s, __s, __s, __s, __s, __s, __s, __s,
322 __s, __s, __s, __s, __s, __s, __s, __s };
328 return (__m512i)__builtin_ia32_selectd_512(__M,
336 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
342 return (__m512i)__builtin_ia32_selectq_512(__M,
350 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
357 return __extension__ (__m512i)(__v16si)
358 {
__D, __C, __B, __A,
__D, __C, __B, __A,
359 __D, __C, __B, __A,
__D, __C, __B, __A };
366 return __extension__ (__m512i) (__v8di)
367 {
__D, __C, __B, __A,
__D, __C, __B, __A };
373 return __extension__ (__m512d)
374 {
__D, __C, __B, __A,
__D, __C, __B, __A };
380 return __extension__ (__m512)
381 {
__D, __C, __B, __A,
__D, __C, __B, __A,
382 __D, __C, __B, __A,
__D, __C, __B, __A };
/* setr4 variants: identical to the set4 forms but take the four repeated
 * elements in memory (low-to-high) order, so simply forward with the
 * arguments reversed. */
#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
400 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
401 0, 0, 0, 0, 0, 0, 0, 0);
409 return __builtin_shufflevector(
__a, __builtin_nondeterministic_value(
__a), 0,
410 1, 2, 3, 4, 5, 6, 7);
416 return __builtin_shufflevector(
__a, __builtin_nondeterministic_value(
__a), 0,
417 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
423 return __builtin_shufflevector(
__a,
__a, 0, 1);
429 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
435 return __builtin_shufflevector(
__a,
__a, 0, 1, 2, 3);
441 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
447 return (__m512) (__A);
453 return (__m512i) (__A);
459 __m256d __B = __builtin_nondeterministic_value(__B);
460 return __builtin_shufflevector(
461 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
462 __B, 0, 1, 2, 3, 4, 5, 6, 7);
468 return (__m512d) (__A);
474 return (__m512i) (__A);
480 __m256 __B = __builtin_nondeterministic_value(__B);
481 return __builtin_shufflevector(
482 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
483 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
489 __m256i __B = __builtin_nondeterministic_value(__B);
490 return __builtin_shufflevector(
491 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
492 __B, 0, 1, 2, 3, 4, 5, 6, 7);
498 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
504 return (__m512) (__A);
510 return (__m512d) (__A);
516 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
522 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
553 return __builtin_shufflevector((__v2df)
__a, (__v2df)
_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
572 return __builtin_shufflevector((__v4df)
__a, (__v4df)
_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
590 return __builtin_shufflevector((__v4sf)
__a, (__v4sf)
_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
608 return __builtin_shufflevector((__v8sf)
__a, (__v8sf)
_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
626 return __builtin_shufflevector((__v2di)
__a, (__v2di)
_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
651 return (__m512i)((__v16su)
__a & (__v16su)
__b);
657 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
672 return (__m512i)((__v8du)
__a & (__v8du)
__b);
678 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __k,
693 return (__m512i)(~(__v8du)__A & (__v8du)__B);
699 return (__m512i)(~(__v16su)__A & (__v16su)__B);
705 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
720 return (__m512i)(~(__v8du)__A & (__v8du)__B);
726 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
741 return (__m512i)((__v16su)
__a | (__v16su)
__b);
747 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
761 return (__m512i)((__v8du)
__a | (__v8du)
__b);
767 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__k,
781 return (__m512i)((__v16su)
__a ^ (__v16su)
__b);
787 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
801 return (__m512i)((__v8du)
__a ^ (__v8du)
__b);
807 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__k,
821 return (__m512i)((__v8du)
__a & (__v8du)
__b);
827 return (__m512i)((__v8du)
__a | (__v8du)
__b);
833 return (__m512i)((__v8du)
__a ^ (__v8du)
__b);
841 return (__m512d)((__v8df)
__a + (__v8df)
__b);
847 return (__m512)((__v16sf)
__a + (__v16sf)
__b);
853 return (__m512d)((__v8df)
__a * (__v8df)
__b);
859 return (__m512)((__v16sf)
__a * (__v16sf)
__b);
865 return (__m512d)((__v8df)
__a - (__v8df)
__b);
871 return (__m512)((__v16sf)
__a - (__v16sf)
__b);
877 return (__m512i) ((__v8du) __A + (__v8du) __B);
883 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
891 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
899 return (__m512i) ((__v8du) __A - (__v8du) __B);
905 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
913 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
921 return (__m512i) ((__v16su) __A + (__v16su) __B);
927 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
935 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
943 return (__m512i) ((__v16su) __A - (__v16su) __B);
949 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
957 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
/* Packed double-precision max with explicit rounding/SAE control R. */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked form: result lanes with a zero bit in U are taken from W.
 * NOTE(review): the final pass-through operand line was truncated — restored. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked form: result lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
979 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
986 return (__m512d)__builtin_ia32_selectpd_512(__U,
994 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* Packed single-precision max with explicit rounding/SAE control R. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked form: lanes with a zero bit in U come from W.
 * NOTE(review): the final pass-through operand line was truncated — restored. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

/* Zero-masked form: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1016 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
1023 return (__m512)__builtin_ia32_selectps_512(__U,
1031 return (__m512)__builtin_ia32_selectps_512(__U,
1038 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1047 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
/* Scalar single-precision max (low lane; upper lanes copied from A) with
 * explicit rounding/SAE control R. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of U is clear, the low lane comes from W.
 * NOTE(review): the trailing (int)(R) argument line was truncated — restored. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1074 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1083 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
/* Scalar double-precision max (low lane; upper lane copied from A) with
 * explicit rounding/SAE control R. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1108static __inline __m512i
1112 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1118 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1126 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1134 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1140 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1148 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1156 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1162 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1170 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1178 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1184 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1192 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
/* Packed double-precision min with explicit rounding/SAE control R. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked form: lanes with a zero bit in U come from W.
 * NOTE(review): the final pass-through operand line was truncated — restored. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked form: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
1214 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1221 return (__m512d)__builtin_ia32_selectpd_512(__U,
1229 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* Packed single-precision min with explicit rounding/SAE control R. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked form: lanes with a zero bit in U come from W.
 * NOTE(review): the final pass-through operand line was truncated — restored. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

/* Zero-masked form: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1251 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1258 return (__m512)__builtin_ia32_selectps_512(__U,
1266 return (__m512)__builtin_ia32_selectps_512(__U,
1273 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1282 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
/* Scalar single-precision min (low lane; upper lanes copied from A) with
 * explicit rounding/SAE control R. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of U is clear, the low lane comes from W.
 * NOTE(review): the trailing (int)(R) argument line was truncated — restored. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1309 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1318 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
/* Scalar double-precision min (low lane; upper lane copied from A) with
 * explicit rounding/SAE control R. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of U is clear, the low lane comes from W. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1343static __inline __m512i
1347 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1353 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1361 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1369 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1375 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1383 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1391 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1397 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1405 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1413 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1419 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1427 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1435 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si)
__Y);
1441 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1449 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1457 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)
__Y);
1463 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1471 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1479 return (__m512i) ((__v16su) __A * (__v16su) __B);
1485 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1493 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1500 return (__m512i) ((__v8du) __A * (__v8du) __B);
1505 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1510#define _mm512_sqrt_round_pd(A, R) \
1511 ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1513#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1514 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1515 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1516 (__v8df)(__m512d)(W)))
1518#define _mm512_maskz_sqrt_round_pd(U, A, R) \
1519 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1520 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1521 (__v8df)_mm512_setzero_pd()))
1526 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1533 return (__m512d)__builtin_ia32_selectpd_512(__U,
1541 return (__m512d)__builtin_ia32_selectpd_512(__U,
1546#define _mm512_sqrt_round_ps(A, R) \
1547 ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1549#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1550 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1551 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1552 (__v16sf)(__m512)(W)))
1554#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1555 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1556 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1557 (__v16sf)_mm512_setzero_ps()))
1562 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1569 return (__m512)__builtin_ia32_selectps_512(__U,
1577 return (__m512)__builtin_ia32_selectps_512(__U,
1585 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1593 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1601 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1610 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1619 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1627 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1636 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1646 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1655 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1664 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1674 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1683 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1692 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1701 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1709 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1718 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1727 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1735 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1744 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1754 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1763 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1772 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1782 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1791 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1800 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1802 (__v16sf) __A, (
unsigned short)-1,
1809 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1818 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1820 (__v8df) __A, (
unsigned char)-1,
1827 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1836 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1845 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1847 (__v16sf) __A, (
unsigned short)-1,
1854 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1856 (__v8df) __A, (
unsigned char)-1,
1863 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1872 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1878 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1886 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1894 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1900 return (__m512i)__builtin_ia32_selectd_512(__U,
1908 return (__m512i)__builtin_ia32_selectd_512(__U,
1916 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision add (low lane; upper lanes copied from A) with
 * explicit rounding control R. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of U is clear, the low lane comes from W.
 * NOTE(review): the trailing (int)(R) argument line was truncated — restored. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1946 return __builtin_ia32_selectsd_128(__U, __A, __W);
1954#define _mm_add_round_sd(A, B, R) \
1955 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1956 (__v2df)(__m128d)(B), \
1957 (__v2df)_mm_setzero_pd(), \
1958 (__mmask8)-1, (int)(R)))
1960#define _mm_mask_add_round_sd(W, U, A, B, R) \
1961 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1962 (__v2df)(__m128d)(B), \
1963 (__v2df)(__m128d)(W), \
1964 (__mmask8)(U), (int)(R)))
1966#define _mm_maskz_add_round_sd(U, A, B, R) \
1967 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1968 (__v2df)(__m128d)(B), \
1969 (__v2df)_mm_setzero_pd(), \
1970 (__mmask8)(U), (int)(R)))
1974 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1981 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1988 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
1995 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2000#define _mm512_add_round_pd(A, B, R) \
2001 ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
2002 (__v8df)(__m512d)(B), (int)(R)))
2004#define _mm512_mask_add_round_pd(W, U, A, B, R) \
2005 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2006 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
2007 (__v8df)(__m512d)(W)))
2009#define _mm512_maskz_add_round_pd(U, A, B, R) \
2010 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2011 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
2012 (__v8df)_mm512_setzero_pd()))
2014#define _mm512_add_round_ps(A, B, R) \
2015 ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
2016 (__v16sf)(__m512)(B), (int)(R)))
2018#define _mm512_mask_add_round_ps(W, U, A, B, R) \
2019 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2020 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
2021 (__v16sf)(__m512)(W)))
2023#define _mm512_maskz_add_round_ps(U, A, B, R) \
2024 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2025 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
2026 (__v16sf)_mm512_setzero_ps()))
2031 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision subtract (low lane; upper lanes copied from A)
 * with explicit rounding control R. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of U is clear, the low lane comes from W.
 * NOTE(review): the trailing (int)(R) argument line was truncated — restored. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2060 return __builtin_ia32_selectsd_128(__U, __A, __W);
2069#define _mm_sub_round_sd(A, B, R) \
2070 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2071 (__v2df)(__m128d)(B), \
2072 (__v2df)_mm_setzero_pd(), \
2073 (__mmask8)-1, (int)(R)))
2075#define _mm_mask_sub_round_sd(W, U, A, B, R) \
2076 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2077 (__v2df)(__m128d)(B), \
2078 (__v2df)(__m128d)(W), \
2079 (__mmask8)(U), (int)(R)))
2081#define _mm_maskz_sub_round_sd(U, A, B, R) \
2082 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2083 (__v2df)(__m128d)(B), \
2084 (__v2df)_mm_setzero_pd(), \
2085 (__mmask8)(U), (int)(R)))
2089 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2096 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2103 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2110 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2115#define _mm512_sub_round_pd(A, B, R) \
2116 ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
2117 (__v8df)(__m512d)(B), (int)(R)))
2119#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
2120 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2121 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2122 (__v8df)(__m512d)(W)))
2124#define _mm512_maskz_sub_round_pd(U, A, B, R) \
2125 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2126 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2127 (__v8df)_mm512_setzero_pd()))
2129#define _mm512_sub_round_ps(A, B, R) \
2130 ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
2131 (__v16sf)(__m512)(B), (int)(R)))
2133#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
2134 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2135 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2136 (__v16sf)(__m512)(W)))
2138#define _mm512_maskz_sub_round_ps(U, A, B, R) \
2139 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2140 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2141 (__v16sf)_mm512_setzero_ps()))
2146 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision multiply (low lane; upper lanes copied from A)
 * with explicit rounding control R. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of U is clear, the low lane comes from W.
 * NOTE(review): the trailing (int)(R) argument line was truncated — restored. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2175 return __builtin_ia32_selectsd_128(__U, __A, __W);
2184#define _mm_mul_round_sd(A, B, R) \
2185 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2186 (__v2df)(__m128d)(B), \
2187 (__v2df)_mm_setzero_pd(), \
2188 (__mmask8)-1, (int)(R)))
2190#define _mm_mask_mul_round_sd(W, U, A, B, R) \
2191 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2192 (__v2df)(__m128d)(B), \
2193 (__v2df)(__m128d)(W), \
2194 (__mmask8)(U), (int)(R)))
2196#define _mm_maskz_mul_round_sd(U, A, B, R) \
2197 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2198 (__v2df)(__m128d)(B), \
2199 (__v2df)_mm_setzero_pd(), \
2200 (__mmask8)(U), (int)(R)))
2204 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2211 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2218 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2225 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2230#define _mm512_mul_round_pd(A, B, R) \
2231 ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
2232 (__v8df)(__m512d)(B), (int)(R)))
2234#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
2235 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2236 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2237 (__v8df)(__m512d)(W)))
2239#define _mm512_maskz_mul_round_pd(U, A, B, R) \
2240 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2241 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2242 (__v8df)_mm512_setzero_pd()))
2244#define _mm512_mul_round_ps(A, B, R) \
2245 ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
2246 (__v16sf)(__m512)(B), (int)(R)))
2248#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
2249 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2250 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2251 (__v16sf)(__m512)(W)))
2253#define _mm512_maskz_mul_round_ps(U, A, B, R) \
2254 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2255 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2256 (__v16sf)_mm512_setzero_ps()))
2261 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision divide (low lane; upper lanes copied from A)
 * with explicit rounding control R. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of U is clear, the low lane comes from W.
 * NOTE(review): the trailing (int)(R) argument line was truncated — restored. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked: if bit 0 of U is clear, the low lane is zeroed. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2291 return __builtin_ia32_selectsd_128(__U, __A, __W);
2300#define _mm_div_round_sd(A, B, R) \
2301 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2302 (__v2df)(__m128d)(B), \
2303 (__v2df)_mm_setzero_pd(), \
2304 (__mmask8)-1, (int)(R)))
2306#define _mm_mask_div_round_sd(W, U, A, B, R) \
2307 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2308 (__v2df)(__m128d)(B), \
2309 (__v2df)(__m128d)(W), \
2310 (__mmask8)(U), (int)(R)))
2312#define _mm_maskz_div_round_sd(U, A, B, R) \
2313 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2314 (__v2df)(__m128d)(B), \
2315 (__v2df)_mm_setzero_pd(), \
2316 (__mmask8)(U), (int)(R)))
2321 return (__m512d)((__v8df)
__a/(__v8df)
__b);
2326 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2333 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2341 return (__m512)((__v16sf)
__a/(__v16sf)
__b);
2346 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2353 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* 512-bit packed divide with an explicit rounding mode R.  The unmasked
   forms call the divide builtin directly; the mask/maskz forms reuse the
   unmasked macro and blend the result against W (merge) or the zero
   vector (zeroing) via the select builtins.  */
2358#define _mm512_div_round_pd(A, B, R) \
2359 ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
2360 (__v8df)(__m512d)(B), (int)(R)))
2362#define _mm512_mask_div_round_pd(W, U, A, B, R) \
2363 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2364 (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2365 (__v8df)(__m512d)(W)))
2367#define _mm512_maskz_div_round_pd(U, A, B, R) \
2368 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2369 (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2370 (__v8df)_mm512_setzero_pd()))
/* Single-precision counterparts of the three macros above.  */
2372#define _mm512_div_round_ps(A, B, R) \
2373 ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
2374 (__v16sf)(__m512)(B), (int)(R)))
2376#define _mm512_mask_div_round_ps(W, U, A, B, R) \
2377 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2378 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2379 (__v16sf)(__m512)(W)))
2381#define _mm512_maskz_div_round_ps(U, A, B, R) \
2382 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2383 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2384 (__v16sf)_mm512_setzero_ps()))
/* VRNDSCALEPS/PD wrappers: round each element per the imm8 control.
   The non-_round forms pass _MM_FROUND_CUR_DIRECTION; the _round forms
   forward an explicit R.
   NOTE(review): this listing dropped several interior lines of the
   original header (the original file's own line numbers, fused at the
   start of each line, skip values); affected macros below are missing
   their mask-argument line and are preserved verbatim, not guessed.  */
/* NOTE(review): original line 2389 (the (__mmask16)-1 argument) is missing.  */
2386#define _mm512_roundscale_ps(A, B) \
2387 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
2388 (__v16sf)_mm512_undefined_ps(), \
2390 _MM_FROUND_CUR_DIRECTION))
/* Merge-masked: A is the pass-through, B the mask, C the source.  */
2392#define _mm512_mask_roundscale_ps(A, B, C, imm) \
2393 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2394 (__v16sf)(__m512)(A), (__mmask16)(B), \
2395 _MM_FROUND_CUR_DIRECTION))
/* NOTE(review): original line 2400 (the (__mmask16)(A) argument) is missing.  */
2397#define _mm512_maskz_roundscale_ps(A, B, imm) \
2398 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2399 (__v16sf)_mm512_setzero_ps(), \
2401 _MM_FROUND_CUR_DIRECTION))
/* NOTE(review): the closing (int)(R))) line (original 2406) is missing,
   so the trailing backslash splices the next #define into this macro.  */
2403#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
2404 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2405 (__v16sf)(__m512)(A), (__mmask16)(B), \
2408#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
2409 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2410 (__v16sf)_mm512_setzero_ps(), \
2411 (__mmask16)(A), (int)(R)))
2413#define _mm512_roundscale_round_ps(A, imm, R) \
2414 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
2415 (__v16sf)_mm512_undefined_ps(), \
2416 (__mmask16)-1, (int)(R)))
/* Double-precision counterparts follow.  */
/* NOTE(review): original line 2421 (the (__mmask8)-1 argument) is missing.  */
2418#define _mm512_roundscale_pd(A, B) \
2419 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
2420 (__v8df)_mm512_undefined_pd(), \
2422 _MM_FROUND_CUR_DIRECTION))
2424#define _mm512_mask_roundscale_pd(A, B, C, imm) \
2425 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2426 (__v8df)(__m512d)(A), (__mmask8)(B), \
2427 _MM_FROUND_CUR_DIRECTION))
/* NOTE(review): original line 2432 (the (__mmask8)(A) argument) is missing.  */
2429#define _mm512_maskz_roundscale_pd(A, B, imm) \
2430 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2431 (__v8df)_mm512_setzero_pd(), \
2433 _MM_FROUND_CUR_DIRECTION))
/* NOTE(review): the closing (int)(R))) line (original 2438) is missing.  */
2435#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
2436 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2437 (__v8df)(__m512d)(A), (__mmask8)(B), \
2440#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
2441 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2442 (__v8df)_mm512_setzero_pd(), \
2443 (__mmask8)(A), (int)(R)))
2445#define _mm512_roundscale_round_pd(A, imm, R) \
2446 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
2447 (__v8df)_mm512_undefined_pd(), \
2448 (__mmask8)-1, (int)(R)))
/* Fused multiply-add family for packed double with an explicit rounding
   mode R.  All forms map onto the single vfmaddpd512 builtin family:
   fmsub negates C, fnmadd negates A, fnmsub negates both.  _mask merges
   into the first operand, _mask3 into the third, _maskz zeroes.  */
2450#define _mm512_fmadd_round_pd(A, B, C, R) \
2451 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2452 (__v8df)(__m512d)(B), \
2453 (__v8df)(__m512d)(C), \
2454 (__mmask8)-1, (int)(R)))
2457#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
2458 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2459 (__v8df)(__m512d)(B), \
2460 (__v8df)(__m512d)(C), \
2461 (__mmask8)(U), (int)(R)))
2464#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
2465 ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
2466 (__v8df)(__m512d)(B), \
2467 (__v8df)(__m512d)(C), \
2468 (__mmask8)(U), (int)(R)))
2471#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
2472 ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2473 (__v8df)(__m512d)(B), \
2474 (__v8df)(__m512d)(C), \
2475 (__mmask8)(U), (int)(R)))
/* fmsub = A*B - C: implemented by negating C.  */
2478#define _mm512_fmsub_round_pd(A, B, C, R) \
2479 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2480 (__v8df)(__m512d)(B), \
2481 -(__v8df)(__m512d)(C), \
2482 (__mmask8)-1, (int)(R)))
2485#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
2486 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2487 (__v8df)(__m512d)(B), \
2488 -(__v8df)(__m512d)(C), \
2489 (__mmask8)(U), (int)(R)))
2492#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
2493 ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2494 (__v8df)(__m512d)(B), \
2495 -(__v8df)(__m512d)(C), \
2496 (__mmask8)(U), (int)(R)))
/* fnmadd = -(A*B) + C: implemented by negating A.  */
2499#define _mm512_fnmadd_round_pd(A, B, C, R) \
2500 ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2501 (__v8df)(__m512d)(B), \
2502 (__v8df)(__m512d)(C), \
2503 (__mmask8)-1, (int)(R)))
2506#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
2507 ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
2508 (__v8df)(__m512d)(B), \
2509 (__v8df)(__m512d)(C), \
2510 (__mmask8)(U), (int)(R)))
2513#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
2514 ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2515 (__v8df)(__m512d)(B), \
2516 (__v8df)(__m512d)(C), \
2517 (__mmask8)(U), (int)(R)))
/* fnmsub = -(A*B) - C: both A and C negated.  */
2520#define _mm512_fnmsub_round_pd(A, B, C, R) \
2521 ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2522 (__v8df)(__m512d)(B), \
2523 -(__v8df)(__m512d)(C), \
2524 (__mmask8)-1, (int)(R)))
2527#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
2528 ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2529 (__v8df)(__m512d)(B), \
2530 -(__v8df)(__m512d)(C), \
2531 (__mmask8)(U), (int)(R)))
2537 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2547 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2557 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2567 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2577 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2587 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2597 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2607 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2617 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2627 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2637 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2647 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
/* Single-precision counterparts of the FMA-with-rounding family above.
   Same scheme via the vfmaddps512 builtins; note the fnmadd/fnmsub
   forms here negate B (equivalent to negating A, since only the
   product's sign matters).  */
2654#define _mm512_fmadd_round_ps(A, B, C, R) \
2655 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2656 (__v16sf)(__m512)(B), \
2657 (__v16sf)(__m512)(C), \
2658 (__mmask16)-1, (int)(R)))
2661#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
2662 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2663 (__v16sf)(__m512)(B), \
2664 (__v16sf)(__m512)(C), \
2665 (__mmask16)(U), (int)(R)))
2668#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
2669 ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
2670 (__v16sf)(__m512)(B), \
2671 (__v16sf)(__m512)(C), \
2672 (__mmask16)(U), (int)(R)))
2675#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
2676 ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2677 (__v16sf)(__m512)(B), \
2678 (__v16sf)(__m512)(C), \
2679 (__mmask16)(U), (int)(R)))
/* fmsub = A*B - C.  */
2682#define _mm512_fmsub_round_ps(A, B, C, R) \
2683 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2684 (__v16sf)(__m512)(B), \
2685 -(__v16sf)(__m512)(C), \
2686 (__mmask16)-1, (int)(R)))
2689#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
2690 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2691 (__v16sf)(__m512)(B), \
2692 -(__v16sf)(__m512)(C), \
2693 (__mmask16)(U), (int)(R)))
2696#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
2697 ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2698 (__v16sf)(__m512)(B), \
2699 -(__v16sf)(__m512)(C), \
2700 (__mmask16)(U), (int)(R)))
/* fnmadd = -(A*B) + C.  */
2703#define _mm512_fnmadd_round_ps(A, B, C, R) \
2704 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2705 -(__v16sf)(__m512)(B), \
2706 (__v16sf)(__m512)(C), \
2707 (__mmask16)-1, (int)(R)))
2710#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
2711 ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
2712 (__v16sf)(__m512)(B), \
2713 (__v16sf)(__m512)(C), \
2714 (__mmask16)(U), (int)(R)))
2717#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
2718 ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2719 (__v16sf)(__m512)(B), \
2720 (__v16sf)(__m512)(C), \
2721 (__mmask16)(U), (int)(R)))
/* fnmsub = -(A*B) - C.  */
2724#define _mm512_fnmsub_round_ps(A, B, C, R) \
2725 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2726 -(__v16sf)(__m512)(B), \
2727 -(__v16sf)(__m512)(C), \
2728 (__mmask16)-1, (int)(R)))
2731#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
2732 ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2733 (__v16sf)(__m512)(B), \
2734 -(__v16sf)(__m512)(C), \
2735 (__mmask16)(U), (int)(R)))
2741 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2751 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2761 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2771 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2781 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2791 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2801 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2811 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2821 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2831 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2841 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2851 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
/* fmaddsub: alternating A*B+C (odd elements) / A*B-C (even elements),
   packed double, with explicit rounding R.  fmsubadd swaps the pattern
   and is expressed by negating C.  Masking scheme matches the FMA
   family above (_mask merges into A, _mask3 into C, _maskz zeroes).  */
2858#define _mm512_fmaddsub_round_pd(A, B, C, R) \
2859 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2860 (__v8df)(__m512d)(B), \
2861 (__v8df)(__m512d)(C), \
2862 (__mmask8)-1, (int)(R)))
2865#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
2866 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2867 (__v8df)(__m512d)(B), \
2868 (__v8df)(__m512d)(C), \
2869 (__mmask8)(U), (int)(R)))
2872#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
2873 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
2874 (__v8df)(__m512d)(B), \
2875 (__v8df)(__m512d)(C), \
2876 (__mmask8)(U), (int)(R)))
2879#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
2880 ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2881 (__v8df)(__m512d)(B), \
2882 (__v8df)(__m512d)(C), \
2883 (__mmask8)(U), (int)(R)))
/* fmsubadd: expressed as fmaddsub with C negated.  */
2886#define _mm512_fmsubadd_round_pd(A, B, C, R) \
2887 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2888 (__v8df)(__m512d)(B), \
2889 -(__v8df)(__m512d)(C), \
2890 (__mmask8)-1, (int)(R)))
2893#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
2894 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2895 (__v8df)(__m512d)(B), \
2896 -(__v8df)(__m512d)(C), \
2897 (__mmask8)(U), (int)(R)))
2900#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
2901 ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2902 (__v8df)(__m512d)(B), \
2903 -(__v8df)(__m512d)(C), \
2904 (__mmask8)(U), (int)(R)))
2910 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2920 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2930 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2940 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2950 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2960 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2970 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
/* Single-precision counterparts of the fmaddsub/fmsubadd-with-rounding
   macros above, via the vfmaddsubps512 builtins.  */
2977#define _mm512_fmaddsub_round_ps(A, B, C, R) \
2978 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2979 (__v16sf)(__m512)(B), \
2980 (__v16sf)(__m512)(C), \
2981 (__mmask16)-1, (int)(R)))
2984#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
2985 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2986 (__v16sf)(__m512)(B), \
2987 (__v16sf)(__m512)(C), \
2988 (__mmask16)(U), (int)(R)))
2991#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
2992 ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
2993 (__v16sf)(__m512)(B), \
2994 (__v16sf)(__m512)(C), \
2995 (__mmask16)(U), (int)(R)))
2998#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
2999 ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3000 (__v16sf)(__m512)(B), \
3001 (__v16sf)(__m512)(C), \
3002 (__mmask16)(U), (int)(R)))
/* fmsubadd: expressed as fmaddsub with C negated.  */
3005#define _mm512_fmsubadd_round_ps(A, B, C, R) \
3006 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3007 (__v16sf)(__m512)(B), \
3008 -(__v16sf)(__m512)(C), \
3009 (__mmask16)-1, (int)(R)))
3012#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3013 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3014 (__v16sf)(__m512)(B), \
3015 -(__v16sf)(__m512)(C), \
3016 (__mmask16)(U), (int)(R)))
3019#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3020 ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3021 (__v16sf)(__m512)(B), \
3022 -(__v16sf)(__m512)(C), \
3023 (__mmask16)(U), (int)(R)))
3029 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3039 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3049 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3059 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3069 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3079 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3089 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
/* mask3 fmsub/fmsubadd and merge-masked fnmadd/fnmsub rounding macros.
   These use the dedicated vfmsub*/vfmsubadd* mask3 builtins so the
   pass-through operand is C (the accumulator), while the fnm* merge
   forms reuse vfmadd*_mask with operands negated.
   NOTE(review): the isolated numeric "return ..." lines interleaved
   below are remnants of the non-rounding inline wrappers whose
   signatures were dropped by extraction; they are preserved verbatim.  */
3096#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3097 ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3098 (__v8df)(__m512d)(B), \
3099 (__v8df)(__m512d)(C), \
3100 (__mmask8)(U), (int)(R)))
3106 return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3113#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3114 ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3115 (__v16sf)(__m512)(B), \
3116 (__v16sf)(__m512)(C), \
3117 (__mmask16)(U), (int)(R)))
3122 return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3129#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3130 ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3131 (__v8df)(__m512d)(B), \
3132 (__v8df)(__m512d)(C), \
3133 (__mmask8)(U), (int)(R)))
3139 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3146#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3147 ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3148 (__v16sf)(__m512)(B), \
3149 (__v16sf)(__m512)(C), \
3150 (__mmask16)(U), (int)(R)))
3156 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
/* Merge-masked fnmadd = -(A*B) + C: vfmadd with B negated.  */
3163#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3164 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3165 -(__v8df)(__m512d)(B), \
3166 (__v8df)(__m512d)(C), \
3167 (__mmask8)(U), (int)(R)))
3173 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3180#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3181 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3182 -(__v16sf)(__m512)(B), \
3183 (__v16sf)(__m512)(C), \
3184 (__mmask16)(U), (int)(R)))
3190 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
/* Merge-masked fnmsub = -(A*B) - C.  */
3197#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3198 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3199 -(__v8df)(__m512d)(B), \
3200 -(__v8df)(__m512d)(C), \
3201 (__mmask8)(U), (int)(R)))
3204#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3205 ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
3206 (__v8df)(__m512d)(B), \
3207 (__v8df)(__m512d)(C), \
3208 (__mmask8)(U), (int)(R)))
3214 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3224 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3231#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3232 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3233 -(__v16sf)(__m512)(B), \
3234 -(__v16sf)(__m512)(C), \
3235 (__mmask16)(U), (int)(R)))
3238#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3239 ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3240 (__v16sf)(__m512)(B), \
3241 (__v16sf)(__m512)(C), \
3242 (__mmask16)(U), (int)(R)))
3248 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3258 return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3272 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3280 return (__m512i)__builtin_ia32_selectd_512(__U,
3289 return (__m512i)__builtin_ia32_selectd_512(__U,
3298 return (__m512i)__builtin_ia32_selectd_512(__U,
3306 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3314 return (__m512i)__builtin_ia32_selectq_512(__U,
3323 return (__m512i)__builtin_ia32_selectq_512(__U,
3332 return (__m512i)__builtin_ia32_selectq_512(__U,
/* VALIGNQ/VALIGND: concatenate B:A and extract 8 qwords / 16 dwords
   starting at element offset I.  The mask/maskz forms reuse the
   unmasked macro and blend via the select builtins (merge into W, or
   zero).  */
3337#define _mm512_alignr_epi64(A, B, I) \
3338 ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
3339 (__v8di)(__m512i)(B), (int)(I)))
3341#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
3342 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3343 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3344 (__v8di)(__m512i)(W)))
3346#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
3347 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3348 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3349 (__v8di)_mm512_setzero_si512()))
/* 32-bit element counterpart.  */
3351#define _mm512_alignr_epi32(A, B, I) \
3352 ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
3353 (__v16si)(__m512i)(B), (int)(I)))
3355#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
3356 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3357 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3358 (__v16si)(__m512i)(W)))
3360#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
3361 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3362 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3363 (__v16si)_mm512_setzero_si512()))
/* VEXTRACTF64X4 / VEXTRACTF32X4: extract the I-th 256-bit (4 x double)
   or 128-bit (4 x float) lane, with undefined / merge / zero
   pass-through variants.
   NOTE(review): every macro in this group is missing its final
   mask-argument line (originals 3369, 3374, 3379, 3384, 3389, 3394);
   the trailing backslashes therefore splice into the following lines.
   Preserved verbatim, not guessed.  */
3366#define _mm512_extractf64x4_pd(A, I) \
3367 ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
3368 (__v4df)_mm256_undefined_pd(), \
3371#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
3372 ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3373 (__v4df)(__m256d)(W), \
3376#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
3377 ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3378 (__v4df)_mm256_setzero_pd(), \
3381#define _mm512_extractf32x4_ps(A, I) \
3382 ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
3383 (__v4sf)_mm_undefined_ps(), \
3386#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
3387 ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3388 (__v4sf)(__m128)(W), \
3391#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
3392 ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3393 (__v4sf)_mm_setzero_ps(), \
3401 return (__m512d) __builtin_ia32_selectpd_512 ((
__mmask8) __U,
3409 return (__m512) __builtin_ia32_selectps_512 ((
__mmask16) __U,
3417 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
3425 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
/* VCMPPS: compare 16 floats per predicate P, producing a 16-bit mask.
   The _round forms take an explicit rounding/SAE control R; the plain
   forms pass _MM_FROUND_CUR_DIRECTION.  The named-predicate aliases
   below expand to _mm512_cmp_ps_mask with the matching _CMP_* code.  */
3432#define _mm512_cmp_round_ps_mask(A, B, P, R) \
3433 ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3434 (__v16sf)(__m512)(B), (int)(P), \
3435 (__mmask16)-1, (int)(R)))
3437#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
3438 ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3439 (__v16sf)(__m512)(B), (int)(P), \
3440 (__mmask16)(U), (int)(R)))
3442#define _mm512_cmp_ps_mask(A, B, P) \
3443 _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3444#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
3445 _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3447#define _mm512_cmpeq_ps_mask(A, B) \
3448 _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
3449#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
3450 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
3452#define _mm512_cmplt_ps_mask(A, B) \
3453 _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
3454#define _mm512_mask_cmplt_ps_mask(k, A, B) \
3455 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
3457#define _mm512_cmple_ps_mask(A, B) \
3458 _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
3459#define _mm512_mask_cmple_ps_mask(k, A, B) \
3460 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
3462#define _mm512_cmpunord_ps_mask(A, B) \
3463 _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
3464#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
3465 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
3467#define _mm512_cmpneq_ps_mask(A, B) \
3468 _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
3469#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
3470 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
3472#define _mm512_cmpnlt_ps_mask(A, B) \
3473 _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
3474#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
3475 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
3477#define _mm512_cmpnle_ps_mask(A, B) \
3478 _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
3479#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
3480 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
3482#define _mm512_cmpord_ps_mask(A, B) \
3483 _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
3484#define _mm512_mask_cmpord_ps_mask(k, A, B) \
3485 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
/* VCMPPD: double-precision counterpart of the compare-mask family
   above; 8 doubles per vector, 8-bit result mask.  */
3487#define _mm512_cmp_round_pd_mask(A, B, P, R) \
3488 ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3489 (__v8df)(__m512d)(B), (int)(P), \
3490 (__mmask8)-1, (int)(R)))
3492#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
3493 ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3494 (__v8df)(__m512d)(B), (int)(P), \
3495 (__mmask8)(U), (int)(R)))
3497#define _mm512_cmp_pd_mask(A, B, P) \
3498 _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3499#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
3500 _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3502#define _mm512_cmpeq_pd_mask(A, B) \
3503 _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
3504#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
3505 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
3507#define _mm512_cmplt_pd_mask(A, B) \
3508 _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
3509#define _mm512_mask_cmplt_pd_mask(k, A, B) \
3510 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
3512#define _mm512_cmple_pd_mask(A, B) \
3513 _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
3514#define _mm512_mask_cmple_pd_mask(k, A, B) \
3515 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
3517#define _mm512_cmpunord_pd_mask(A, B) \
3518 _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
3519#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
3520 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
3522#define _mm512_cmpneq_pd_mask(A, B) \
3523 _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
3524#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
3525 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
3527#define _mm512_cmpnlt_pd_mask(A, B) \
3528 _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
3529#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
3530 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
3532#define _mm512_cmpnle_pd_mask(A, B) \
3533 _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
3534#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
3535 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
3537#define _mm512_cmpord_pd_mask(A, B) \
3538 _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
3539#define _mm512_mask_cmpord_pd_mask(k, A, B) \
3540 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
/* VCVTTPS2UDQ with explicit rounding/SAE control R: truncating convert
   of 16 floats to unsigned 32-bit ints.  Undefined / merge / zero
   pass-through variants.  */
3544#define _mm512_cvtt_roundps_epu32(A, R) \
3545 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3546 (__v16si)_mm512_undefined_epi32(), \
3547 (__mmask16)-1, (int)(R)))
3549#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3550 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3551 (__v16si)(__m512i)(W), \
3552 (__mmask16)(U), (int)(R)))
3554#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3555 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3556 (__v16si)_mm512_setzero_si512(), \
3557 (__mmask16)(U), (int)(R)))
3563 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3573 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3582 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
/* VCVTDQ2PS / VCVTUDQ2PS with explicit rounding R: convert 16 signed
   (epi32) or unsigned (epu32) 32-bit ints to floats.  */
3588#define _mm512_cvt_roundepi32_ps(A, R) \
3589 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3590 (__v16sf)_mm512_setzero_ps(), \
3591 (__mmask16)-1, (int)(R)))
3593#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
3594 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3595 (__v16sf)(__m512)(W), \
3596 (__mmask16)(U), (int)(R)))
3598#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
3599 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3600 (__v16sf)_mm512_setzero_ps(), \
3601 (__mmask16)(U), (int)(R)))
/* Unsigned-source counterpart.  */
3603#define _mm512_cvt_roundepu32_ps(A, R) \
3604 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3605 (__v16sf)_mm512_setzero_ps(), \
3606 (__mmask16)-1, (int)(R)))
3608#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
3609 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3610 (__v16sf)(__m512)(W), \
3611 (__mmask16)(U), (int)(R)))
3613#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
3614 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3615 (__v16sf)_mm512_setzero_ps(), \
3616 (__mmask16)(U), (int)(R)))
3621 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3627 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3635 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3643 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3649 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3657 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3677 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3683 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3691 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3699 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3705 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3713 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
/* VCVTPD2PS with explicit rounding R: narrow 8 doubles to 8 floats
   (256-bit result).
   NOTE(review): the merge-masked variant below is missing its closing
   (int)(R))) line (original 3738); its trailing backslash splices into
   the following #define.  Preserved verbatim, not guessed.  */
3730#define _mm512_cvt_roundpd_ps(A, R) \
3731 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3732 (__v8sf)_mm256_setzero_ps(), \
3733 (__mmask8)-1, (int)(R)))
3735#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3736 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3737 (__v8sf)(__m256)(W), (__mmask8)(U), \
3740#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3741 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3742 (__v8sf)_mm256_setzero_ps(), \
3743 (__mmask8)(U), (int)(R)))
3748 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3757 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3766 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3775 return (__m512) __builtin_shufflevector((__v8sf)
_mm512_cvtpd_ps(__A),
3777 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3783 return (__m512) __builtin_shufflevector (
3787 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/* VCVTPS2PH / VCVTPH2PS: float <-> half-precision conversion.  The
   ps->ph macros take an imm8 rounding control I; the plain cvtps_ph
   names are simple aliases for the _round forms.
   NOTE(review): the three vcvtps2ph macros below are each missing
   their final mask-argument line (originals 3793, 3798, 3803); their
   trailing backslashes splice into the following lines.  Preserved
   verbatim, not guessed.  */
3790#define _mm512_cvt_roundps_ph(A, I) \
3791 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3792 (__v16hi)_mm256_undefined_si256(), \
3795#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
3796 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3797 (__v16hi)(__m256i)(U), \
3800#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
3801 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3802 (__v16hi)_mm256_setzero_si256(), \
3805#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
3806#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
3807#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
/* Half -> single widening converts (these three are complete).  */
3809#define _mm512_cvt_roundph_ps(A, R) \
3810 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3811 (__v16sf)_mm512_undefined_ps(), \
3812 (__mmask16)-1, (int)(R)))
3814#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
3815 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3816 (__v16sf)(__m512)(W), \
3817 (__mmask16)(U), (int)(R)))
3819#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
3820 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3821 (__v16sf)_mm512_setzero_ps(), \
3822 (__mmask16)(U), (int)(R)))
3828 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3838 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3847 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
/* VCVTTPD2DQ with explicit rounding/SAE control R: truncating convert
   of 8 doubles to 8 signed 32-bit ints (256-bit result).  */
3853#define _mm512_cvtt_roundpd_epi32(A, R) \
3854 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3855 (__v8si)_mm256_setzero_si256(), \
3856 (__mmask8)-1, (int)(R)))
3858#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3859 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3860 (__v8si)(__m256i)(W), \
3861 (__mmask8)(U), (int)(R)))
3863#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3864 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3865 (__v8si)_mm256_setzero_si256(), \
3866 (__mmask8)(U), (int)(R)))
3871 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)
__a,
3880 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3889 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
/* VCVTTPS2DQ with explicit rounding/SAE control R: truncating convert
   of 16 floats to signed 32-bit ints.  */
3895#define _mm512_cvtt_roundps_epi32(A, R) \
3896 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3897 (__v16si)_mm512_setzero_si512(), \
3898 (__mmask16)-1, (int)(R)))
3900#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3901 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3902 (__v16si)(__m512i)(W), \
3903 (__mmask16)(U), (int)(R)))
3905#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3906 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3907 (__v16si)_mm512_setzero_si512(), \
3908 (__mmask16)(U), (int)(R)))
3914 __builtin_ia32_cvttps2dq512_mask((__v16sf)
__a,
3922 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3931 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
/* VCVTPS2DQ with explicit rounding R: convert 16 floats to signed
   32-bit ints using the given rounding mode (non-truncating).  */
3937#define _mm512_cvt_roundps_epi32(A, R) \
3938 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3939 (__v16si)_mm512_setzero_si512(), \
3940 (__mmask16)-1, (int)(R)))
3942#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
3943 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3944 (__v16si)(__m512i)(W), \
3945 (__mmask16)(U), (int)(R)))
3947#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
3948 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3949 (__v16si)_mm512_setzero_si512(), \
3950 (__mmask16)(U), (int)(R)))
3955 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3964 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3973 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
/* VCVTPD2DQ with explicit rounding R: convert 8 doubles to 8 signed
   32-bit ints (256-bit result).  */
3980#define _mm512_cvt_roundpd_epi32(A, R) \
3981 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3982 (__v8si)_mm256_setzero_si256(), \
3983 (__mmask8)-1, (int)(R)))
3985#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
3986 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3987 (__v8si)(__m256i)(W), \
3988 (__mmask8)(U), (int)(R)))
3990#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
3991 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3992 (__v8si)_mm256_setzero_si256(), \
3993 (__mmask8)(U), (int)(R)))
3998 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4008 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4017 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
/* VCVTPS2UDQ with explicit rounding R: convert 16 floats to unsigned
   32-bit ints.  */
4024#define _mm512_cvt_roundps_epu32(A, R) \
4025 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4026 (__v16si)_mm512_setzero_si512(), \
4027 (__mmask16)-1, (int)(R)))
4029#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
4030 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4031 (__v16si)(__m512i)(W), \
4032 (__mmask16)(U), (int)(R)))
4034#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
4035 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4036 (__v16si)_mm512_setzero_si512(), \
4037 (__mmask16)(U), (int)(R)))
4042 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4052 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4061 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
/* VCVTPD2UDQ with explicit rounding R: convert 8 doubles to 8 unsigned
   32-bit ints (256-bit result).  */
4068#define _mm512_cvt_roundpd_epu32(A, R) \
4069 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4070 (__v8si)_mm256_setzero_si256(), \
4071 (__mmask8)-1, (int)(R)))
4073#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
4074 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4075 (__v8si)(__m256i)(W), \
4076 (__mmask8)(U), (int)(R)))
4078#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
4079 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4080 (__v8si)_mm256_setzero_si256(), \
4081 (__mmask8)(U), (int)(R)))
4086 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4096 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4105 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4129 return (__m512d)__builtin_shufflevector((__v8df)
__a, (__v8df)
__b,
4130 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4136 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4144 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4152 return (__m512d)__builtin_shufflevector((__v8df)
__a, (__v8df)
__b,
4153 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4159 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4167 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4175 return (__m512)__builtin_shufflevector((__v16sf)
__a, (__v16sf)
__b,
4177 2+4, 18+4, 3+4, 19+4,
4178 2+8, 18+8, 3+8, 19+8,
4179 2+12, 18+12, 3+12, 19+12);
4185 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4193 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4201 return (__m512)__builtin_shufflevector((__v16sf)
__a, (__v16sf)
__b,
4203 0+4, 16+4, 1+4, 17+4,
4204 0+8, 16+8, 1+8, 17+8,
4205 0+12, 16+12, 1+12, 17+12);
4211 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4219 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4227 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4229 2+4, 18+4, 3+4, 19+4,
4230 2+8, 18+8, 3+8, 19+8,
4231 2+12, 18+12, 3+12, 19+12);
4237 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4245 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4253 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4255 0+4, 16+4, 1+4, 17+4,
4256 0+8, 16+8, 1+8, 17+8,
4257 0+12, 16+12, 1+12, 17+12);
4263 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4271 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4279 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4280 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4286 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4294 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4302 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4303 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4309 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4317 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4328 struct __loadu_si512 {
4331 return ((
const struct __loadu_si512*)
__P)->__v;
4337 struct __loadu_epi32 {
4340 return ((
const struct __loadu_epi32*)
__P)->__v;
4346 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)
__P,
4355 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)
__P,
4364 struct __loadu_epi64 {
4367 return ((
const struct __loadu_epi64*)
__P)->__v;
4373 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)
__P,
4381 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)
__P,
4390 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)
__P,
4398 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)
__P,
4407 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)
__P,
4415 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)
__P,
4427 return ((
const struct __loadu_pd*)
__p)->__v;
4436 return ((
const struct __loadu_ps*)
__p)->__v;
4442 return *(
const __m512*)
__p;
4448 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)
__P,
4456 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)
__P,
4465 return *(
const __m512d*)
__p;
4471 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)
__P,
4479 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)
__P,
4488 return *(
const __m512i *)
__P;
4494 return *(
const __m512i *)
__P;
4500 return *(
const __m512i *)
__P;
4508 struct __storeu_epi64 {
4511 ((
struct __storeu_epi64*)
__P)->__v = __A;
4517 __builtin_ia32_storedqudi512_mask ((
long long *)
__P, (__v8di) __A,
4524 struct __storeu_si512 {
4527 ((
struct __storeu_si512*)
__P)->__v = __A;
4533 struct __storeu_epi32 {
4536 ((
struct __storeu_epi32*)
__P)->__v = __A;
4542 __builtin_ia32_storedqusi512_mask ((
int *)
__P, (__v16si) __A,
4549 __builtin_ia32_storeupd512_mask ((
double *)
__P, (__v8df) __A, (
__mmask8) __U);
4555 struct __storeu_pd {
4558 ((
struct __storeu_pd*)
__P)->__v = __A;
4564 __builtin_ia32_storeups512_mask ((
float *)
__P, (__v16sf) __A,
4571 struct __storeu_ps {
4574 ((
struct __storeu_ps*)
__P)->__v = __A;
4580 __builtin_ia32_storeapd512_mask ((__v8df *)
__P, (__v8df) __A, (
__mmask8) __U);
4586 *(__m512d*)
__P = __A;
4592 __builtin_ia32_storeaps512_mask ((__v16sf *)
__P, (__v16sf) __A,
4599 *(__m512*)
__P = __A;
4605 *(__m512i *)
__P = __A;
4611 *(__m512i *)
__P = __A;
4617 *(__m512i *)
__P = __A;
4625 return __builtin_ia32_knothi(__M);
/* Named 512-bit integer comparisons, expressed as wrappers around the
   generic _mm512_[mask_]cmp_ep{i,u}{32,64}_mask macros with the matching
   _MM_CMPINT_* predicate.  (Source-line-number extraction artifacts that
   had been fused into each line are removed.) */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4735 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4741 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4749 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4759 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4765 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4773 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4781 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4787 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4795 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4803 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4809 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4817 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4825 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4831 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4839 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4847 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4853 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4861 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4869 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4875 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4883 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4891 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4897 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4905 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4913 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4919 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4927 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4935 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4941 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4949 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4957 return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4963 return (__m512i)__builtin_ia32_selectd_512(__U,
4971 return (__m512i)__builtin_ia32_selectd_512(__U,
4979 return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4985 return (__m512i)__builtin_ia32_selectq_512(__U,
4993 return (__m512i)__builtin_ia32_selectq_512(__U,
5000#define _mm512_cmp_epi32_mask(a, b, p) \
5001 ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
5002 (__v16si)(__m512i)(b), (int)(p), \
5005#define _mm512_cmp_epu32_mask(a, b, p) \
5006 ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
5007 (__v16si)(__m512i)(b), (int)(p), \
5010#define _mm512_cmp_epi64_mask(a, b, p) \
5011 ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
5012 (__v8di)(__m512i)(b), (int)(p), \
5015#define _mm512_cmp_epu64_mask(a, b, p) \
5016 ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
5017 (__v8di)(__m512i)(b), (int)(p), \
5020#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
5021 ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
5022 (__v16si)(__m512i)(b), (int)(p), \
5025#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
5026 ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
5027 (__v16si)(__m512i)(b), (int)(p), \
5030#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
5031 ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
5032 (__v8di)(__m512i)(b), (int)(p), \
5035#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
5036 ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
5037 (__v8di)(__m512i)(b), (int)(p), \
/* Rotate-left by immediate for 32/64-bit lanes, with merge-masking (mask_*)
   and zero-masking (maskz_*) variants built from the select builtins.
   (Fused source-line-number artifacts removed.) */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
5069 return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5075 return (__m512i)__builtin_ia32_selectd_512(__U,
5083 return (__m512i)__builtin_ia32_selectd_512(__U,
5091 return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5097 return (__m512i)__builtin_ia32_selectq_512(__U,
5105 return (__m512i)__builtin_ia32_selectq_512(__U,
/* Rotate-right by immediate for 32/64-bit lanes, with merge- and
   zero-masking variants.  (Fused source-line-number artifacts removed.) */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
5139 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (
int)__B);
5146 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5153 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5161 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (
int)__B);
5167 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5175 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5183 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (
int)__B);
5190 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5197 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5205 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (
int)__B);
5212 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5221 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5229 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *)
__P,
5237 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *)
__P,
5246 __builtin_ia32_movdqa32store512_mask ((__v16si *)
__P, (__v16si) __A,
5253 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
5261 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
5269 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
5277 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
5285 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *)
__P,
5293 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *)
__P,
5302 __builtin_ia32_movdqa64store512_mask ((__v8di *)
__P, (__v8di) __A,
5309 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5310 0, 0, 2, 2, 4, 4, 6, 6);
5316 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
5324 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
/* VFIXUPIMMPD with explicit rounding control; the mask variant merges
   unselected lanes from A.  (Fused source-line-number artifacts removed.) */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))
5341#define _mm512_fixupimm_pd(A, B, C, imm) \
5342 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5343 (__v8df)(__m512d)(B), \
5344 (__v8di)(__m512i)(C), (int)(imm), \
5346 _MM_FROUND_CUR_DIRECTION))
5348#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
5349 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5350 (__v8df)(__m512d)(B), \
5351 (__v8di)(__m512i)(C), (int)(imm), \
5353 _MM_FROUND_CUR_DIRECTION))
5355#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
5356 ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5357 (__v8df)(__m512d)(B), \
5358 (__v8di)(__m512i)(C), \
5359 (int)(imm), (__mmask8)(U), \
/* Zero-masked VFIXUPIMMPD at the current rounding mode, and the packed
   single-precision rounding variants.  (Fused source-line-number artifacts
   removed.) */
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))
5381#define _mm512_fixupimm_ps(A, B, C, imm) \
5382 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5383 (__v16sf)(__m512)(B), \
5384 (__v16si)(__m512i)(C), (int)(imm), \
5386 _MM_FROUND_CUR_DIRECTION))
5388#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
5389 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5390 (__v16sf)(__m512)(B), \
5391 (__v16si)(__m512i)(C), (int)(imm), \
5393 _MM_FROUND_CUR_DIRECTION))
5395#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
5396 ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5397 (__v16sf)(__m512)(B), \
5398 (__v16si)(__m512i)(C), \
5399 (int)(imm), (__mmask16)(U), \
/* Zero-masked VFIXUPIMMPS at the current rounding mode, and the scalar
   double-precision rounding variants.  (Fused source-line-number artifacts
   removed.) */
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))
5421#define _mm_fixupimm_sd(A, B, C, imm) \
5422 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5423 (__v2df)(__m128d)(B), \
5424 (__v2di)(__m128i)(C), (int)(imm), \
5426 _MM_FROUND_CUR_DIRECTION))
5428#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
5429 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5430 (__v2df)(__m128d)(B), \
5431 (__v2di)(__m128i)(C), (int)(imm), \
5433 _MM_FROUND_CUR_DIRECTION))
/* Zero-masked scalar VFIXUPIMMSD with explicit rounding control.
   (Fused source-line-number artifacts removed.) */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))
5441#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
5442 ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5443 (__v2df)(__m128d)(B), \
5444 (__v2di)(__m128i)(C), (int)(imm), \
5446 _MM_FROUND_CUR_DIRECTION))
/* Scalar single-precision VFIXUPIMMSS with explicit rounding control.
   (Fused source-line-number artifacts removed.) */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))
5460#define _mm_fixupimm_ss(A, B, C, imm) \
5461 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5462 (__v4sf)(__m128)(B), \
5463 (__v4si)(__m128i)(C), (int)(imm), \
5465 _MM_FROUND_CUR_DIRECTION))
5467#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
5468 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5469 (__v4sf)(__m128)(B), \
5470 (__v4si)(__m128i)(C), (int)(imm), \
5472 _MM_FROUND_CUR_DIRECTION))
5474#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
5475 ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5476 (__v4sf)(__m128)(B), \
5477 (__v4si)(__m128i)(C), (int)(imm), \
5478 (__mmask8)(U), (int)(R)))
5480#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
5481 ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5482 (__v4sf)(__m128)(B), \
5483 (__v4si)(__m128i)(C), (int)(imm), \
5485 _MM_FROUND_CUR_DIRECTION))
/* Scalar VGETEXPSD with explicit rounding; unmasked form zeroes nothing
   ((__mmask8)-1 selects the result lane).  (Fused source-line-number
   artifacts removed.) */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))
5497 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5504 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
/* Merge-masked scalar VGETEXPSD: unselected lane taken from W.
   (Fused source-line-number artifacts removed.) */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), (int)(R)))
5520 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
/* Zero-masked scalar VGETEXPSD, and unmasked scalar VGETEXPSS with explicit
   rounding.  (Fused source-line-number artifacts removed.) */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), (int)(R)))

#define _mm_getexp_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, (int)(R)))
5542 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5549 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
/* Merge- and zero-masked scalar VGETEXPSS with explicit rounding.
   (Fused source-line-number artifacts removed.) */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), (int)(R)))

#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R)))
/* Scalar VGETMANTSD: the interval selector C and sign-control D are packed
   into one immediate as (D << 2) | C.  (Fused source-line-number artifacts
   removed.) */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))
5585#define _mm_getmant_sd(A, B, C, D) \
5586 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5587 (__v2df)(__m128d)(B), \
5588 (int)(((D)<<2) | (C)), \
5589 (__v2df)_mm_setzero_pd(), \
5591 _MM_FROUND_CUR_DIRECTION))
5593#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
5594 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5595 (__v2df)(__m128d)(B), \
5596 (int)(((D)<<2) | (C)), \
5597 (__v2df)(__m128d)(W), \
5599 _MM_FROUND_CUR_DIRECTION))
/* Merge-masked scalar VGETMANTSD with explicit rounding; immediate packs
   sign-control D and interval C as (D << 2) | C.  (Fused source-line-number
   artifacts removed.) */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))
5608#define _mm_maskz_getmant_sd(U, A, B, C, D) \
5609 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5610 (__v2df)(__m128d)(B), \
5611 (int)(((D)<<2) | (C)), \
5612 (__v2df)_mm_setzero_pd(), \
5614 _MM_FROUND_CUR_DIRECTION))
/* Zero-masked scalar VGETMANTSD, and unmasked scalar VGETMANTSS, both with
   explicit rounding; immediate packs (D << 2) | C.  (Fused
   source-line-number artifacts removed.) */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))
5630#define _mm_getmant_ss(A, B, C, D) \
5631 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5632 (__v4sf)(__m128)(B), \
5633 (int)(((D)<<2) | (C)), \
5634 (__v4sf)_mm_setzero_ps(), \
5636 _MM_FROUND_CUR_DIRECTION))
5638#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
5639 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5640 (__v4sf)(__m128)(B), \
5641 (int)(((D)<<2) | (C)), \
5642 (__v4sf)(__m128)(W), \
5644 _MM_FROUND_CUR_DIRECTION))
/* Merge-masked scalar VGETMANTSS with explicit rounding; immediate packs
   (D << 2) | C.  (Fused source-line-number artifacts removed.) */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))
5653#define _mm_maskz_getmant_ss(U, A, B, C, D) \
5654 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5655 (__v4sf)(__m128)(B), \
5656 (int)(((D)<<2) | (C)), \
5657 (__v4sf)_mm_setzero_ps(), \
5659 _MM_FROUND_CUR_DIRECTION))
/* Zero-masked scalar VGETMANTSS; ordered scalar compares with explicit
   rounding (VCOMISD/VCOMISS, predicate P); and SD->int64 conversion with
   rounding.  (Fused source-line-number artifacts removed.) */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))

#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5690 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5696 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5704 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5712 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5718 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5726 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5734 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)
__Y);
5740 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5748 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5756 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)
__Y);
5762 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5770 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5778 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5784 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5792 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5800 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5806 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5814 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5822 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)
__Y);
5828 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5836 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5844 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)
__Y);
5850 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5858 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5866 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5872 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5880 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5888 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5894 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5902 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5910 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)
__Y);
5916 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5924 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5932 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)
__Y);
5938 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5946 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* VPTERNLOG: three-operand bitwise logic selected by the 8-bit truth-table
   immediate, in unmasked / merge-masked / zero-masked forms for 32- and
   64-bit lanes; followed by scalar SD->int conversions with explicit
   rounding.  (Fused source-line-number artifacts removed.) */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
6007 return (
unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6012#define _mm_cvt_roundsd_u64(A, R) \
6013 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
6017_mm_cvtsd_u64 (__m128d __A)
6019 return (
unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
/* Scalar SS->signed/unsigned integer conversions with explicit rounding
   (VCVTSS2SI / VCVTSS2USI).  (Fused source-line-number artifacts removed.) */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
6045 return (
unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6050#define _mm_cvt_roundss_u64(A, R) \
6051 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6055_mm_cvtss_u64 (__m128 __A)
6057 return (
unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
/* Truncating SD->int32 conversion with explicit rounding (VCVTTSD2SI).
   (Fused source-line-number artifacts removed.) */
#define _mm_cvtt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvtt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
6072 return (
int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
/* Truncating SD->int64 conversion with explicit rounding (VCVTTSD2SI, 64-bit).
   (Fused source-line-number artifacts removed.) */
#define _mm_cvtt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvtt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
6084_mm_cvttsd_i64 (__m128d __A)
6086 return (
long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
/* Truncating SD->uint32 conversion with explicit rounding (VCVTTSD2USI).
   (Fused source-line-number artifacts removed.) */
#define _mm_cvtt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
6097 return (
unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6102#define _mm_cvtt_roundsd_u64(A, R) \
6103 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6107_mm_cvttsd_u64 (__m128d __A)
6109 return (
unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
/* Convert the low float element of A to a signed 32-bit integer with
   truncation; R selects exception-suppression behavior. */
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
6124 return (
int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
/* Convert the low float element of A to a signed 64-bit integer with
   truncation; R selects exception-suppression behavior. */
#define _mm_cvtt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvtt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
6136_mm_cvttss_i64 (__m128 __A)
6138 return (
long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
/* Convert the low float element of A to an unsigned 32-bit integer with
   truncation; R selects exception-suppression behavior. */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
6149 return (
unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6154#define _mm_cvtt_roundss_u64(A, R) \
6155 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6159_mm_cvttss_u64 (__m128 __A)
6161 return (
unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
/* Permute double/float elements within each 128-bit lane of X according to
   immediate C (vpermilpd/vpermilps).  The mask variant blends the permuted
   result with W under mask U; the maskz variant zeroes masked-off lanes. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
6196 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6202 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
6210 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
6218 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6224 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
6232 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
6240 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6247 return (__m512d)__builtin_ia32_selectpd_512(__U,
6256 return (__m512d)__builtin_ia32_selectpd_512(__U,
6258 (__v8df)(__m512d)__I);
6265 return (__m512d)__builtin_ia32_selectpd_512(__U,
6273 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6280 return (__m512)__builtin_ia32_selectps_512(__U,
6288 return (__m512)__builtin_ia32_selectps_512(__U,
6290 (__v16sf)(__m512)__I);
6296 return (__m512)__builtin_ia32_selectps_512(__U,
/* Convert eight doubles in A to unsigned 32-bit integers with truncation,
   under rounding/exception control R.  Unmasked form uses an undefined
   passthrough; mask/maskz merge with W or zero under mask U. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
6320 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6330 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6339 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6346#define _mm_roundscale_round_sd(A, B, imm, R) \
6347 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6348 (__v2df)(__m128d)(B), \
6349 (__v2df)_mm_setzero_pd(), \
6350 (__mmask8)-1, (int)(imm), \
/* Round the low double of B to the precision selected by imm (upper element
   copied from A), using the current rounding direction.  Mask variant merges
   with the low element of W under mask U. */
#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))
6367#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
6368 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6369 (__v2df)(__m128d)(B), \
6370 (__v2df)(__m128d)(W), \
6371 (__mmask8)(U), (int)(I), \
/* Zero-masking variant: the low result element is zeroed when mask bit 0 of
   U is clear; uses the current rounding direction. */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))
6381#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
6382 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6383 (__v2df)(__m128d)(B), \
6384 (__v2df)_mm_setzero_pd(), \
6385 (__mmask8)(U), (int)(I), \
6388#define _mm_roundscale_round_ss(A, B, imm, R) \
6389 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6390 (__v4sf)(__m128)(B), \
6391 (__v4sf)_mm_setzero_ps(), \
6392 (__mmask8)-1, (int)(imm), \
/* Round the low float of B to the precision selected by imm (upper elements
   copied from A), using the current rounding direction.  Mask variant merges
   with the low element of W under mask U. */
#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))
6409#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
6410 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6411 (__v4sf)(__m128)(B), \
6412 (__v4sf)(__m128)(W), \
6413 (__mmask8)(U), (int)(I), \
/* Zero-masking variant: the low result element is zeroed when mask bit 0 of
   U is clear; uses the current rounding direction. */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))
6423#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
6424 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6425 (__v4sf)(__m128)(B), \
6426 (__v4sf)_mm_setzero_ps(), \
6427 (__mmask8)(U), (int)(I), \
/* Scale doubles in A by 2^floor(B) element-wise (vscalefpd) with explicit
   rounding mode R; unmasked, merge-masked, and zero-masked forms. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
6451 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6462 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6472 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
/* Scale floats in A by 2^floor(B) element-wise (vscalefps) with explicit
   rounding mode R; unmasked, merge-masked, and zero-masked forms. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
6501 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6512 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6522 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
/* Scale the low double of A by 2^floor(low double of B) with explicit
   rounding mode R; upper element copied from A. */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)))
6539 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6548 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
/* Merge-masking variant: low result merged with the low element of W under
   mask U; explicit rounding mode R. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))
6564 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
/* Zero-masking variant: low result zeroed when mask bit 0 of U is clear;
   explicit rounding mode R. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
/* Scale the low float of A by 2^floor(low float of B) with explicit
   rounding mode R; upper elements copied from A. */
#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)))
6586 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6595 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
/* Merge-masking variant: low result merged with the low element of W under
   mask U; explicit rounding mode R. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))
6611 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6618#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6619 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6620 (__v4sf)(__m128)(B), \
6621 (__v4sf)_mm_setzero_ps(), \
6628 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (
int)__B);
6635 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
6643 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
6651 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (
int)__B);
6657 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
6665 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* 128-bit-lane shuffles (shuf_f32x4 / shuf_f64x2 / shuf_i32x4 / shuf_i64x2)
   and within-lane element shuffles (shufps512 / shufpd512), each with
   merge-masking (blend with W) and zero-masking variants built on the
   select builtins. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
/* Square root of the low double of B with explicit rounding mode R; upper
   element copied from A. */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))
6763 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
/* Merge-masking variant: low result merged with the low element of W under
   mask U; explicit rounding mode R. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U), (int)(R)))
6779 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
/* Zero-masking variant: low result zeroed when mask bit 0 of U is clear;
   explicit rounding mode R. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
/* Square root of the low float of B with explicit rounding mode R; upper
   elements copied from A. */
#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))
6801 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6808#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6809 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6810 (__v4sf)(__m128)(B), \
6811 (__v4sf)(__m128)(W), (__mmask8)(U), \
6817 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
/* Zero-masking variant: low result zeroed when mask bit 0 of U is clear;
   explicit rounding mode R. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
6833 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6834 0, 1, 2, 3, 0, 1, 2, 3,
6835 0, 1, 2, 3, 0, 1, 2, 3);
6841 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
6849 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
6857 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6858 0, 1, 2, 3, 0, 1, 2, 3);
6864 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
6872 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
6880 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6881 0, 1, 2, 3, 0, 1, 2, 3,
6882 0, 1, 2, 3, 0, 1, 2, 3);
6888 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
6896 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
6904 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6905 0, 1, 2, 3, 0, 1, 2, 3);
6911 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
6919 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
6927 return (__m512d)__builtin_ia32_selectpd_512(__M,
6935 return (__m512d)__builtin_ia32_selectpd_512(__M,
6943 return (__m512)__builtin_ia32_selectps_512(__M,
6951 return (__m512)__builtin_ia32_selectps_512(__M,
6959 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6967 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6968 (__v16qi) __O, __M);
6974 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6982 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
6988 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6996 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6997 (__v16hi) __O, __M);
7003 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7011 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*)
__P, (__v16si) __A, __M);
7017 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7025 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7026 (__v16qi) __O, __M);
7032 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7040 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
7046 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7054 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7061 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7069 __builtin_ia32_pmovsqd512mem_mask ((__v8si *)
__P, (__v8di) __A, __M);
7075 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7083 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7090 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7098 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *)
__P, (__v8di) __A, __M);
7104 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7112 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7120 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7128 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
7134 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7142 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7150 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7158 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*)
__P, (__v16si) __A, __M);
7164 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7172 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7180 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7188 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
7194 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7202 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7209 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7217 __builtin_ia32_pmovusqd512mem_mask ((__v8si*)
__P, (__v8di) __A, __M);
7223 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7231 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7238 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7246 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*)
__P, (__v8di) __A, __M);
7252 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7260 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7261 (__v16qi) __O, __M);
7267 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7275 __builtin_ia32_pmovdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
7281 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7289 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7290 (__v16hi) __O, __M);
7296 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7304 __builtin_ia32_pmovdw512mem_mask ((__v16hi *)
__P, (__v16si) __A, __M);
7310 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7318 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7319 (__v16qi) __O, __M);
7325 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7333 __builtin_ia32_pmovqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
7339 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7347 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7354 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7362 __builtin_ia32_pmovqd512mem_mask ((__v8si *)
__P, (__v8di) __A, __M);
7368 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7376 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7383 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7391 __builtin_ia32_pmovqw512mem_mask ((__v8hi *)
__P, (__v8di) __A, __M);
7394#define _mm512_extracti32x4_epi32(A, imm) \
7395 ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7396 (__v4si)_mm_undefined_si128(), \
7399#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
7400 ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7401 (__v4si)(__m128i)(W), \
7404#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
7405 ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7406 (__v4si)_mm_setzero_si128(), \
7409#define _mm512_extracti64x4_epi64(A, imm) \
7410 ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7411 (__v4di)_mm256_undefined_si256(), \
7414#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
7415 ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7416 (__v4di)(__m256i)(W), \
7419#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
7420 ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7421 (__v4di)_mm256_setzero_si256(), \
/* Insert a 256-bit (x4 of 64-bit) or 128-bit (x4 of 32-bit) chunk B into
   512-bit vector A at the position selected by imm, with merge- and
   zero-masking variants built on the select builtins. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))
/* Extract the normalized mantissa of each double in A.  B selects the
   interval, C the sign control; they are packed as ((C)<<2)|(B) for the
   builtin's imm8.  R is the rounding/exception control. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
7498#define _mm512_getmant_pd(A, B, C) \
7499 ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7500 (int)(((C)<<2) | (B)), \
7501 (__v8df)_mm512_setzero_pd(), \
7503 _MM_FROUND_CUR_DIRECTION))
7505#define _mm512_mask_getmant_pd(W, U, A, B, C) \
7506 ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7507 (int)(((C)<<2) | (B)), \
7508 (__v8df)(__m512d)(W), \
7510 _MM_FROUND_CUR_DIRECTION))
7512#define _mm512_maskz_getmant_pd(U, A, B, C) \
7513 ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7514 (int)(((C)<<2) | (B)), \
7515 (__v8df)_mm512_setzero_pd(), \
7517 _MM_FROUND_CUR_DIRECTION))
/* Extract the normalized mantissa of each float in A.  B selects the
   interval, C the sign control; packed as ((C)<<2)|(B) for the builtin's
   imm8.  R is the rounding/exception control. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
7537#define _mm512_getmant_ps(A, B, C) \
7538 ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7539 (int)(((C)<<2)|(B)), \
7540 (__v16sf)_mm512_undefined_ps(), \
7542 _MM_FROUND_CUR_DIRECTION))
7544#define _mm512_mask_getmant_ps(W, U, A, B, C) \
7545 ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7546 (int)(((C)<<2)|(B)), \
7547 (__v16sf)(__m512)(W), \
7549 _MM_FROUND_CUR_DIRECTION))
7551#define _mm512_maskz_getmant_ps(U, A, B, C) \
7552 ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7553 (int)(((C)<<2)|(B)), \
7554 (__v16sf)_mm512_setzero_ps(), \
7556 _MM_FROUND_CUR_DIRECTION))
/* Extract the biased-then-unbiased exponent of each double in A as a
   double (vgetexppd), with exception control R; unmasked, merge-masked,
   and zero-masked forms. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7576 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7585 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7594 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
/* Extract the exponent of each float in A as a float (vgetexpps), with
   exception control R; unmasked, merge-masked, and zero-masked forms. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7618 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7627 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7636 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7642#define _mm512_i64gather_ps(index, addr, scale) \
7643 ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
7644 (void const *)(addr), \
7645 (__v8di)(__m512i)(index), (__mmask8)-1, \
/* 64-bit-index gathers of 32-bit elements: load 8 floats / 8 epi32 from
   addr + index*scale.  Masked forms keep v1_old in lanes whose mask bit
   is clear. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7666#define _mm512_i64gather_pd(index, addr, scale) \
7667 ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
7668 (void const *)(addr), \
7669 (__v8di)(__m512i)(index), (__mmask8)-1, \
/* Masked 64-bit-index gather of 8 doubles from addr + index*scale; lanes
   with a clear mask bit keep v1_old. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7678#define _mm512_i64gather_epi64(index, addr, scale) \
7679 ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
7680 (void const *)(addr), \
7681 (__v8di)(__m512i)(index), (__mmask8)-1, \
/* Masked 64-bit-index gather of 8 qwords from addr + index*scale; lanes
   with a clear mask bit keep v1_old. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
/* 32-bit-index gathers of 16 floats / 16 dwords from addr + index*scale.
   Masked forms keep v1_old in lanes whose mask bit is clear.
   NOTE(review): the ps forms cast index through (__m512), matching the
   upstream header's historical spelling — preserved verbatim. */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))
7714#define _mm512_i32gather_pd(index, addr, scale) \
7715 ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
7716 (void const *)(addr), \
7717 (__v8si)(__m256i)(index), (__mmask8)-1, \
/* Masked 32-bit-index gather of 8 doubles from addr + index*scale; lanes
   with a clear mask bit keep v1_old. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7726#define _mm512_i32gather_epi64(index, addr, scale) \
7727 ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
7728 (void const *)(addr), \
7729 (__v8si)(__m256i)(index), (__mmask8)-1, \
/* Masked 32-bit-index gather of 8 qwords from addr + index*scale; lanes
   with a clear mask bit keep v1_old. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
/* Scatters: store each element of v1 to base_addr + index[i]*scale.
 * i64scatter takes 8 64-bit indices (so 32-bit payloads are __m256/__m256i);
 * i32scatter takes 32-bit indices (16 of them for 32-bit payloads, 8 in a
 * __m256i for 64-bit payloads).  Masked forms store only where the mask bit
 * is set. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7821 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar single-precision FMA (A*B)+C with explicit rounding mode R. */
#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

/* Merge-masked form: result element 0 is W when bit 0 of U is clear. */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7843 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar FMA: element 0 is zeroed when bit 0 of U is clear. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7859 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
/* mask3 form: the addend Y doubles as the pass-through when U bit 0 is clear. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7875 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar FMSUB (A*B)-C expressed as FMA with a negated addend. */
#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7897 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar FMSUB (A*B)-C. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7913 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
/* mask3 FMSUB uses the dedicated vfmsub builtin so Y (the subtrahend)
 * is preserved un-negated as the pass-through value. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7929 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar FNMADD -(A*B)+C expressed by negating one multiplicand. */
#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7951 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar FNMADD -(A*B)+C. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7967 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
/* mask3 FNMADD: negate X so the pass-through operand Y stays intact. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7983 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar FNMSUB -(A*B)-C: negate one multiplicand and the addend. */
#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
8005 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar FNMSUB -(A*B)-C. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
8021 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
/* mask3 FNMSUB: vfmsub builtin handles -C, so only X is negated here,
 * leaving Y (the pass-through) intact. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
8037 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double-precision FMA (A*B)+C with explicit rounding mode R. */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8059 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double FMA (A*B)+C. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))
8075 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
/* mask3 form: the addend Y doubles as the pass-through when U bit 0 is clear. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
8091 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double FMSUB (A*B)-C expressed as FMA with a negated addend. */
#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8113 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double FMSUB (A*B)-C. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), (int)(R)))
8129 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
/* mask3 FMSUB uses the dedicated vfmsub builtin so Y (the subtrahend)
 * is preserved un-negated as the pass-through value. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
8145 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double FNMADD -(A*B)+C expressed by negating one multiplicand. */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8167 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double FNMADD -(A*B)+C. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))
8183 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
/* mask3 FNMADD: negate X so the pass-through operand Y stays intact. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
8199 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double FNMSUB -(A*B)-C: negate one multiplicand and the addend. */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8221 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double FNMSUB -(A*B)-C. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), (int)(R)))
8238 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
/* mask3 FNMSUB: vfmsub builtin handles -C, so only X is negated here,
 * leaving Y (the pass-through) intact. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
/* Permute 64-bit lanes within each 256-bit half by immediate control C.
 * Masked/zero-masked variants are built from the unmasked permute plus a
 * select. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
8280 return (__m512d)__builtin_ia32_permvardf512((__v8df)
__Y, (__v8di) __X);
8286 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8294 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8302 return (__m512i)__builtin_ia32_permvardi512((__v8di)
__Y, (__v8di)__X);
8308 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
8317 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
8325 return (__m512)__builtin_ia32_permvarsf512((__v16sf)
__Y, (__v16si)__X);
8331 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8339 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8347 return (__m512i)__builtin_ia32_permvarsi512((__v16si)
__Y, (__v16si)__X);
/* Legacy alias kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8355 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
8364 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
/* Legacy alias kept for source compatibility. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8404 return (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8410 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8415 *__C = (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8416 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
/* AVX512DQ-style _*_mask16 names mapped onto the original AVX512F k-register
 * operations. */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

/* Shift a 16-bit mask register left/right by immediate I. */
#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))

#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8452 return (
unsigned int)__builtin_ia32_kmovw((
__mmask16)__A);
8474 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)
__P);
8481 return (__m512i) __builtin_nontemporal_load((
const __v8di_aligned *)
__P);
8488 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)
__P);
8494 typedef __v16sf __v16sf_aligned
__attribute__((aligned(64)));
8495 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)
__P);
8501 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8509 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8518 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8526 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8535 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8543 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8552 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8560 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
/* Scalar compares returning a mask: predicate P selects the comparison,
 * the _round_ variants take an explicit SAE/rounding argument R, the plain
 * forms use the current direction. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8671 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8672 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8678 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8686 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8694 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8695 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8701 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8709 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8717 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B), __W);
8723 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B),
8730 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B), __W);
8736 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B),
8743 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8749 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8755 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8759 return (__m128) __builtin_ia32_loadss128_mask ((
const __v4sf *) __A, src, __U & 1);
8765 return (__m128)__builtin_ia32_loadss128_mask ((
const __v4sf *) __A,
8773 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8777 return (__m128d) __builtin_ia32_loadsd128_mask ((
const __v2df *) __A, src, __U & 1);
8783 return (__m128d) __builtin_ia32_loadsd128_mask ((
const __v2df *) __A,
/* Shuffle 32-bit elements within each 128-bit lane by immediate I,
 * with merge- and zero-masked variants built via select. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8804 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8812 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8820 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8828 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8836 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)
__P,
8844 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)
__P,
8852 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)
__P,
8860 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)
__P,
8868 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)
__P,
8876 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)
__P,
8884 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)
__P,
8892 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)
__P,
8900 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8908 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8916 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8924 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
/* Convert 8 packed floats to 8 packed doubles with explicit rounding/SAE. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
8947 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8953 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8961 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8981 return (__m512d) __builtin_ia32_selectpd_512 ((
__mmask8) __U,
8989 return (__m512d) __builtin_ia32_selectpd_512 ((
__mmask8) __U,
8997 return (__m512) __builtin_ia32_selectps_512 ((
__mmask16) __U,
9005 return (__m512) __builtin_ia32_selectps_512 ((
__mmask16) __U,
9013 __builtin_ia32_compressstoredf512_mask ((__v8df *)
__P, (__v8df) __A,
9020 __builtin_ia32_compressstoredi512_mask ((__v8di *)
__P, (__v8di) __A,
9027 __builtin_ia32_compressstoresf512_mask ((__v16sf *)
__P, (__v16sf) __A,
9034 __builtin_ia32_compressstoresi512_mask ((__v16si *)
__P, (__v16si) __A,
/* Convert the low double of B to a float merged into A, with explicit
 * rounding mode R; masked variants choose W or zero when U bit 0 is clear. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
9059 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9068 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
/* Intel-documented _i32/_i64 spellings mapped to the classic _si32/_si64
 * intrinsics.  NOTE(review): the 64-bit aliases are x86_64-only and are
 * normally inside an __x86_64__ guard — confirm the guard survives in the
 * surrounding context, which is not visible here. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
/* Integer -> scalar float/double conversions with explicit rounding mode R;
 * the _i and _si spellings are equivalent.  NOTE(review): the 64-bit forms
 * are x86_64-only in the usual guard structure, which is not visible here. */
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
/* Convert the low float of B to a double merged into A, with explicit
 * SAE/rounding argument R; masked variants choose W or zero on a clear mask. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
9132 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9141 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
/* Unsigned 64-bit integer -> scalar double with explicit rounding mode R.
 * NOTE(review): x86_64-only; the usual __x86_64__ guard is not visible here. */
#define _mm_cvt_roundu64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                       (unsigned long long)(B), (int)(R)))
9160_mm_cvtu64_sd (__m128d __A,
unsigned long long __B)
/* Unsigned integer -> scalar float with explicit rounding mode R.
 * NOTE(review): the u64 form is x86_64-only; its guard is not visible here. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))

#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))
9184_mm_cvtu64_ss (__m128 __A,
unsigned long long __B)
9194 return (__m512i) __builtin_ia32_selectd_512(__M,
9202 return (__m512i) __builtin_ia32_selectq_512(__M,
9209 char __e58,
char __e57,
char __e56,
char __e55,
char __e54,
char __e53,
9210 char __e52,
char __e51,
char __e50,
char __e49,
char __e48,
char __e47,
9211 char __e46,
char __e45,
char __e44,
char __e43,
char __e42,
char __e41,
9212 char __e40,
char __e39,
char __e38,
char __e37,
char __e36,
char __e35,
9213 char __e34,
char __e33,
char __e32,
char __e31,
char __e30,
char __e29,
9214 char __e28,
char __e27,
char __e26,
char __e25,
char __e24,
char __e23,
9215 char __e22,
char __e21,
char __e20,
char __e19,
char __e18,
char __e17,
9216 char __e16,
char __e15,
char __e14,
char __e13,
char __e12,
char __e11,
9217 char __e10,
char __e9,
char __e8,
char __e7,
char __e6,
char __e5,
9218 char __e4,
char __e3,
char __e2,
char __e1,
char __e0) {
9220 return __extension__ (__m512i)(__v64qi)
9221 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9222 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9223 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9224 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9225 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9226 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9227 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9228 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9233 short __e27,
short __e26,
short __e25,
short __e24,
short __e23,
9234 short __e22,
short __e21,
short __e20,
short __e19,
short __e18,
9235 short __e17,
short __e16,
short __e15,
short __e14,
short __e13,
9236 short __e12,
short __e11,
short __e10,
short __e9,
short __e8,
9237 short __e7,
short __e6,
short __e5,
short __e4,
short __e3,
9238 short __e2,
short __e1,
short __e0) {
9239 return __extension__ (__m512i)(__v32hi)
9240 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9241 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9242 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9243 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9248 int __E,
int __F,
int __G,
int __H,
9249 int __I,
int __J,
int __K,
int __L,
9250 int __M,
int __N,
int __O,
int __P)
9252 return __extension__ (__m512i)(__v16si)
9253 {
__P, __O, __N, __M,
__L, __K, __J, __I,
9254 __H, __G, __F, __E,
__D, __C, __B, __A };
/* "Reversed" setter: e0 goes into the lowest element. */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))
9264 long long __D,
long long __E,
long long __F,
9265 long long __G,
long long __H)
9267 return __extension__ (__m512i) (__v8di)
9268 { __H, __G, __F, __E,
__D, __C, __B, __A };
/* "Reversed" setter: e0 goes into the lowest element. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9276 double __E,
double __F,
double __G,
double __H)
9278 return __extension__ (__m512d)
9279 { __H, __G, __F, __E,
__D, __C, __B, __A };
/* "Reversed" setter: e0 goes into the lowest element. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9287 float __E,
float __F,
float __G,
float __H,
9288 float __I,
float __J,
float __K,
float __L,
9289 float __M,
float __N,
float __O,
float __P)
9291 return __extension__ (__m512)
9292 {
__P, __O, __N, __M,
__L, __K, __J, __I,
9293 __H, __G, __F, __E,
__D, __C, __B, __A };
/* "Reversed" setter: e0 goes into the lowest element. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
9341 return __builtin_reduce_add((__v8di)__W);
9345 return __builtin_reduce_mul((__v8di)__W);
9349 return __builtin_reduce_and((__v8di)__W);
9353 return __builtin_reduce_or((__v8di)__W);
9359 return __builtin_reduce_add((__v8di)__W);
9365 return __builtin_reduce_mul((__v8di)__W);
9371 return __builtin_reduce_and((__v8di)__W);
9377 return __builtin_reduce_or((__v8di)__W);
9384 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9388 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9394 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9400 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9405 return __builtin_reduce_add((__v16si)__W);
9410 return __builtin_reduce_mul((__v16si)__W);
9415 return __builtin_reduce_and((__v16si)__W);
9420 return __builtin_reduce_or((__v16si)__W);
9426 return __builtin_reduce_add((__v16si)__W);
9432 return __builtin_reduce_mul((__v16si)__W);
9438 return __builtin_reduce_and((__v16si)__W);
9444 return __builtin_reduce_or((__v16si)__W);
9449 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9454 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9460 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9466 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9471 return __builtin_reduce_max((__v8di)__V);
9476 return __builtin_reduce_max((__v8du)__V);
9481 return __builtin_reduce_min((__v8di)__V);
9486 return __builtin_reduce_min((__v8du)__V);
9492 return __builtin_reduce_max((__v8di)__V);
9498 return __builtin_reduce_max((__v8du)__V);
9504 return __builtin_reduce_min((__v8di)__V);
9510 return __builtin_reduce_min((__v8du)__V);
9514 return __builtin_reduce_max((__v16si)__V);
9519 return __builtin_reduce_max((__v16su)__V);
9524 return __builtin_reduce_min((__v16si)__V);
9529 return __builtin_reduce_min((__v16su)__V);
9535 return __builtin_reduce_max((__v16si)__V);
9541 return __builtin_reduce_max((__v16su)__V);
9547 return __builtin_reduce_min((__v16si)__V);
9553 return __builtin_reduce_min((__v16su)__V);
9558 return __builtin_ia32_reduce_fmax_pd512(__V);
9563 return __builtin_ia32_reduce_fmin_pd512(__V);
9569 return __builtin_ia32_reduce_fmax_pd512(__V);
9575 return __builtin_ia32_reduce_fmin_pd512(__V);
9580 return __builtin_ia32_reduce_fmax_ps512(__V);
9585 return __builtin_ia32_reduce_fmin_ps512(__V);
9591 return __builtin_ia32_reduce_fmax_ps512(__V);
9597 return __builtin_ia32_reduce_fmin_ps512(__V);
9613 __v16si
__b = (__v16si)__A;
/* "i32lo" gather/scatter variants accept a full __m512i index but use only
 * its low 8 32-bit indices, by truncating to __m256i and forwarding to the
 * corresponding i32gather/i32scatter intrinsic. */
#define _mm512_i32logather_pd(vindex, base_addr, scale) \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))

#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \
                           (base_addr), (scale))

#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))

#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \
                              (base_addr), (scale))

#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))

#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_pd((base_addr), (mask), \
                            _mm512_castsi512_si256(vindex), (v1), (scale))

#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_epi64((base_addr), \
                          _mm512_castsi512_si256(vindex), (v1), (scale))

#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_epi64((base_addr), (mask), \
                               _mm512_castsi512_si256(vindex), (v1), (scale))
/* Clean up the helper attribute macros so they do not leak past this header. */
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ vector float vector float __b
static __inline__ uint32_t volatile uint32_t * __p
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32(__m128i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
#define __DEFAULT_FN_ATTRS512
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd(__m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
#define __DEFAULT_FN_ATTRS128
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_add_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_add_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4 x float]. The lower 128 bits contain the value of the source vector; the upper 384 bits are set to zero.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
#define _MM_FROUND_CUR_DIRECTION 0x04
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2 x double]. The lower 128 bits contain the value of the source vector; the upper 384 bits are set to zero.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B) _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ void int __a
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
static __inline__ void short __D
struct __storeu_i16 *__P __v
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
__inline unsigned int unsigned int __Y
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...