10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
/* Unsigned element-type views of a 512-bit (64-byte) vector. */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Explicitly signed 8-bit elements ('char' signedness is
   implementation-defined, so a distinct signed variant is needed). */
typedef signed char __v64qs __attribute__((__vector_size__(64)));

/* Public 512-bit vector types, naturally aligned to 64 bytes. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Unaligned counterparts (alignment 1) for loads/stores from
   arbitrary addresses. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
/* Rounding-mode immediates accepted by the _mm*_round_* intrinsics. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04 /* use the current MXCSR.RC mode */

/* Comparison-predicate aliases: GE/GT are spelled as the negations of
   LT/LE. NOTE(review): _MM_CMPINT_NLT/_MM_CMPINT_NLE come from the
   _MM_CMPINT_ENUM definition earlier in this header (not in this excerpt). */
#define _MM_CMPINT_GE _MM_CMPINT_NLT
#define _MM_CMPINT_GT _MM_CMPINT_NLE
/* Default attribute sets applied to every intrinsic in this header:
   force-inline, no debug info, and the required target features.
   The 512/128 variants additionally pin the minimum vector width so the
   backend may legalize to the matching register class. */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,no-evex512")))
/* In C++11 and later the attribute helpers also carry 'constexpr' so the
   intrinsics are usable in constant expressions; in C (or older C++) they
   are plain attribute sets. (Restores the missing #else/#endif — without
   them the #if is unterminated and both macro sets would be defined.) */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
192 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
/* Legacy alias for _mm512_setzero_si512. */
#define _mm512_setzero_epi32 _mm512_setzero_si512
200 return (__m512d)__builtin_ia32_undef512();
206 return (__m512)__builtin_ia32_undef512();
212 return (__m512)__builtin_ia32_undef512();
218 return (__m512i)__builtin_ia32_undef512();
223 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
230 return (__m512i)__builtin_ia32_selectd_512(__M,
238 return (__m512i)__builtin_ia32_selectd_512(__M,
245 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
246 0, 0, 0, 0, 0, 0, 0, 0);
252 return (__m512i)__builtin_ia32_selectq_512(__M,
261 return (__m512i)__builtin_ia32_selectq_512(__M,
267 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
268 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
/* Generic alias for the float zero-vector constructor. */
#define _mm512_setzero _mm512_setzero_ps
275 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
281 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
282 __w, __w, __w, __w, __w, __w, __w, __w };
288 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
294 return __extension__ (__m512i)(__v64qi){
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w,
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w,
301 __w, __w, __w, __w, __w, __w, __w, __w,
302 __w, __w, __w, __w, __w, __w, __w, __w };
308 return __extension__ (__m512i)(__v32hi){
309 __w, __w, __w, __w, __w, __w, __w, __w,
310 __w, __w, __w, __w, __w, __w, __w, __w,
311 __w, __w, __w, __w, __w, __w, __w, __w,
312 __w, __w, __w, __w, __w, __w, __w, __w };
318 return __extension__ (__m512i)(__v16si){
319 __s, __s, __s, __s, __s, __s, __s, __s,
320 __s, __s, __s, __s, __s, __s, __s, __s };
326 return (__m512i)__builtin_ia32_selectd_512(__M,
334 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
340 return (__m512i)__builtin_ia32_selectq_512(__M,
347 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
353 return __extension__ (__m512i)(__v16si)
354 {
__D, __C, __B, __A,
__D, __C, __B, __A,
355 __D, __C, __B, __A,
__D, __C, __B, __A };
360 return __extension__ (__m512i) (__v8di)
361 {
__D, __C, __B, __A,
__D, __C, __B, __A };
366 return __extension__ (__m512d)
367 {
__D, __C, __B, __A,
__D, __C, __B, __A };
372 return __extension__ (__m512)
373 {
__D, __C, __B, __A,
__D, __C, __B, __A,
374 __D, __C, __B, __A,
__D, __C, __B, __A };
/* setr4 = set4 with the four arguments listed in memory (reversed) order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
391 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
392 0, 0, 0, 0, 0, 0, 0, 0);
400 return __builtin_shufflevector(
__a, __builtin_nondeterministic_value(
__a), 0,
401 1, 2, 3, 4, 5, 6, 7);
407 return __builtin_shufflevector(
__a, __builtin_nondeterministic_value(
__a), 0,
408 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
414 return __builtin_shufflevector(
__a,
__a, 0, 1);
420 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
426 return __builtin_shufflevector(
__a,
__a, 0, 1, 2, 3);
432 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
438 return (__m512) (__A);
444 return (__m512i) (__A);
450 __m256d __B = __builtin_nondeterministic_value(__B);
451 return __builtin_shufflevector(
452 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
453 __B, 0, 1, 2, 3, 4, 5, 6, 7);
459 return (__m512d) (__A);
465 return (__m512i) (__A);
471 __m256 __B = __builtin_nondeterministic_value(__B);
472 return __builtin_shufflevector(
473 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
474 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
480 __m256i __B = __builtin_nondeterministic_value(__B);
481 return __builtin_shufflevector(
482 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
483 __B, 0, 1, 2, 3, 4, 5, 6, 7);
489 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
495 return (__m512) (__A);
501 return (__m512d) (__A);
507 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
513 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
544 return __builtin_shufflevector((__v2df)
__a, (__v2df)
_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
563 return __builtin_shufflevector((__v4df)
__a, (__v4df)
_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
581 return __builtin_shufflevector((__v4sf)
__a, (__v4sf)
_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
599 return __builtin_shufflevector((__v8sf)
__a, (__v8sf)
_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
617 return __builtin_shufflevector((__v2di)
__a, (__v2di)
_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
642 return (__m512i)((__v16su)
__a & (__v16su)
__b);
647 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
662 return (__m512i)((__v8du)
__a & (__v8du)
__b);
667 return (__m512i)__builtin_ia32_selectq_512(
681 return (__m512i)(~(__v8du)__A & (__v8du)__B);
687 return (__m512i)(~(__v16su)__A & (__v16su)__B);
693 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
708 return (__m512i)(~(__v8du)__A & (__v8du)__B);
714 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
729 return (__m512i)((__v16su)
__a | (__v16su)
__b);
735 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
749 return (__m512i)((__v8du)
__a | (__v8du)
__b);
755 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__k,
769 return (__m512i)((__v16su)
__a ^ (__v16su)
__b);
775 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
789 return (__m512i)((__v8du)
__a ^ (__v8du)
__b);
795 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__k,
809 return (__m512i)((__v8du)
__a & (__v8du)
__b);
815 return (__m512i)((__v8du)
__a | (__v8du)
__b);
821 return (__m512i)((__v8du)
__a ^ (__v8du)
__b);
828 return (__m512d)((__v8df)
__a + (__v8df)
__b);
833 return (__m512)((__v16sf)
__a + (__v16sf)
__b);
838 return (__m512d)((__v8df)
__a * (__v8df)
__b);
843 return (__m512)((__v16sf)
__a * (__v16sf)
__b);
848 return (__m512d)((__v8df)
__a - (__v8df)
__b);
853 return (__m512)((__v16sf)
__a - (__v16sf)
__b);
858 return (__m512i) ((__v8du) __A + (__v8du) __B);
864 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
872 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
880 return (__m512i) ((__v8du) __A - (__v8du) __B);
886 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
894 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
902 return (__m512i) ((__v16su) __A + (__v16su) __B);
908 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
916 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
924 return (__m512i) ((__v16su) __A - (__v16su) __B);
930 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
938 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
/* Packed double-precision max with explicit rounding/SAE control R. */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked form: lanes with a 0 mask bit take the value from W.
   (Restores the trailing W operand dropped from this macro.) */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked form: lanes with a 0 mask bit become 0.0. */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
960 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
967 return (__m512d)__builtin_ia32_selectpd_512(__U,
975 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* Packed single-precision max with explicit rounding/SAE control R. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked form: lanes with a 0 mask bit take the value from W.
   (Restores the trailing W operand dropped from this macro.) */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

/* Zero-masked form: lanes with a 0 mask bit become 0.0f. */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
997 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
1004 return (__m512)__builtin_ia32_selectps_512(__U,
1012 return (__m512)__builtin_ia32_selectps_512(__U,
1019 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1028 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
/* Scalar single-precision max of the low lanes, upper lanes from A;
   R selects rounding/SAE behavior. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked form. (Restores the trailing (int)(R) operand dropped
   from this macro.) */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked form. */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1055 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1064 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
/* Scalar double-precision max of the low lanes, upper lane from A;
   R selects rounding/SAE behavior. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked form: low lane comes from W when the mask bit is 0. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked form: low lane becomes 0.0 when the mask bit is 0. */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1089static __inline __m512i
1093 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1099 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1107 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1115 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1121 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1129 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1137 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1143 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1151 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1159 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1165 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1173 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
/* Packed double-precision min with explicit rounding/SAE control R. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked form. (Restores the trailing W operand dropped here.) */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked form. */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
1195 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1202 return (__m512d)__builtin_ia32_selectpd_512(__U,
1210 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* Packed single-precision min with explicit rounding/SAE control R. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked form. (Restores the trailing W operand dropped here.) */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

/* Zero-masked form. */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1232 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1239 return (__m512)__builtin_ia32_selectps_512(__U,
1247 return (__m512)__builtin_ia32_selectps_512(__U,
1254 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1263 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
/* Scalar single-precision min of the low lanes, upper lanes from A;
   R selects rounding/SAE behavior. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked form. (Restores the trailing (int)(R) operand dropped
   from this macro.) */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked form. */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1290 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1299 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
/* Scalar double-precision min of the low lanes, upper lane from A;
   R selects rounding/SAE behavior. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked form: low lane comes from W when the mask bit is 0. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked form: low lane becomes 0.0 when the mask bit is 0. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1324static __inline __m512i
1328 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1334 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1342 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1350 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1356 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1364 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1372 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1378 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1386 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1394 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1400 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1408 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1415 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si)
__Y);
1421 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1429 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1436 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)
__Y);
1442 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1450 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1458 return (__m512i) ((__v16su) __A * (__v16su) __B);
1464 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1472 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1479 return (__m512i) ((__v8du) __A * (__v8du) __B);
1484 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* Packed double-precision square root with rounding control R. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))

/* Merge-masked form: lanes with a 0 mask bit take the value from W. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked form: lanes with a 0 mask bit become 0.0. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
1505 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1512 return (__m512d)__builtin_ia32_selectpd_512(__U,
1520 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* Packed single-precision square root with rounding control R. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))

/* Merge-masked form: lanes with a 0 mask bit take the value from W. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)(__m512)(W)))

/* Zero-masked form: lanes with a 0 mask bit become 0.0f. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1541 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1548 return (__m512)__builtin_ia32_selectps_512(__U,
1556 return (__m512)__builtin_ia32_selectps_512(__U,
1564 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1572 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1580 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1589 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1598 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1606 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1615 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1625 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1634 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1643 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1653 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1662 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1671 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1680 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1688 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1697 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1706 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1714 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1723 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1733 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1742 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1751 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1761 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1770 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1779 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1781 (__v16sf) __A, (
unsigned short)-1,
1788 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1797 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1799 (__v8df) __A, (
unsigned char)-1,
1806 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1815 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1824 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1826 (__v16sf) __A, (
unsigned short)-1,
1833 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1835 (__v8df) __A, (
unsigned char)-1,
1842 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1850 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1855 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1862 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1869 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1874 return (__m512i)__builtin_ia32_selectd_512(__U,
1881 return (__m512i)__builtin_ia32_selectd_512(__U,
1889 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision add of the low lanes with rounding control R;
   upper lanes pass through from A. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked form. (Restores the trailing (int)(R) operand dropped
   from this macro.) */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked form. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1919 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* Scalar double-precision add of the low lanes with rounding control R;
   upper lane passes through from A. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked form: low lane comes from W when the mask bit is 0. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked form: low lane becomes 0.0 when the mask bit is 0. */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1947 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1954 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1961 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
1968 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* Packed double-precision add with rounding control R, plus merge- and
   zero-masked variants. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

/* Packed single-precision add with rounding control R, plus masked
   variants. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2004 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision subtract of the low lanes with rounding
   control R; upper lanes pass through from A. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked form. (Restores the trailing (int)(R) operand dropped
   from this macro.) */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked form. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2033 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* Scalar double-precision subtract of the low lanes with rounding
   control R; upper lane passes through from A. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked form: low lane comes from W when the mask bit is 0. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked form: low lane becomes 0.0 when the mask bit is 0. */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2062 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2069 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2076 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2083 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* Packed double-precision subtract with rounding control R, plus merge-
   and zero-masked variants. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

/* Packed single-precision subtract with rounding control R, plus masked
   variants. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2119 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision multiply of the low lanes with rounding
   control R; upper lanes pass through from A. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked form. (Restores the trailing (int)(R) operand dropped
   from this macro.) */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked form. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2148 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* Scalar double-precision multiply of the low lanes with rounding
   control R; upper lane passes through from A. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked form: low lane comes from W when the mask bit is 0. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked form: low lane becomes 0.0 when the mask bit is 0. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2177 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2184 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2191 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2198 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* Packed double-precision multiply with rounding control R, plus merge-
   and zero-masked variants. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

/* Packed single-precision multiply with rounding control R, plus masked
   variants. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2234 return __builtin_ia32_selectss_128(__U, __A, __W);
/* Scalar single-precision divide of the low lanes with rounding
   control R; upper lanes pass through from A. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked form. (Restores the trailing (int)(R) operand dropped
   from this macro.) */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked form. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2264 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* Scalar double-precision divide of the low lanes with rounding
   control R; upper lane passes through from A. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked form: low lane comes from W when the mask bit is 0. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked form: low lane becomes 0.0 when the mask bit is 0. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2291static __inline __m512d
2293 return (__m512d)((__v8df)
__a/(__v8df)
__b);
2298 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2305 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2312 return (__m512)((__v16sf)
__a/(__v16sf)
__b);
2317 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2324 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* Packed double-precision divide with rounding control R, plus merge-
   and zero-masked variants. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

/* Packed single-precision divide with rounding control R, plus masked
   variants. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
/* Round packed elements to the fixed-point precision given by the immediate.
   The non-"_round" forms use the current rounding direction; the "_round"
   forms take an explicit rounding-control argument R.
   NOTE(review): the mask/rounding argument lines were missing in the garbled
   extract and are restored here following the family's visible pattern. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))

#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
/* Packed double-precision fused multiply-add family with explicit rounding R.
   fmsub negates C, fnmadd negates A, fnmsub negates A and C; all are lowered
   through the single vfmaddpd512 builtin with operand negation. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2508 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2518 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2528 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2538 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2548 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2558 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2568 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2578 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2588 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2598 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2608 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2618 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
/* Packed single-precision fused multiply-add family with explicit rounding R.
   Operand negation patterns reproduce the original header exactly (fnmadd /
   fnmsub negate B in the unmasked forms, A in the mask3/maskz forms — the
   results are mathematically identical). */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2712 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2722 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2732 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2742 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2752 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2762 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2772 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2782 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2792 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2802 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2812 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2822 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
/* Packed double-precision fused multiply with alternating add/subtract
   (fmaddsub: odd lanes add, even lanes subtract; fmsubadd is the inverse,
   implemented by negating C), with explicit rounding mode R. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2881 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2891 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2901 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2911 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2921 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2931 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2941 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
/* Packed single-precision fused multiply with alternating add/subtract,
   with explicit rounding mode R; fmsubadd negates C. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
3000 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3010 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3020 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3030 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3040 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3050 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3060 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
/* fmsub, mask3 form (result merged into C under mask U), explicit rounding R. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
3077 return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
/* fmsub, mask3 form (result merged into C under mask U), explicit rounding R. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
3093 return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
/* fmsubadd, mask3 form (result merged into C under mask U), explicit rounding R. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
3110 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
/* fmsubadd, mask3 form (result merged into C under mask U), explicit rounding R. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))
3127 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
/* fnmadd, write-masked into A, explicit rounding R; negates B. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))
3144 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
/* fnmadd, write-masked into A, explicit rounding R; negates B. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))
3161 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
/* fnmsub: mask form negates B and C; mask3 form negates A and routes through
   the fmsub builtin. Explicit rounding mode R. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
3185 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3195 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
/* fnmsub: mask form negates B and C; mask3 form negates A and routes through
   the fmsub builtin. Explicit rounding mode R. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
3219 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3229 return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3243 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3251 return (__m512i)__builtin_ia32_selectd_512(__U,
3260 return (__m512i)__builtin_ia32_selectd_512(__U,
3269 return (__m512i)__builtin_ia32_selectd_512(__U,
3277 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3285 return (__m512i)__builtin_ia32_selectq_512(__U,
3294 return (__m512i)__builtin_ia32_selectq_512(__U,
3303 return (__m512i)__builtin_ia32_selectq_512(__U,
/* Concatenate A:B and shift right by I elements (valignq / valignd).
   Masked forms select between the shifted result and W / zero. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))
/* Extract a 256-bit (f64x4) or 128-bit (f32x4) lane selected by the immediate.
   NOTE(review): the trailing mask argument line of each macro was missing in
   the garbled extract and is restored here following the family's pattern. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_undefined_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_undefined_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3372 return (__m512d) __builtin_ia32_selectpd_512 ((
__mmask8) __U,
3380 return (__m512) __builtin_ia32_selectps_512 ((
__mmask16) __U,
3388 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
3396 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
/* Packed single-precision compares producing a 16-bit mask. The named
   comparison macros are shorthands for _mm512_cmp_ps_mask with the
   corresponding _CMP_* predicate. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
/* Packed double-precision compares producing an 8-bit mask; parallels the
   single-precision family above. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
/* Convert packed float to unsigned 32-bit int with truncation, explicit
   rounding/exception control R. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
3534 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3544 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3553 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
/* Convert packed signed / unsigned 32-bit integers to packed float with an
   explicit rounding mode R. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3592 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3598 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3606 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3614 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3620 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3628 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3648 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3654 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3662 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3670 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3676 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3684 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
/* Convert 8 doubles to 8 floats with explicit rounding mode R.
   NOTE(review): the final "(int)(R)))" line of the mask form was missing in
   the garbled extract and is restored here following the family's pattern. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
3719 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3728 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3737 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3746 return (__m512) __builtin_shufflevector((__v8sf)
_mm512_cvtpd_ps(__A),
3748 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3754 return (__m512) __builtin_shufflevector (
3758 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/* Convert packed float to packed half-precision (FP16 bit patterns in a
   __m256i); I selects rounding per vcvtps2ph. The _mm512_cvtps_ph names are
   plain aliases.
   NOTE(review): the trailing mask argument lines were missing in the garbled
   extract and are restored following the family's pattern. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(U), \
                                             (__mmask16)(W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)))

#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
/* Convert packed half-precision values (in a __m256i) to packed float with
   explicit rounding/exception control R. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3799 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3809 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3818 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
/* Convert packed double to signed 32-bit int with truncation, explicit
   rounding/exception control R. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3842 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)
__a,
3851 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3860 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
/* Convert packed float to signed 32-bit int with truncation, explicit
   rounding/exception control R. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3885 __builtin_ia32_cvttps2dq512_mask((__v16sf)
__a,
3893 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3902 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
/* Convert packed float to signed 32-bit int with explicit rounding mode R. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
3926 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3935 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3944 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
/* Convert packed double to signed 32-bit int with explicit rounding mode R. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
3969 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3979 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3988 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
/* Convert packed float to unsigned 32-bit int with explicit rounding mode R. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
4013 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4023 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4032 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
/* Convert packed double to unsigned 32-bit int with explicit rounding mode R. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
4057 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4067 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4076 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4099 return (__m512d)__builtin_shufflevector((__v8df)
__a, (__v8df)
__b,
4100 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4106 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4114 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4121 return (__m512d)__builtin_shufflevector((__v8df)
__a, (__v8df)
__b,
4122 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4128 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4136 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4143 return (__m512)__builtin_shufflevector((__v16sf)
__a, (__v16sf)
__b,
4145 2+4, 18+4, 3+4, 19+4,
4146 2+8, 18+8, 3+8, 19+8,
4147 2+12, 18+12, 3+12, 19+12);
4153 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4161 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4168 return (__m512)__builtin_shufflevector((__v16sf)
__a, (__v16sf)
__b,
4170 0+4, 16+4, 1+4, 17+4,
4171 0+8, 16+8, 1+8, 17+8,
4172 0+12, 16+12, 1+12, 17+12);
4178 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4186 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4193 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4195 2+4, 18+4, 3+4, 19+4,
4196 2+8, 18+8, 3+8, 19+8,
4197 2+12, 18+12, 3+12, 19+12);
4203 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4211 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4218 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4220 0+4, 16+4, 1+4, 17+4,
4221 0+8, 16+8, 1+8, 17+8,
4222 0+12, 16+12, 1+12, 17+12);
4228 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4236 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4243 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4244 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4250 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4258 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4265 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4266 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4272 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4280 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4291 struct __loadu_si512 {
4294 return ((
const struct __loadu_si512*)
__P)->__v;
4300 struct __loadu_epi32 {
4303 return ((
const struct __loadu_epi32*)
__P)->__v;
4309 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)
__P,
4318 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)
__P,
4327 struct __loadu_epi64 {
4330 return ((
const struct __loadu_epi64*)
__P)->__v;
4336 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)
__P,
4344 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)
__P,
4353 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)
__P,
4361 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)
__P,
4370 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)
__P,
4378 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)
__P,
4390 return ((
const struct __loadu_pd*)
__p)->__v;
4399 return ((
const struct __loadu_ps*)
__p)->__v;
4405 return *(
const __m512*)
__p;
4411 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)
__P,
4419 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)
__P,
4428 return *(
const __m512d*)
__p;
4434 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)
__P,
4442 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)
__P,
4451 return *(
const __m512i *)
__P;
4457 return *(
const __m512i *)
__P;
4463 return *(
const __m512i *)
__P;
4471 struct __storeu_epi64 {
4474 ((
struct __storeu_epi64*)
__P)->__v = __A;
4480 __builtin_ia32_storedqudi512_mask ((
long long *)
__P, (__v8di) __A,
4487 struct __storeu_si512 {
4490 ((
struct __storeu_si512*)
__P)->__v = __A;
4496 struct __storeu_epi32 {
4499 ((
struct __storeu_epi32*)
__P)->__v = __A;
4505 __builtin_ia32_storedqusi512_mask ((
int *)
__P, (__v16si) __A,
4512 __builtin_ia32_storeupd512_mask ((
double *)
__P, (__v8df) __A, (
__mmask8) __U);
4518 struct __storeu_pd {
4521 ((
struct __storeu_pd*)
__P)->__v = __A;
4527 __builtin_ia32_storeups512_mask ((
float *)
__P, (__v16sf) __A,
4534 struct __storeu_ps {
4537 ((
struct __storeu_ps*)
__P)->__v = __A;
4543 __builtin_ia32_storeapd512_mask ((__v8df *)
__P, (__v8df) __A, (
__mmask8) __U);
4549 *(__m512d*)
__P = __A;
4555 __builtin_ia32_storeaps512_mask ((__v16sf *)
__P, (__v16sf) __A,
4562 *(__m512*)
__P = __A;
4568 *(__m512i *)
__P = __A;
4574 *(__m512i *)
__P = __A;
4580 *(__m512i *)
__P = __A;
4588 return __builtin_ia32_knothi(__M);
/* Signed 32-bit comparison aliases: each forwards to _mm512_cmp_epi32_mask
 * with a fixed _MM_CMPINT_* predicate and yields a 16-bit element mask.
 * The _mask_* forms take an additional mask-register argument k, forwarded
 * to _mm512_mask_cmp_epi32_mask. */
#define _mm512_cmpeq_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Unsigned 32-bit comparison aliases: forward to _mm512_cmp_epu32_mask
 * (or its masked form) with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Signed 64-bit comparison aliases: forward to _mm512_cmp_epi64_mask
 * (or its masked form) with a fixed _MM_CMPINT_* predicate; result is
 * an 8-bit element mask. */
#define _mm512_cmpeq_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Unsigned 64-bit comparison aliases: forward to _mm512_cmp_epu64_mask
 * (or its masked form) with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4697 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4703 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4711 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4720 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4726 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4734 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4741 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4747 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4755 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4762 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4768 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4776 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4783 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4789 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4797 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4804 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4810 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4818 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4825 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4831 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4839 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4846 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4852 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4860 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4867 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4873 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4881 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4888 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4894 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4902 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4910 return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
4916 return (__m512i)__builtin_ia32_selectd_512(__U,
4924 return (__m512i)__builtin_ia32_selectd_512(__U,
4932 return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
4938 return (__m512i)__builtin_ia32_selectq_512(__U,
4946 return (__m512i)__builtin_ia32_selectq_512(__U,
4953#define _mm512_cmp_epi32_mask(a, b, p) \
4954 ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
4955 (__v16si)(__m512i)(b), (int)(p), \
4958#define _mm512_cmp_epu32_mask(a, b, p) \
4959 ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
4960 (__v16si)(__m512i)(b), (int)(p), \
4963#define _mm512_cmp_epi64_mask(a, b, p) \
4964 ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
4965 (__v8di)(__m512i)(b), (int)(p), \
4968#define _mm512_cmp_epu64_mask(a, b, p) \
4969 ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
4970 (__v8di)(__m512i)(b), (int)(p), \
4973#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
4974 ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
4975 (__v16si)(__m512i)(b), (int)(p), \
4978#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
4979 ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
4980 (__v16si)(__m512i)(b), (int)(p), \
4983#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
4984 ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
4985 (__v8di)(__m512i)(b), (int)(p), \
4988#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
4989 ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
4990 (__v8di)(__m512i)(b), (int)(p), \
/* Rotate-left by immediate. _mm512_rol_epi32 rotates each 32-bit lane of
 * (a) left by immediate (b); _mm512_rol_epi64 does the same per 64-bit
 * lane. The mask/maskz variants use selectd/selectq to merge the rotated
 * result with W (mask) or zero (maskz) according to mask U. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
5022 return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
5028 return (__m512i)__builtin_ia32_selectd_512(__U,
5036 return (__m512i)__builtin_ia32_selectd_512(__U,
5044 return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
5050 return (__m512i)__builtin_ia32_selectq_512(__U,
5058 return (__m512i)__builtin_ia32_selectq_512(__U,
/* Rotate-right by immediate, 32-bit and 64-bit lanes; structure mirrors
 * the rol macros: the mask/maskz variants merge the rotated result with
 * W or zero based on mask U via selectd/selectq. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
5091 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (
int)__B);
5097 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5104 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5111 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (
int)__B);
5117 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5124 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5131 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (
int)__B);
5137 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5144 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5151 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (
int)__B);
5157 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5164 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5172 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *)
__P,
5180 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *)
__P,
5189 __builtin_ia32_movdqa32store512_mask ((__v16si *)
__P, (__v16si) __A,
5196 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
5204 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
5212 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
5220 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
5228 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *)
__P,
5236 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *)
__P,
5245 __builtin_ia32_movdqa64store512_mask ((__v8di *)
__P, (__v8di) __A,
5252 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5253 0, 0, 2, 2, 4, 4, 6, 6);
5259 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
5267 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
/* fixupimm on 8 doubles with explicit rounding control R. The unmasked
 * form passes an all-ones mask ((__mmask8)-1); the masked form forwards
 * the caller's mask U. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))
5284#define _mm512_fixupimm_pd(A, B, C, imm) \
5285 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5286 (__v8df)(__m512d)(B), \
5287 (__v8di)(__m512i)(C), (int)(imm), \
5289 _MM_FROUND_CUR_DIRECTION))
5291#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
5292 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5293 (__v8df)(__m512d)(B), \
5294 (__v8di)(__m512i)(C), (int)(imm), \
5296 _MM_FROUND_CUR_DIRECTION))
5298#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
5299 ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5300 (__v8df)(__m512d)(B), \
5301 (__v8di)(__m512i)(C), \
5302 (int)(imm), (__mmask8)(U), \
5305#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
5306 ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5307 (__v8df)(__m512d)(B), \
5308 (__v8di)(__m512i)(C), \
5309 (int)(imm), (__mmask8)(U), \
5310 _MM_FROUND_CUR_DIRECTION))
/* fixupimm on 16 floats with explicit rounding control R; unmasked form
 * uses an all-ones 16-bit mask, masked form forwards U. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))
5324#define _mm512_fixupimm_ps(A, B, C, imm) \
5325 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5326 (__v16sf)(__m512)(B), \
5327 (__v16si)(__m512i)(C), (int)(imm), \
5329 _MM_FROUND_CUR_DIRECTION))
5331#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
5332 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5333 (__v16sf)(__m512)(B), \
5334 (__v16si)(__m512i)(C), (int)(imm), \
5336 _MM_FROUND_CUR_DIRECTION))
5338#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
5339 ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5340 (__v16sf)(__m512)(B), \
5341 (__v16si)(__m512i)(C), \
5342 (int)(imm), (__mmask16)(U), \
5345#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
5346 ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5347 (__v16sf)(__m512)(B), \
5348 (__v16si)(__m512i)(C), \
5349 (int)(imm), (__mmask16)(U), \
5350 _MM_FROUND_CUR_DIRECTION))
/* Scalar-double fixupimm with explicit rounding control R; unmasked form
 * uses an all-ones mask, masked form forwards U. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))
5364#define _mm_fixupimm_sd(A, B, C, imm) \
5365 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5366 (__v2df)(__m128d)(B), \
5367 (__v2di)(__m128i)(C), (int)(imm), \
5369 _MM_FROUND_CUR_DIRECTION))
5371#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
5372 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5373 (__v2df)(__m128d)(B), \
5374 (__v2di)(__m128i)(C), (int)(imm), \
5376 _MM_FROUND_CUR_DIRECTION))
/* Zero-masking scalar-double fixupimm with rounding control R (uses the
 * _maskz builtin, which zeroes lanes masked off by U). */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))
5384#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
5385 ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5386 (__v2df)(__m128d)(B), \
5387 (__v2di)(__m128i)(C), (int)(imm), \
5389 _MM_FROUND_CUR_DIRECTION))
/* Scalar-float fixupimm with explicit rounding control R; unmasked form
 * uses an all-ones mask, masked form forwards U. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))
5403#define _mm_fixupimm_ss(A, B, C, imm) \
5404 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5405 (__v4sf)(__m128)(B), \
5406 (__v4si)(__m128i)(C), (int)(imm), \
5408 _MM_FROUND_CUR_DIRECTION))
5410#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
5411 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5412 (__v4sf)(__m128)(B), \
5413 (__v4si)(__m128i)(C), (int)(imm), \
5415 _MM_FROUND_CUR_DIRECTION))
/* Zero-masking scalar-float fixupimm with rounding control R. */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))
5423#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
5424 ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5425 (__v4sf)(__m128)(B), \
5426 (__v4si)(__m128i)(C), (int)(imm), \
5428 _MM_FROUND_CUR_DIRECTION))
/* Scalar-double getexp with explicit rounding control R; passthrough
 * source is a zero vector and the mask is all-ones (unmasked form). */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))
5440 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5447 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
/* Merge-masking scalar-double getexp: lanes masked off by U come from W. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), (int)(R)))
5463 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
/* Zero-masking scalar-double getexp: passthrough source is zero, mask U
 * is forwarded. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), (int)(R)))
/* Scalar-float getexp with explicit rounding control R (unmasked:
 * all-ones mask, zero passthrough). */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, (int)(R)))
5485 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5492 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
/* Merge-masking scalar-float getexp: lanes masked off by U come from W. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), (int)(R)))
5508 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
/* Zero-masking scalar-float getexp: zero passthrough, mask U forwarded. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R)))
/* Scalar-double getmant with rounding control R. The two enum-like
 * arguments are packed into one immediate as (D << 2) | C before being
 * handed to the builtin; unmasked form uses an all-ones mask and a zero
 * passthrough. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))
5528#define _mm_getmant_sd(A, B, C, D) \
5529 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5530 (__v2df)(__m128d)(B), \
5531 (int)(((D)<<2) | (C)), \
5532 (__v2df)_mm_setzero_pd(), \
5534 _MM_FROUND_CUR_DIRECTION))
5536#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
5537 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5538 (__v2df)(__m128d)(B), \
5539 (int)(((D)<<2) | (C)), \
5540 (__v2df)(__m128d)(W), \
5542 _MM_FROUND_CUR_DIRECTION))
/* Merge-masking scalar-double getmant with rounding control: packs
 * (D << 2) | C as the immediate and passes W as the merge source. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))
5551#define _mm_maskz_getmant_sd(U, A, B, C, D) \
5552 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5553 (__v2df)(__m128d)(B), \
5554 (int)(((D)<<2) | (C)), \
5555 (__v2df)_mm_setzero_pd(), \
5557 _MM_FROUND_CUR_DIRECTION))
/* Zero-masking scalar-double getmant with rounding control: zero
 * passthrough, mask U forwarded, immediate packed as (D << 2) | C. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))
/* Scalar-float getmant with rounding control R; immediate packed as
 * (D << 2) | C, all-ones mask, zero passthrough. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))
5573#define _mm_getmant_ss(A, B, C, D) \
5574 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5575 (__v4sf)(__m128)(B), \
5576 (int)(((D)<<2) | (C)), \
5577 (__v4sf)_mm_setzero_ps(), \
5579 _MM_FROUND_CUR_DIRECTION))
5581#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
5582 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5583 (__v4sf)(__m128)(B), \
5584 (int)(((D)<<2) | (C)), \
5585 (__v4sf)(__m128)(W), \
5587 _MM_FROUND_CUR_DIRECTION))
/* Merge-masking scalar-float getmant with rounding control; W is the
 * merge source for lanes masked off by U. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))
5596#define _mm_maskz_getmant_ss(U, A, B, C, D) \
5597 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5598 (__v4sf)(__m128)(B), \
5599 (int)(((D)<<2) | (C)), \
5600 (__v4sf)_mm_setzero_ps(), \
5602 _MM_FROUND_CUR_DIRECTION))
/* Zero-masking scalar-float getmant with rounding control. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
/* Ordered scalar compare with explicit predicate P and rounding/SAE
 * control R; returns the comparison result as an int. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))
/* Convert the low double of A to a signed 64-bit integer using rounding
 * control R. */
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5633 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5639 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5647 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5655 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5661 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5669 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5677 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)
__Y);
5683 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5691 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5699 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)
__Y);
5705 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5713 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5721 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5727 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5735 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5743 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5749 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5757 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5765 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)
__Y);
5771 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5779 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5787 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)
__Y);
5793 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5801 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5809 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5815 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5823 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5831 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5837 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5845 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5853 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)
__Y);
5859 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5867 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5875 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)
__Y);
5881 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5889 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* Bitwise ternary logic: the 8-bit immediate is a truth table applied to
 * the corresponding bits of A, B and C. The _mask form merges under mask
 * U (via the _mask builtin), the _maskz form zeroes masked-off lanes
 * (via the _maskz builtin); unmasked forms pass an all-ones mask. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
/* Low-double conversions with explicit rounding control R.  i64/si64 and
 * i32/si32 are synonym pairs over the same builtin; the u32 form converts
 * to an unsigned 32-bit integer. */
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5950 return (
unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5955#define _mm_cvt_roundsd_u64(A, R) \
5956 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5960_mm_cvtsd_u64 (__m128d __A)
5962 return (
unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
/* Low-float conversions with explicit rounding control R; si32/i32 and
 * si64/i64 are synonym pairs, u32 converts to unsigned 32-bit. */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5988 return (
unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5993#define _mm_cvt_roundss_u64(A, R) \
5994 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
5998_mm_cvtss_u64 (__m128 __A)
6000 return (
unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
/* Truncating low-double to signed 32-bit conversions with SAE control R
 * (i32 and si32 are synonyms over the same builtin). */
#define _mm_cvtt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvtt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
6015 return (
int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
/* Truncating low-double to signed 64-bit conversions with SAE control R
 * (si64 and i64 are synonyms over the same builtin). */
#define _mm_cvtt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvtt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
6027_mm_cvttsd_i64 (__m128d __A)
6029 return (
long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
/* Truncating low-double to unsigned 32-bit conversion with SAE control R. */
#define _mm_cvtt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
6040 return (
unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6045#define _mm_cvtt_roundsd_u64(A, R) \
6046 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6050_mm_cvttsd_u64 (__m128d __A)
6052 return (
unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6058#define _mm_cvtt_roundss_i32(A, R) \
6059 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
6061#define _mm_cvtt_roundss_si32(A, R) \
6062 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
6067 return (
int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6072#define _mm_cvtt_roundss_i64(A, R) \
6073 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
6075#define _mm_cvtt_roundss_si64(A, R) \
6076 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
6079_mm_cvttss_i64 (__m128 __A)
6081 return (
long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6086#define _mm_cvtt_roundss_u32(A, R) \
6087 ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
6092 return (
unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6097#define _mm_cvtt_roundss_u64(A, R) \
6098 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6102_mm_cvttss_u64 (__m128 __A)
6104 return (
unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
/* In-lane permutes of packed double/float elements by immediate C
   (VPERMILPD/VPERMILPS), with merge- and zero-masking wrappers. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
6139 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6145 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
6153 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
6161 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6167 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
6175 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
6183 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6190 return (__m512d)__builtin_ia32_selectpd_512(__U,
6199 return (__m512d)__builtin_ia32_selectpd_512(__U,
6201 (__v8df)(__m512d)__I);
6208 return (__m512d)__builtin_ia32_selectpd_512(__U,
6216 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6223 return (__m512)__builtin_ia32_selectps_512(__U,
6231 return (__m512)__builtin_ia32_selectps_512(__U,
6233 (__v16sf)(__m512)__I);
6239 return (__m512)__builtin_ia32_selectps_512(__U,
6245#define _mm512_cvtt_roundpd_epu32(A, R) \
6246 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6247 (__v8si)_mm256_undefined_si256(), \
6248 (__mmask8)-1, (int)(R)))
6250#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
6251 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6252 (__v8si)(__m256i)(W), \
6253 (__mmask8)(U), (int)(R)))
6255#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
6256 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6257 (__v8si)_mm256_setzero_si256(), \
6258 (__mmask8)(U), (int)(R)))
6263 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6273 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6282 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
/* Scalar round-to-scale (VRNDSCALESD/SS): rounds the low element of B to
   imm-specified precision, upper elements copied from A.  *_round_* forms
   take an explicit SAE/rounding control R; others use the current mode. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6373#define _mm512_scalef_round_pd(A, B, R) \
6374 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6375 (__v8df)(__m512d)(B), \
6376 (__v8df)_mm512_undefined_pd(), \
6377 (__mmask8)-1, (int)(R)))
6379#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6380 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6381 (__v8df)(__m512d)(B), \
6382 (__v8df)(__m512d)(W), \
6383 (__mmask8)(U), (int)(R)))
6385#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6386 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6387 (__v8df)(__m512d)(B), \
6388 (__v8df)_mm512_setzero_pd(), \
6389 (__mmask8)(U), (int)(R)))
6394 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6405 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6415 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6423#define _mm512_scalef_round_ps(A, B, R) \
6424 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6425 (__v16sf)(__m512)(B), \
6426 (__v16sf)_mm512_undefined_ps(), \
6427 (__mmask16)-1, (int)(R)))
6429#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6430 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6431 (__v16sf)(__m512)(B), \
6432 (__v16sf)(__m512)(W), \
6433 (__mmask16)(U), (int)(R)))
6435#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6436 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6437 (__v16sf)(__m512)(B), \
6438 (__v16sf)_mm512_setzero_ps(), \
6439 (__mmask16)(U), (int)(R)))
6444 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6455 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6465 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6473#define _mm_scalef_round_sd(A, B, R) \
6474 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6475 (__v2df)(__m128d)(B), \
6476 (__v2df)_mm_setzero_pd(), \
6477 (__mmask8)-1, (int)(R)))
6482 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6491 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6498#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6499 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6500 (__v2df)(__m128d)(B), \
6501 (__v2df)(__m128d)(W), \
6502 (__mmask8)(U), (int)(R)))
6507 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6514#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6515 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6516 (__v2df)(__m128d)(B), \
6517 (__v2df)_mm_setzero_pd(), \
6518 (__mmask8)(U), (int)(R)))
6520#define _mm_scalef_round_ss(A, B, R) \
6521 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6522 (__v4sf)(__m128)(B), \
6523 (__v4sf)_mm_setzero_ps(), \
6524 (__mmask8)-1, (int)(R)))
6529 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6538 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6545#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6546 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6547 (__v4sf)(__m128)(B), \
6548 (__v4sf)(__m128)(W), \
6549 (__mmask8)(U), (int)(R)))
6554 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6561#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6562 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6563 (__v4sf)(__m128)(B), \
6564 (__v4sf)_mm_setzero_ps(), \
6570 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (
int)__B);
6576 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
6583 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
6590 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (
int)__B);
6596 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
6603 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* 128-bit-lane shuffles (VSHUFF32X4/F64X2/I32X4/I64X2) and classic
   element shuffles (VSHUFPD/PS), each with masked wrappers. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                   (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                   (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                    (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                    (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                   (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                   (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                   (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                   (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                   (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                   (__v8di)_mm512_setzero_si512()))

#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                    (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                    (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                   (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                   (__v16sf)_mm512_setzero_ps()))
6692#define _mm_sqrt_round_sd(A, B, R) \
6693 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6694 (__v2df)(__m128d)(B), \
6695 (__v2df)_mm_setzero_pd(), \
6696 (__mmask8)-1, (int)(R)))
6701 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6708#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6709 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6710 (__v2df)(__m128d)(B), \
6711 (__v2df)(__m128d)(W), \
6712 (__mmask8)(U), (int)(R)))
6717 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6724#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6725 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6726 (__v2df)(__m128d)(B), \
6727 (__v2df)_mm_setzero_pd(), \
6728 (__mmask8)(U), (int)(R)))
6730#define _mm_sqrt_round_ss(A, B, R) \
6731 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6732 (__v4sf)(__m128)(B), \
6733 (__v4sf)_mm_setzero_ps(), \
6734 (__mmask8)-1, (int)(R)))
6739 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6746#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6747 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6748 (__v4sf)(__m128)(B), \
6749 (__v4sf)(__m128)(W), (__mmask8)(U), \
6755 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6762#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6763 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6764 (__v4sf)(__m128)(B), \
6765 (__v4sf)_mm_setzero_ps(), \
6766 (__mmask8)(U), (int)(R)))
6770 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6771 0, 1, 2, 3, 0, 1, 2, 3,
6772 0, 1, 2, 3, 0, 1, 2, 3);
6778 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
6786 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
6793 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6794 0, 1, 2, 3, 0, 1, 2, 3);
6800 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
6808 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
6815 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6816 0, 1, 2, 3, 0, 1, 2, 3,
6817 0, 1, 2, 3, 0, 1, 2, 3);
6823 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
6831 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
6838 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6839 0, 1, 2, 3, 0, 1, 2, 3);
6845 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
6853 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
6861 return (__m512d)__builtin_ia32_selectpd_512(__M,
6869 return (__m512d)__builtin_ia32_selectpd_512(__M,
6877 return (__m512)__builtin_ia32_selectps_512(__M,
6885 return (__m512)__builtin_ia32_selectps_512(__M,
/* NOTE(review): this region holds the saturating-signed (pmovs*), saturating-
   unsigned (pmovus*) and truncating (pmov*) down-conversion intrinsics —
   _mm512_cvt(s|us|)epi32/epi64 -> epi8/epi16/epi32 plus their mask/maskz and
   *_storeu_* variants.  The extraction that produced this file dropped the
   function signatures and most argument lines and fused original line numbers
   into the remaining code, so only the builtin-call fragments survive below.
   Left byte-identical: too much text is missing to reconstruct all ~60
   functions confidently here — restore from the upstream header. */
6893 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6901 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6902 (__v16qi) __O, __M);
6908 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6916 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
6922 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6930 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6931 (__v16hi) __O, __M);
6937 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6945 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*)
__P, (__v16si) __A, __M);
6951 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6959 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6960 (__v16qi) __O, __M);
6966 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6974 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
6980 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6988 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6995 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7003 __builtin_ia32_pmovsqd512mem_mask ((__v8si *)
__P, (__v8di) __A, __M);
7009 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7017 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7024 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7032 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *)
__P, (__v8di) __A, __M);
7038 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7046 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7054 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7062 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
7068 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7076 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7084 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7092 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*)
__P, (__v16si) __A, __M);
7098 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7106 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7114 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7122 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
7128 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7136 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7143 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7151 __builtin_ia32_pmovusqd512mem_mask ((__v8si*)
__P, (__v8di) __A, __M);
7157 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7165 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7172 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7180 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*)
__P, (__v8di) __A, __M);
7186 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7194 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7195 (__v16qi) __O, __M);
7201 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7209 __builtin_ia32_pmovdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
7215 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7223 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7224 (__v16hi) __O, __M);
7230 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7238 __builtin_ia32_pmovdw512mem_mask ((__v16hi *)
__P, (__v16si) __A, __M);
7244 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7252 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7253 (__v16qi) __O, __M);
7259 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7267 __builtin_ia32_pmovqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
7273 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7281 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7288 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7296 __builtin_ia32_pmovqd512mem_mask ((__v8si *)
__P, (__v8di) __A, __M);
7302 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7310 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7317 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7325 __builtin_ia32_pmovqw512mem_mask ((__v8hi *)
__P, (__v8di) __A, __M);
/* Extract 128/256-bit integer sub-vectors (VEXTRACTI32X4/I64X4) and insert
   128/256-bit sub-vectors (VINSERTF/I32X4, VINSERTF/I64X4), with masked
   wrappers built on select. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_undefined_si128(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_undefined_si256(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))

#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()))

#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))

#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()))

#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
/* Extract normalized mantissas (VGETMANTPD/PS).  B selects the
   normalization interval, C the sign control; they are packed into the
   builtin's immediate as (C << 2) | B. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7492#define _mm512_getexp_round_pd(A, R) \
7493 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7494 (__v8df)_mm512_undefined_pd(), \
7495 (__mmask8)-1, (int)(R)))
7497#define _mm512_mask_getexp_round_pd(W, U, A, R) \
7498 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7499 (__v8df)(__m512d)(W), \
7500 (__mmask8)(U), (int)(R)))
7502#define _mm512_maskz_getexp_round_pd(U, A, R) \
7503 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7504 (__v8df)_mm512_setzero_pd(), \
7505 (__mmask8)(U), (int)(R)))
7510 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7519 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7528 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7534#define _mm512_getexp_round_ps(A, R) \
7535 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7536 (__v16sf)_mm512_undefined_ps(), \
7537 (__mmask16)-1, (int)(R)))
7539#define _mm512_mask_getexp_round_ps(W, U, A, R) \
7540 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7541 (__v16sf)(__m512)(W), \
7542 (__mmask16)(U), (int)(R)))
7544#define _mm512_maskz_getexp_round_ps(U, A, R) \
7545 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7546 (__v16sf)_mm512_setzero_ps(), \
7547 (__mmask16)(U), (int)(R)))
7552 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7561 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7570 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
/* Gather loads with 64-bit and 32-bit indices (VGATHERQPS/QD/QPD/QQ,
   VGATHERDPS/DD/DPD/DQ).  Fix: the i32gather_ps pair cast the integer index
   through (__m512) (a float vector); use (__m512i) to match Intel's
   signature — the bit pattern is unchanged, but the cast now documents and
   enforces the intended integer index type. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
/* Scatter 8 floats / 8 32-bit integers to addr at 64-bit indices. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))
/* Scatter 8 doubles / 8 64-bit integers to addr at 64-bit indices. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
/* Scatter 16 floats / 16 32-bit integers to addr at 32-bit indices. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))
/* Scatter 8 doubles / 8 64-bit integers to addr at 32-bit indices. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7755 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar single-precision FMA (A*B+C in lane 0) with explicit rounding R. */
#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7777 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar FMA with rounding (lane 0 zeroed when U bit 0 clear). */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7793 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
/* mask3 scalar FMA with rounding (merge result into third operand Y). */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7809 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar fused multiply-subtract with rounding: negate the addend. */
#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7831 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar fused multiply-subtract with rounding. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7847 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
/* mask3 fused multiply-subtract; uses the dedicated vfmsub mask3 builtin. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7863 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar fused negated multiply-add: negate one multiplicand. */
#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7885 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar fused negated multiply-add with rounding. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7901 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
/* mask3 fused negated multiply-add with rounding. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7917 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar fused negated multiply-subtract: negate a multiplicand and addend. */
#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))
7939 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar fused negated multiply-subtract with rounding. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))
7955 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
/* mask3 fused negated multiply-subtract via the vfmsub mask3 builtin. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7971 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double-precision FMA (A*B+C in lane 0) with explicit rounding R. */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
7993 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double FMA with rounding. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))
8009 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
/* mask3 scalar double FMA with rounding. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
8025 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double fused multiply-subtract with rounding: negate the addend. */
#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8047 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double fused multiply-subtract with rounding. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), (int)(R)))
8063 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
/* mask3 double fused multiply-subtract via the vfmsub mask3 builtin. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
8079 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double fused negated multiply-add: negate one multiplicand. */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8101 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double fused negated multiply-add with rounding. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))
8117 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
/* mask3 double fused negated multiply-add with rounding. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
8133 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double fused negated multiply-subtract. */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8155 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double fused negated multiply-subtract with rounding. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), (int)(R)))
8172 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
/* mask3 double fused negated multiply-subtract via the vfmsub mask3 builtin. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
/* Permute 64-bit double lanes within each 256-bit half by immediate C;
   masked forms select between the permuted result and W / zero. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))
/* Permute 64-bit integer lanes by immediate C, plus masked variants. */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
8214 return (__m512d)__builtin_ia32_permvardf512((__v8df)
__Y, (__v8di) __X);
8220 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8228 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8236 return (__m512i)__builtin_ia32_permvardi512((__v8di)
__Y, (__v8di)__X);
8242 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
8251 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
8259 return (__m512)__builtin_ia32_permvarsf512((__v16sf)
__Y, (__v16si)__X);
8265 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8273 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8281 return (__m512i)__builtin_ia32_permvarsi512((__v16si)
__Y, (__v16si)__X);
8284#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8289 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
8298 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
8303#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8338 return (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8344 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8349 *__C = (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8350 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
/* AVX-512 mask16 operation aliases and mask shift-by-immediate macros. */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))

#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8386 return (
unsigned int)__builtin_ia32_kmovw((
__mmask16)__A);
8408 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)
__P);
8415 return (__m512i) __builtin_nontemporal_load((
const __v8di_aligned *)
__P);
8422 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)
__P);
8428 typedef __v16sf __v16sf_aligned
__attribute__((aligned(64)));
8429 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)
__P);
8435 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8443 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8452 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8460 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8469 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8477 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8486 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8494 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
/* Scalar single-precision compare producing a mask; predicate P, with
   explicit rounding (round forms) or the current direction. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
/* Scalar double-precision compare producing a mask; see the ss forms. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8605 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8606 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8612 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8620 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8628 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8629 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8635 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8643 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8651 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B), __W);
8657 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B),
8664 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B), __W);
8670 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B),
8677 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8683 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8689 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8693 return (__m128) __builtin_ia32_loadss128_mask ((
const __v4sf *) __A, src, __U & 1);
8699 return (__m128)__builtin_ia32_loadss128_mask ((
const __v4sf *) __A,
8707 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8711 return (__m128d) __builtin_ia32_loadsd128_mask ((
const __v2df *) __A, src, __U & 1);
8717 return (__m128d) __builtin_ia32_loadsd128_mask ((
const __v2df *) __A,
/* Shuffle 32-bit elements within each 128-bit lane by immediate I. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8738 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8746 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8754 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8762 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8770 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)
__P,
8778 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)
__P,
8786 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)
__P,
8794 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)
__P,
8802 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)
__P,
8810 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)
__P,
8818 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)
__P,
8826 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)
__P,
8834 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8842 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8850 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8858 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
/* Convert 8 packed floats to 8 doubles with explicit rounding/exception
   control R; masked forms merge into W or zero. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
8881 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8887 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8895 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8914 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U, (__v8df)__A,
8920 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U, (__v8df)__A,
8926 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U, (__v16sf)__A,
8932 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U, (__v16sf)__A,
8939 __builtin_ia32_compressstoredf512_mask ((__v8df *)
__P, (__v8df) __A,
8946 __builtin_ia32_compressstoredi512_mask ((__v8di *)
__P, (__v8di) __A,
8953 __builtin_ia32_compressstoresf512_mask ((__v16sf *)
__P, (__v16sf) __A,
8960 __builtin_ia32_compressstoresi512_mask ((__v16si *)
__P, (__v16si) __A,
/* Convert the low double of B to a float merged into A, with rounding R. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
8985 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8994 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
/* Intel-naming aliases for the scalar int<->float conversion intrinsics. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss

#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
/* Convert a signed integer to the low element of an __m128/__m128d with
   explicit rounding R (i/si spellings are equivalent). */
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
/* Convert the low float of B to a double merged into A, with rounding R. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
9058 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9067 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
/* Convert an unsigned 64-bit integer to the low double with rounding R. */
#define _mm_cvt_roundu64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                       (unsigned long long)(B), (int)(R)))
9086_mm_cvtu64_sd (__m128d __A,
unsigned long long __B)
/* Convert an unsigned 32/64-bit integer to the low float with rounding R. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))

#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))
9110_mm_cvtu64_ss (__m128 __A,
unsigned long long __B)
9120 return (__m512i) __builtin_ia32_selectd_512(__M,
9128 return (__m512i) __builtin_ia32_selectq_512(__M,
9134 char __e63,
char __e62,
char __e61,
char __e60,
char __e59,
char __e58,
9135 char __e57,
char __e56,
char __e55,
char __e54,
char __e53,
char __e52,
9136 char __e51,
char __e50,
char __e49,
char __e48,
char __e47,
char __e46,
9137 char __e45,
char __e44,
char __e43,
char __e42,
char __e41,
char __e40,
9138 char __e39,
char __e38,
char __e37,
char __e36,
char __e35,
char __e34,
9139 char __e33,
char __e32,
char __e31,
char __e30,
char __e29,
char __e28,
9140 char __e27,
char __e26,
char __e25,
char __e24,
char __e23,
char __e22,
9141 char __e21,
char __e20,
char __e19,
char __e18,
char __e17,
char __e16,
9142 char __e15,
char __e14,
char __e13,
char __e12,
char __e11,
char __e10,
9143 char __e9,
char __e8,
char __e7,
char __e6,
char __e5,
char __e4,
char __e3,
9144 char __e2,
char __e1,
char __e0) {
9146 return __extension__ (__m512i)(__v64qi)
9147 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9148 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9149 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9150 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9151 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9152 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9153 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9154 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9158 short __e31,
short __e30,
short __e29,
short __e28,
short __e27,
9159 short __e26,
short __e25,
short __e24,
short __e23,
short __e22,
9160 short __e21,
short __e20,
short __e19,
short __e18,
short __e17,
9161 short __e16,
short __e15,
short __e14,
short __e13,
short __e12,
9162 short __e11,
short __e10,
short __e9,
short __e8,
short __e7,
short __e6,
9163 short __e5,
short __e4,
short __e3,
short __e2,
short __e1,
short __e0) {
9164 return __extension__ (__m512i)(__v32hi)
9165 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9166 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9167 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9168 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9172 int __A,
int __B,
int __C,
int __D,
int __E,
int __F,
int __G,
int __H,
9173 int __I,
int __J,
int __K,
int __L,
int __M,
int __N,
int __O,
int __P) {
9174 return __extension__ (__m512i)(__v16si)
9175 {
__P, __O, __N, __M,
__L, __K, __J, __I,
9176 __H, __G, __F, __E,
__D, __C, __B, __A };
/* Reversed-argument-order form of _mm512_set_epi32. */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))
9186 long long __E,
long long __F,
long long __G,
long long __H) {
9187 return __extension__ (__m512i) (__v8di)
9188 { __H, __G, __F, __E,
__D, __C, __B, __A };
/* Reversed-argument-order form of _mm512_set_epi64. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9196 double __F,
double __G,
double __H) {
9197 return __extension__ (__m512d)
9198 { __H, __G, __F, __E,
__D, __C, __B, __A };
/* Reversed-argument-order form of _mm512_set_pd. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9206 float __G,
float __H,
float __I,
float __J,
float __K,
float __L,
9207 float __M,
float __N,
float __O,
float __P) {
9208 return __extension__ (__m512)
9209 {
__P, __O, __N, __M,
__L, __K, __J, __I,
9210 __H, __G, __F, __E,
__D, __C, __B, __A };
/* Reversed-argument-order form of _mm512_set_ps. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
9255 return __builtin_reduce_add((__v8di)__W);
9260 return __builtin_reduce_mul((__v8di)__W);
9265 return __builtin_reduce_and((__v8di)__W);
9270 return __builtin_reduce_or((__v8di)__W);
9276 return __builtin_reduce_add((__v8di)__W);
9282 return __builtin_reduce_mul((__v8di)__W);
9288 return __builtin_reduce_and((__v8di)__W);
9294 return __builtin_reduce_or((__v8di)__W);
9301 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9305 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9311 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9317 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9322 return __builtin_reduce_add((__v16si)__W);
9327 return __builtin_reduce_mul((__v16si)__W);
9332 return __builtin_reduce_and((__v16si)__W);
9337 return __builtin_reduce_or((__v16si)__W);
9343 return __builtin_reduce_add((__v16si)__W);
9349 return __builtin_reduce_mul((__v16si)__W);
9355 return __builtin_reduce_and((__v16si)__W);
9361 return __builtin_reduce_or((__v16si)__W);
9366 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9371 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9377 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9383 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9388 return __builtin_reduce_max((__v8di)__V);
9393 return __builtin_reduce_max((__v8du)__V);
9398 return __builtin_reduce_min((__v8di)__V);
9403 return __builtin_reduce_min((__v8du)__V);
9409 return __builtin_reduce_max((__v8di)__V);
9415 return __builtin_reduce_max((__v8du)__V);
9421 return __builtin_reduce_min((__v8di)__V);
9427 return __builtin_reduce_min((__v8du)__V);
9431 return __builtin_reduce_max((__v16si)__V);
9436 return __builtin_reduce_max((__v16su)__V);
9441 return __builtin_reduce_min((__v16si)__V);
9446 return __builtin_reduce_min((__v16su)__V);
9452 return __builtin_reduce_max((__v16si)__V);
9458 return __builtin_reduce_max((__v16su)__V);
9464 return __builtin_reduce_min((__v16si)__V);
9470 return __builtin_reduce_min((__v16su)__V);
9475 return __builtin_ia32_reduce_fmax_pd512(__V);
9480 return __builtin_ia32_reduce_fmin_pd512(__V);
9486 return __builtin_ia32_reduce_fmax_pd512(__V);
9492 return __builtin_ia32_reduce_fmin_pd512(__V);
9497 return __builtin_ia32_reduce_fmax_ps512(__V);
9502 return __builtin_ia32_reduce_fmin_ps512(__V);
9508 return __builtin_ia32_reduce_fmax_ps512(__V);
9514 return __builtin_ia32_reduce_fmin_ps512(__V);
9530 __v16si
__b = (__v16si)__A;
/* "lo" gather forms: use only the low 8 of 16 32-bit indices in a __m512i
   by narrowing it to __m256i before delegating to the i32gather macros. */
#define _mm512_i32logather_pd(vindex, base_addr, scale) \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))

#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \
                           (base_addr), (scale))

#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))

#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \
                              (base_addr), (scale))
/* "lo" scatter forms: narrow the 512-bit index vector to its low 256 bits
   and delegate to the i32scatter macros. */
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))

#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_pd((base_addr), (mask), \
                            _mm512_castsi512_si256(vindex), (v1), (scale))

#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_epi64((base_addr), \
                          _mm512_castsi512_si256(vindex), (v1), (scale))

#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_epi64((base_addr), (mask), \
                               _mm512_castsi512_si256(vindex), (v1), (scale))
/* Remove the header-private attribute macros so they do not leak to users. */
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ vector float vector float __b
static __inline__ uint32_t volatile uint32_t * __p
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
#define __DEFAULT_FN_ATTRS512
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
typedef char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8 x float]; the lower 256 bits contain the value of the source vector and the upper 256 bits are zeroed.
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd(__m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
#define __DEFAULT_FN_ATTRS128
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4 x float]. The lower 128 bits contain the value of the source vector; the upper 384 bits are set to zero.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
#define _MM_FROUND_CUR_DIRECTION 0x04
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2 x double]. The lower 128 bits contain the value of the source vector; the upper 384 bits are set to zero.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B) _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision value of the first operand and returns the difference in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper 64 bits of the first operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of the second operand and returns the quotient in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper 64 bits of the first operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper 64 bits of the first operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ void int __a
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper 64 bits of the first operand.
static __inline__ void short __D
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
__inline unsigned int unsigned int __Y
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding value in the first operand and returns the difference in the low-order bits of the result. The upper 96 bits of the result are copied from the upper 96 bits of the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the second operand and returns the quotient in the low-order 32 bits of the result. The upper 96 bits of the result are copied from the upper 96 bits of the first operand.