10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
/* Element-typed vector aliases. Each is declared with
   __vector_size__(64), i.e. a 64-byte vector of the named element type.
   No alignment attribute is given on these. */
/* 64-byte vector of unsigned char. */
24typedef unsigned char __v64qu
__attribute__((__vector_size__(64)));
/* 64-byte vector of unsigned short. */
25typedef unsigned short __v32hu
__attribute__((__vector_size__(64)));
/* 64-byte vector of unsigned long long. */
26typedef unsigned long long __v8du
__attribute__((__vector_size__(64)));
/* 64-byte vector of unsigned int. */
27typedef unsigned int __v16su
__attribute__((__vector_size__(64)));
/* 64-byte vector of explicitly signed char (plain char is not used here). */
31typedef signed char __v64qs
__attribute__((__vector_size__(64)));
/* 64-byte vector types declared with an explicit 64-byte alignment
   (__aligned__(64)). */
/* 64-byte, 64-byte-aligned vector of float. */
33typedef float __m512
__attribute__((__vector_size__(64), __aligned__(64)));
/* 64-byte, 64-byte-aligned vector of double. */
34typedef double __m512d
__attribute__((__vector_size__(64), __aligned__(64)));
/* 64-byte, 64-byte-aligned vector of long long. */
35typedef long long __m512i
__attribute__((__vector_size__(64), __aligned__(64)));
/* Counterparts of __m512 / __m512d / __m512i declared with
   __aligned__(1), i.e. the same 64-byte vector layout but with only
   byte alignment required. */
/* 64-byte vector of float, alignment 1. */
37typedef float __m512_u
__attribute__((__vector_size__(64), __aligned__(1)));
/* 64-byte vector of double, alignment 1. */
38typedef double __m512d_u
__attribute__((__vector_size__(64), __aligned__(1)));
/* 64-byte vector of long long, alignment 1. */
39typedef long long __m512i_u
__attribute__((__vector_size__(64), __aligned__(1)));
/* _MM_FROUND_* integer constants (values 0x00..0x04).
   _MM_FROUND_TO_NEAREST_INT and _MM_FROUND_TO_NEAREST_TIES_EVEN are two
   names for the same value, 0x00. In this file
   _MM_FROUND_CUR_DIRECTION is passed as the trailing argument of the
   roundscale builtins; the other names are presumably rounding-mode
   selectors for the same argument position -- confirm against the
   builtin documentation. */
45#define _MM_FROUND_TO_NEAREST_INT 0x00
46#define _MM_FROUND_TO_NEAREST_TIES_EVEN 0x00
47#define _MM_FROUND_TO_NEG_INF 0x01
48#define _MM_FROUND_TO_POS_INF 0x02
49#define _MM_FROUND_TO_ZERO 0x03
50#define _MM_FROUND_CUR_DIRECTION 0x04
/* Alias names for two integer-comparison predicates: _MM_CMPINT_GE
   expands to _MM_CMPINT_NLT and _MM_CMPINT_GT expands to
   _MM_CMPINT_NLE. The definitions of the NLT/NLE names are not visible
   in this part of the file. */
60#define _MM_CMPINT_GE _MM_CMPINT_NLT
62#define _MM_CMPINT_GT _MM_CMPINT_NLE
/* Attribute bundles for the intrinsic function definitions. All three
   request __always_inline__, __nodebug__ and __target__("avx512f");
   they differ only in the __min_vector_width__ they declare. */
/* __DEFAULT_FN_ATTRS512: common attributes plus __min_vector_width__(512). */
171#define __DEFAULT_FN_ATTRS512 \
172 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
173 __min_vector_width__(512)))
/* __DEFAULT_FN_ATTRS128: common attributes plus __min_vector_width__(128). */
174#define __DEFAULT_FN_ATTRS128 \
175 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
176 __min_vector_width__(128)))
/* __DEFAULT_FN_ATTRS: common attributes only, no __min_vector_width__. */
177#define __DEFAULT_FN_ATTRS \
178 __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
180#if defined(__cplusplus) && (__cplusplus >= 201103L)
181#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
182#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
183#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
185#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
186#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
187#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
194 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
/* _mm512_setzero_epi32 is a plain alias: it expands to
   _mm512_setzero_si512. */
197#define _mm512_setzero_epi32 _mm512_setzero_si512
202 return (__m512d)__builtin_ia32_undef512();
208 return (__m512)__builtin_ia32_undef512();
212 return (__m512)__builtin_ia32_undef512();
218 return (__m512i)__builtin_ia32_undef512();
223 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
229 return (__m512i)__builtin_ia32_selectd_512(__M,
236 return (__m512i)__builtin_ia32_selectd_512(__M,
243 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
244 0, 0, 0, 0, 0, 0, 0, 0);
249 return (__m512i)__builtin_ia32_selectq_512(
255 return (__m512i)__builtin_ia32_selectq_512(__M,
261 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
262 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
/* _mm512_setzero is a plain alias: it expands to _mm512_setzero_ps. */
265#define _mm512_setzero _mm512_setzero_ps
269 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
275 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
276 __w, __w, __w, __w, __w, __w, __w, __w };
282 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
288 return __extension__ (__m512i)(__v64qi){
289 __w, __w, __w, __w, __w, __w, __w, __w,
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w,
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w };
302 return __extension__ (__m512i)(__v32hi){
303 __w, __w, __w, __w, __w, __w, __w, __w,
304 __w, __w, __w, __w, __w, __w, __w, __w,
305 __w, __w, __w, __w, __w, __w, __w, __w,
306 __w, __w, __w, __w, __w, __w, __w, __w };
312 return __extension__ (__m512i)(__v16si){
313 __s, __s, __s, __s, __s, __s, __s, __s,
314 __s, __s, __s, __s, __s, __s, __s, __s };
319 return (__m512i)__builtin_ia32_selectd_512(__M,
327 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
332 return (__m512i)__builtin_ia32_selectq_512(__M,
339 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
345 return __extension__ (__m512i)(__v16si)
346 {
__D, __C, __B, __A,
__D, __C, __B, __A,
347 __D, __C, __B, __A,
__D, __C, __B, __A };
352 return __extension__ (__m512i) (__v8di)
353 {
__D, __C, __B, __A,
__D, __C, __B, __A };
358 return __extension__ (__m512d)
359 {
__D, __C, __B, __A,
__D, __C, __B, __A };
364 return __extension__ (__m512)
365 {
__D, __C, __B, __A,
__D, __C, __B, __A,
366 __D, __C, __B, __A,
__D, __C, __B, __A };
391 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
392 0, 0, 0, 0, 0, 0, 0, 0);
400 return __builtin_shufflevector(
__a, __builtin_nondeterministic_value(
__a), 0,
401 1, 2, 3, 4, 5, 6, 7);
407 return __builtin_shufflevector(
__a, __builtin_nondeterministic_value(
__a), 0,
408 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
414 return __builtin_shufflevector(
__a,
__a, 0, 1);
420 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
426 return __builtin_shufflevector(
__a,
__a, 0, 1, 2, 3);
431 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
437 return (__m512) (__A);
443 return (__m512i) (__A);
449 __m256d __B = __builtin_nondeterministic_value(__B);
450 return __builtin_shufflevector(
451 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
452 __B, 0, 1, 2, 3, 4, 5, 6, 7);
458 return (__m512d) (__A);
464 return (__m512i) (__A);
470 __m256 __B = __builtin_nondeterministic_value(__B);
471 return __builtin_shufflevector(
472 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
473 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
479 __m256i __B = __builtin_nondeterministic_value(__B);
480 return __builtin_shufflevector(
481 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
482 __B, 0, 1, 2, 3, 4, 5, 6, 7);
488 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
494 return (__m512) (__A);
500 return (__m512d) (__A);
506 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
511 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
539 return __builtin_shufflevector((__v2df)
__a, (__v2df)
_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
557 return __builtin_shufflevector((__v4df)
__a, (__v4df)
_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
574 return __builtin_shufflevector((__v4sf)
__a, (__v4sf)
_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
591 return __builtin_shufflevector((__v8sf)
__a, (__v8sf)
_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
608 return __builtin_shufflevector((__v2di)
__a, (__v2di)
_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
632 return (__m512i)((__v16su)
__a & (__v16su)
__b);
637 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
652 return (__m512i)((__v8du)
__a & (__v8du)
__b);
657 return (__m512i)__builtin_ia32_selectq_512(
671 return (__m512i)(~(__v8du)__A & (__v8du)__B);
677 return (__m512i)(~(__v16su)__A & (__v16su)__B);
683 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
698 return (__m512i)(~(__v8du)__A & (__v8du)__B);
704 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
719 return (__m512i)((__v16su)
__a | (__v16su)
__b);
725 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
739 return (__m512i)((__v8du)
__a | (__v8du)
__b);
745 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__k,
759 return (__m512i)((__v16su)
__a ^ (__v16su)
__b);
765 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
779 return (__m512i)((__v8du)
__a ^ (__v8du)
__b);
785 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__k,
799 return (__m512i)((__v8du)
__a & (__v8du)
__b);
805 return (__m512i)((__v8du)
__a | (__v8du)
__b);
811 return (__m512i)((__v8du)
__a ^ (__v8du)
__b);
818 return (__m512d)((__v8df)
__a + (__v8df)
__b);
823 return (__m512)((__v16sf)
__a + (__v16sf)
__b);
828 return (__m512d)((__v8df)
__a * (__v8df)
__b);
833 return (__m512)((__v16sf)
__a * (__v16sf)
__b);
838 return (__m512d)((__v8df)
__a - (__v8df)
__b);
843 return (__m512)((__v16sf)
__a - (__v16sf)
__b);
848 return (__m512i) ((__v8du) __A + (__v8du) __B);
853 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
860 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
867 return (__m512i) ((__v8du) __A - (__v8du) __B);
872 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
879 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
886 return (__m512i) ((__v16su) __A + (__v16su) __B);
891 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
898 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
905 return (__m512i) ((__v16su) __A - (__v16su) __B);
910 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
917 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
/* _mm512_max_round_pd(A, B, R): expands to __builtin_ia32_maxpd512
   applied to A and B (each cast via __m512d to __v8df) and (int)(R);
   the result is cast to __m512d. R is forwarded unchanged -- presumably
   a rounding/exception control immediate; confirm against the builtin. */
922#define _mm512_max_round_pd(A, B, R) \
923 ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
924 (__v8df)(__m512d)(B), (int)(R)))
926#define _mm512_mask_max_round_pd(W, U, A, B, R) \
927 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
928 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
/* _mm512_maskz_max_round_pd(U, A, B, R): expands to
   __builtin_ia32_selectpd_512 with three operands: the mask
   (__mmask8)(U), the value of _mm512_max_round_pd(A, B, R), and the
   value of _mm512_setzero_pd(). Presumably lanes whose mask bit is
   clear take the third operand -- confirm select semantics. */
931#define _mm512_maskz_max_round_pd(U, A, B, R) \
932 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
933 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
934 (__v8df)_mm512_setzero_pd()))
936static __inline__ __m512d
938 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
944 return (__m512d)__builtin_ia32_selectpd_512(__U,
951 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* _mm512_max_round_ps(A, B, R): expands to __builtin_ia32_maxps512
   applied to A and B (each cast via __m512 to __v16sf) and (int)(R);
   the result is cast to __m512. R is forwarded unchanged -- presumably
   a rounding/exception control immediate; confirm against the builtin. */
956#define _mm512_max_round_ps(A, B, R) \
957 ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
958 (__v16sf)(__m512)(B), (int)(R)))
960#define _mm512_mask_max_round_ps(W, U, A, B, R) \
961 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
962 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
/* _mm512_maskz_max_round_ps(U, A, B, R): expands to
   __builtin_ia32_selectps_512 with three operands: the mask
   (__mmask16)(U), the value of _mm512_max_round_ps(A, B, R), and the
   value of _mm512_setzero_ps(). Presumably lanes whose mask bit is
   clear take the third operand -- confirm select semantics. */
965#define _mm512_maskz_max_round_ps(U, A, B, R) \
966 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
967 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
968 (__v16sf)_mm512_setzero_ps()))
970static __inline__ __m512
972 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
978 return (__m512)__builtin_ia32_selectps_512(__U,
985 return (__m512)__builtin_ia32_selectps_512(__U,
992 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1001 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
/* _mm_max_round_ss(A, B, R): expands to
   __builtin_ia32_maxss_round_mask with A and B (each cast via __m128 to
   __v4sf), _mm_setzero_ps() as the third operand, an all-ones mask
   (__mmask8)-1, and (int)(R). The result is cast to __m128. */
1008#define _mm_max_round_ss(A, B, R) \
1009 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1010 (__v4sf)(__m128)(B), \
1011 (__v4sf)_mm_setzero_ps(), \
1012 (__mmask8)-1, (int)(R)))
1014#define _mm_mask_max_round_ss(W, U, A, B, R) \
1015 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1016 (__v4sf)(__m128)(B), \
1017 (__v4sf)(__m128)(W), (__mmask8)(U), \
/* _mm_maskz_max_round_ss(U, A, B, R): same builtin call as
   _mm_max_round_ss, except that the mask operand is the caller's
   (__mmask8)(U) instead of all-ones; the third operand is still
   _mm_setzero_ps(). */
1020#define _mm_maskz_max_round_ss(U, A, B, R) \
1021 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1022 (__v4sf)(__m128)(B), \
1023 (__v4sf)_mm_setzero_ps(), \
1024 (__mmask8)(U), (int)(R)))
1028 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1037 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
/* The three macros below all expand to __builtin_ia32_maxsd_round_mask
   with A and B (each cast via __m128d to __v2df) as the first two
   operands and (int)(R) as the last; they differ only in the third
   operand and the mask. The result is cast to __m128d. */
/* _mm_max_round_sd(A, B, R): third operand _mm_setzero_pd(), mask
   (__mmask8)-1 (all ones). */
1044#define _mm_max_round_sd(A, B, R) \
1045 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1046 (__v2df)(__m128d)(B), \
1047 (__v2df)_mm_setzero_pd(), \
1048 (__mmask8)-1, (int)(R)))
/* _mm_mask_max_round_sd(W, U, A, B, R): third operand W, mask
   (__mmask8)(U). */
1050#define _mm_mask_max_round_sd(W, U, A, B, R) \
1051 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1052 (__v2df)(__m128d)(B), \
1053 (__v2df)(__m128d)(W), \
1054 (__mmask8)(U), (int)(R)))
/* _mm_maskz_max_round_sd(U, A, B, R): third operand _mm_setzero_pd(),
   mask (__mmask8)(U). */
1056#define _mm_maskz_max_round_sd(U, A, B, R) \
1057 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1058 (__v2df)(__m128d)(B), \
1059 (__v2df)_mm_setzero_pd(), \
1060 (__mmask8)(U), (int)(R)))
1062static __inline __m512i
1064 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1069 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1076 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1083 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1088 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1095 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1102 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1107 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1114 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1121 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1126 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1133 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
/* _mm512_min_round_pd(A, B, R): expands to __builtin_ia32_minpd512
   applied to A and B (each cast via __m512d to __v8df) and (int)(R);
   the result is cast to __m512d. R is forwarded unchanged -- presumably
   a rounding/exception control immediate; confirm against the builtin. */
1138#define _mm512_min_round_pd(A, B, R) \
1139 ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1140 (__v8df)(__m512d)(B), (int)(R)))
1142#define _mm512_mask_min_round_pd(W, U, A, B, R) \
1143 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1144 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
/* _mm512_maskz_min_round_pd(U, A, B, R): expands to
   __builtin_ia32_selectpd_512 with three operands: the mask
   (__mmask8)(U), the value of _mm512_min_round_pd(A, B, R), and the
   value of _mm512_setzero_pd(). Presumably lanes whose mask bit is
   clear take the third operand -- confirm select semantics. */
1147#define _mm512_maskz_min_round_pd(U, A, B, R) \
1148 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1149 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1150 (__v8df)_mm512_setzero_pd()))
1152static __inline__ __m512d
1154 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1160 return (__m512d)__builtin_ia32_selectpd_512(__U,
1167 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* _mm512_min_round_ps(A, B, R): expands to __builtin_ia32_minps512
   applied to A and B (each cast via __m512 to __v16sf) and (int)(R);
   the result is cast to __m512. R is forwarded unchanged -- presumably
   a rounding/exception control immediate; confirm against the builtin. */
1172#define _mm512_min_round_ps(A, B, R) \
1173 ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1174 (__v16sf)(__m512)(B), (int)(R)))
1176#define _mm512_mask_min_round_ps(W, U, A, B, R) \
1177 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1178 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
/* _mm512_maskz_min_round_ps(U, A, B, R): expands to
   __builtin_ia32_selectps_512 with three operands: the mask
   (__mmask16)(U), the value of _mm512_min_round_ps(A, B, R), and the
   value of _mm512_setzero_ps(). Presumably lanes whose mask bit is
   clear take the third operand -- confirm select semantics. */
1181#define _mm512_maskz_min_round_ps(U, A, B, R) \
1182 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1183 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1184 (__v16sf)_mm512_setzero_ps()))
1186static __inline__ __m512
1188 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1194 return (__m512)__builtin_ia32_selectps_512(__U,
1201 return (__m512)__builtin_ia32_selectps_512(__U,
1208 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1217 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
/* _mm_min_round_ss(A, B, R): expands to
   __builtin_ia32_minss_round_mask with A and B (each cast via __m128 to
   __v4sf), _mm_setzero_ps() as the third operand, an all-ones mask
   (__mmask8)-1, and (int)(R). The result is cast to __m128. */
1224#define _mm_min_round_ss(A, B, R) \
1225 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1226 (__v4sf)(__m128)(B), \
1227 (__v4sf)_mm_setzero_ps(), \
1228 (__mmask8)-1, (int)(R)))
1230#define _mm_mask_min_round_ss(W, U, A, B, R) \
1231 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1232 (__v4sf)(__m128)(B), \
1233 (__v4sf)(__m128)(W), (__mmask8)(U), \
/* _mm_maskz_min_round_ss(U, A, B, R): same builtin call as
   _mm_min_round_ss, except that the mask operand is the caller's
   (__mmask8)(U) instead of all-ones; the third operand is still
   _mm_setzero_ps(). */
1236#define _mm_maskz_min_round_ss(U, A, B, R) \
1237 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1238 (__v4sf)(__m128)(B), \
1239 (__v4sf)_mm_setzero_ps(), \
1240 (__mmask8)(U), (int)(R)))
1244 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1253 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
/* The three macros below all expand to __builtin_ia32_minsd_round_mask
   with A and B (each cast via __m128d to __v2df) as the first two
   operands and (int)(R) as the last; they differ only in the third
   operand and the mask. The result is cast to __m128d. */
/* _mm_min_round_sd(A, B, R): third operand _mm_setzero_pd(), mask
   (__mmask8)-1 (all ones). */
1260#define _mm_min_round_sd(A, B, R) \
1261 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1262 (__v2df)(__m128d)(B), \
1263 (__v2df)_mm_setzero_pd(), \
1264 (__mmask8)-1, (int)(R)))
/* _mm_mask_min_round_sd(W, U, A, B, R): third operand W, mask
   (__mmask8)(U). */
1266#define _mm_mask_min_round_sd(W, U, A, B, R) \
1267 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1268 (__v2df)(__m128d)(B), \
1269 (__v2df)(__m128d)(W), \
1270 (__mmask8)(U), (int)(R)))
/* _mm_maskz_min_round_sd(U, A, B, R): third operand _mm_setzero_pd(),
   mask (__mmask8)(U). */
1272#define _mm_maskz_min_round_sd(U, A, B, R) \
1273 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1274 (__v2df)(__m128d)(B), \
1275 (__v2df)_mm_setzero_pd(), \
1276 (__mmask8)(U), (int)(R)))
1278static __inline __m512i
1280 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1285 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1292 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1299 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1304 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1311 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1318 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1323 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1330 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1337 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1342 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1349 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1356 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si)
__Y);
1361 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1368 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1375 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)
__Y);
1380 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1387 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1394 return (__m512i) ((__v16su) __A * (__v16su) __B);
1399 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1406 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1413 return (__m512i) ((__v8du) __A * (__v8du) __B);
1418 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* _mm512_sqrt_round_pd(A, R): expands to __builtin_ia32_sqrtpd512
   applied to A (cast via __m512d to __v8df) and (int)(R); the result is
   cast to __m512d. */
1423#define _mm512_sqrt_round_pd(A, R) \
1424 ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
/* _mm512_mask_sqrt_round_pd(W, U, A, R): expands to
   __builtin_ia32_selectpd_512 with the mask (__mmask8)(U), the value of
   _mm512_sqrt_round_pd(A, R), and W. Presumably W supplies the lanes
   whose mask bit is clear -- confirm select semantics. */
1426#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1427 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1428 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1429 (__v8df)(__m512d)(W)))
/* _mm512_maskz_sqrt_round_pd(U, A, R): as the mask form, but the third
   select operand is _mm512_setzero_pd() instead of a caller value. */
1431#define _mm512_maskz_sqrt_round_pd(U, A, R) \
1432 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1433 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1434 (__v8df)_mm512_setzero_pd()))
1437 return (__m512d)__builtin_elementwise_sqrt((__v8df)__A);
1442 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)
_mm512_sqrt_pd(__A),
1448 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)
_mm512_sqrt_pd(__A),
/* _mm512_sqrt_round_ps(A, R): expands to __builtin_ia32_sqrtps512
   applied to A (cast via __m512 to __v16sf) and (int)(R); the result is
   cast to __m512. */
1452#define _mm512_sqrt_round_ps(A, R) \
1453 ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
/* _mm512_mask_sqrt_round_ps(W, U, A, R): expands to
   __builtin_ia32_selectps_512 with the mask (__mmask16)(U), the value
   of _mm512_sqrt_round_ps(A, R), and W. Presumably W supplies the lanes
   whose mask bit is clear -- confirm select semantics. */
1455#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1456 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1457 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1458 (__v16sf)(__m512)(W)))
/* _mm512_maskz_sqrt_round_ps(U, A, R): as the mask form, but the third
   select operand is _mm512_setzero_ps() instead of a caller value. */
1460#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1461 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1462 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1463 (__v16sf)_mm512_setzero_ps()))
1466 return (__m512)__builtin_elementwise_sqrt((__v16sf)__A);
1471 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)
_mm512_sqrt_ps(__A),
1477 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)
_mm512_sqrt_ps(__A),
1484 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1492 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1500 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1509 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1518 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1526 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1535 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1545 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1554 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1563 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1573 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1582 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1591 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1600 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1608 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1617 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1626 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1634 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1643 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1653 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1662 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1671 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1681 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1690 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1699 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1701 (__v16sf) __A, (
unsigned short)-1,
1708 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1717 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1719 (__v8df) __A, (
unsigned char)-1,
1726 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1735 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1744 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1746 (__v16sf) __A, (
unsigned short)-1,
1753 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1755 (__v8df) __A, (
unsigned char)-1,
1762 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1770 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1775 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1782 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1789 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1794 return (__m512i)__builtin_ia32_selectd_512(__U,
1801 return (__m512i)__builtin_ia32_selectd_512(__U,
1809 return __builtin_ia32_selectss_128(__U, __A, __W);
/* _mm_add_round_ss(A, B, R): expands to
   __builtin_ia32_addss_round_mask with A and B (each cast via __m128 to
   __v4sf), _mm_setzero_ps() as the third operand, an all-ones mask
   (__mmask8)-1, and (int)(R). The result is cast to __m128. */
1818#define _mm_add_round_ss(A, B, R) \
1819 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1820 (__v4sf)(__m128)(B), \
1821 (__v4sf)_mm_setzero_ps(), \
1822 (__mmask8)-1, (int)(R)))
1824#define _mm_mask_add_round_ss(W, U, A, B, R) \
1825 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1826 (__v4sf)(__m128)(B), \
1827 (__v4sf)(__m128)(W), (__mmask8)(U), \
/* _mm_maskz_add_round_ss(U, A, B, R): same builtin call as
   _mm_add_round_ss, except that the mask operand is the caller's
   (__mmask8)(U) instead of all-ones; the third operand is still
   _mm_setzero_ps(). */
1830#define _mm_maskz_add_round_ss(U, A, B, R) \
1831 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1832 (__v4sf)(__m128)(B), \
1833 (__v4sf)_mm_setzero_ps(), \
1834 (__mmask8)(U), (int)(R)))
1839 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* The three macros below all expand to __builtin_ia32_addsd_round_mask
   with A and B (each cast via __m128d to __v2df) as the first two
   operands and (int)(R) as the last; they differ only in the third
   operand and the mask. The result is cast to __m128d. */
/* _mm_add_round_sd(A, B, R): third operand _mm_setzero_pd(), mask
   (__mmask8)-1 (all ones). */
1847#define _mm_add_round_sd(A, B, R) \
1848 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1849 (__v2df)(__m128d)(B), \
1850 (__v2df)_mm_setzero_pd(), \
1851 (__mmask8)-1, (int)(R)))
/* _mm_mask_add_round_sd(W, U, A, B, R): third operand W, mask
   (__mmask8)(U). */
1853#define _mm_mask_add_round_sd(W, U, A, B, R) \
1854 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1855 (__v2df)(__m128d)(B), \
1856 (__v2df)(__m128d)(W), \
1857 (__mmask8)(U), (int)(R)))
/* _mm_maskz_add_round_sd(U, A, B, R): third operand _mm_setzero_pd(),
   mask (__mmask8)(U). */
1859#define _mm_maskz_add_round_sd(U, A, B, R) \
1860 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1861 (__v2df)(__m128d)(B), \
1862 (__v2df)_mm_setzero_pd(), \
1863 (__mmask8)(U), (int)(R)))
1867 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1874 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1881 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
1888 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* _mm512_add_round_pd(A, B, R): expands to __builtin_ia32_addpd512
   applied to A and B (each cast via __m512d to __v8df) and (int)(R);
   the result is cast to __m512d. */
1893#define _mm512_add_round_pd(A, B, R) \
1894 ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1895 (__v8df)(__m512d)(B), (int)(R)))
/* _mm512_mask_add_round_pd(W, U, A, B, R): expands to
   __builtin_ia32_selectpd_512 with the mask (__mmask8)(U), the value of
   _mm512_add_round_pd(A, B, R), and W. Presumably W supplies the lanes
   whose mask bit is clear -- confirm select semantics. */
1897#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1898 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1899 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1900 (__v8df)(__m512d)(W)))
/* _mm512_maskz_add_round_pd(U, A, B, R): as the mask form, but the
   third select operand is _mm512_setzero_pd(). */
1902#define _mm512_maskz_add_round_pd(U, A, B, R) \
1903 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1904 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1905 (__v8df)_mm512_setzero_pd()))
/* _mm512_add_round_ps(A, B, R): expands to __builtin_ia32_addps512
   applied to A and B (each cast via __m512 to __v16sf) and (int)(R);
   the result is cast to __m512. */
1907#define _mm512_add_round_ps(A, B, R) \
1908 ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
1909 (__v16sf)(__m512)(B), (int)(R)))
/* _mm512_mask_add_round_ps(W, U, A, B, R): expands to
   __builtin_ia32_selectps_512 with the mask (__mmask16)(U), the value
   of _mm512_add_round_ps(A, B, R), and W. Presumably W supplies the
   lanes whose mask bit is clear -- confirm select semantics. */
1911#define _mm512_mask_add_round_ps(W, U, A, B, R) \
1912 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1913 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1914 (__v16sf)(__m512)(W)))
/* _mm512_maskz_add_round_ps(U, A, B, R): as the mask form, but the
   third select operand is _mm512_setzero_ps(). */
1916#define _mm512_maskz_add_round_ps(U, A, B, R) \
1917 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1918 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1919 (__v16sf)_mm512_setzero_ps()))
1924 return __builtin_ia32_selectss_128(__U, __A, __W);
/* _mm_sub_round_ss(A, B, R): expands to
   __builtin_ia32_subss_round_mask with A and B (each cast via __m128 to
   __v4sf), _mm_setzero_ps() as the third operand, an all-ones mask
   (__mmask8)-1, and (int)(R). The result is cast to __m128. */
1932#define _mm_sub_round_ss(A, B, R) \
1933 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1934 (__v4sf)(__m128)(B), \
1935 (__v4sf)_mm_setzero_ps(), \
1936 (__mmask8)-1, (int)(R)))
1938#define _mm_mask_sub_round_ss(W, U, A, B, R) \
1939 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1940 (__v4sf)(__m128)(B), \
1941 (__v4sf)(__m128)(W), (__mmask8)(U), \
/* _mm_maskz_sub_round_ss(U, A, B, R): same builtin call as
   _mm_sub_round_ss, except that the mask operand is the caller's
   (__mmask8)(U) instead of all-ones; the third operand is still
   _mm_setzero_ps(). */
1944#define _mm_maskz_sub_round_ss(U, A, B, R) \
1945 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1946 (__v4sf)(__m128)(B), \
1947 (__v4sf)_mm_setzero_ps(), \
1948 (__mmask8)(U), (int)(R)))
1953 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* The three macros below all expand to __builtin_ia32_subsd_round_mask
   with A and B (each cast via __m128d to __v2df) as the first two
   operands and (int)(R) as the last; they differ only in the third
   operand and the mask. The result is cast to __m128d. */
/* _mm_sub_round_sd(A, B, R): third operand _mm_setzero_pd(), mask
   (__mmask8)-1 (all ones). */
1962#define _mm_sub_round_sd(A, B, R) \
1963 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1964 (__v2df)(__m128d)(B), \
1965 (__v2df)_mm_setzero_pd(), \
1966 (__mmask8)-1, (int)(R)))
/* _mm_mask_sub_round_sd(W, U, A, B, R): third operand W, mask
   (__mmask8)(U). */
1968#define _mm_mask_sub_round_sd(W, U, A, B, R) \
1969 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1970 (__v2df)(__m128d)(B), \
1971 (__v2df)(__m128d)(W), \
1972 (__mmask8)(U), (int)(R)))
/* _mm_maskz_sub_round_sd(U, A, B, R): third operand _mm_setzero_pd(),
   mask (__mmask8)(U). */
1974#define _mm_maskz_sub_round_sd(U, A, B, R) \
1975 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1976 (__v2df)(__m128d)(B), \
1977 (__v2df)_mm_setzero_pd(), \
1978 (__mmask8)(U), (int)(R)))
1982 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1989 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1996 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2003 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* _mm512_sub_round_pd(A, B, R): expands to __builtin_ia32_subpd512
   applied to A and B (each cast via __m512d to __v8df) and (int)(R);
   the result is cast to __m512d. */
2008#define _mm512_sub_round_pd(A, B, R) \
2009 ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
2010 (__v8df)(__m512d)(B), (int)(R)))
/* _mm512_mask_sub_round_pd(W, U, A, B, R): expands to
   __builtin_ia32_selectpd_512 with the mask (__mmask8)(U), the value of
   _mm512_sub_round_pd(A, B, R), and W. Presumably W supplies the lanes
   whose mask bit is clear -- confirm select semantics. */
2012#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
2013 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2014 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2015 (__v8df)(__m512d)(W)))
/* _mm512_maskz_sub_round_pd(U, A, B, R): as the mask form, but the
   third select operand is _mm512_setzero_pd(). */
2017#define _mm512_maskz_sub_round_pd(U, A, B, R) \
2018 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2019 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2020 (__v8df)_mm512_setzero_pd()))
/* _mm512_sub_round_ps(A, B, R): expands to __builtin_ia32_subps512
   applied to A and B (each cast via __m512 to __v16sf) and (int)(R);
   the result is cast to __m512. */
2022#define _mm512_sub_round_ps(A, B, R) \
2023 ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
2024 (__v16sf)(__m512)(B), (int)(R)))
/* _mm512_mask_sub_round_ps(W, U, A, B, R): expands to
   __builtin_ia32_selectps_512 with the mask (__mmask16)(U), the value
   of _mm512_sub_round_ps(A, B, R), and W. Presumably W supplies the
   lanes whose mask bit is clear -- confirm select semantics. */
2026#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
2027 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2028 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2029 (__v16sf)(__m512)(W)))
/* _mm512_maskz_sub_round_ps(U, A, B, R): as the mask form, but the
   third select operand is _mm512_setzero_ps(). */
2031#define _mm512_maskz_sub_round_ps(U, A, B, R) \
2032 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2033 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2034 (__v16sf)_mm512_setzero_ps()))
2039 return __builtin_ia32_selectss_128(__U, __A, __W);
/* _mm_mul_round_ss(A, B, R): expands to
   __builtin_ia32_mulss_round_mask with A and B (each cast via __m128 to
   __v4sf), _mm_setzero_ps() as the third operand, an all-ones mask
   (__mmask8)-1, and (int)(R). The result is cast to __m128. */
2047#define _mm_mul_round_ss(A, B, R) \
2048 ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2049 (__v4sf)(__m128)(B), \
2050 (__v4sf)_mm_setzero_ps(), \
2051 (__mmask8)-1, (int)(R)))
2053#define _mm_mask_mul_round_ss(W, U, A, B, R) \
2054 ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2055 (__v4sf)(__m128)(B), \
2056 (__v4sf)(__m128)(W), (__mmask8)(U), \
/* _mm_maskz_mul_round_ss(U, A, B, R): same builtin call as
   _mm_mul_round_ss, except that the mask operand is the caller's
   (__mmask8)(U) instead of all-ones; the third operand is still
   _mm_setzero_ps(). */
2059#define _mm_maskz_mul_round_ss(U, A, B, R) \
2060 ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2061 (__v4sf)(__m128)(B), \
2062 (__v4sf)_mm_setzero_ps(), \
2063 (__mmask8)(U), (int)(R)))
2068 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* The three macros below all expand to __builtin_ia32_mulsd_round_mask
   with A and B (each cast via __m128d to __v2df) as the first two
   operands and (int)(R) as the last; they differ only in the third
   operand and the mask. The result is cast to __m128d. */
/* _mm_mul_round_sd(A, B, R): third operand _mm_setzero_pd(), mask
   (__mmask8)-1 (all ones). */
2077#define _mm_mul_round_sd(A, B, R) \
2078 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2079 (__v2df)(__m128d)(B), \
2080 (__v2df)_mm_setzero_pd(), \
2081 (__mmask8)-1, (int)(R)))
/* _mm_mask_mul_round_sd(W, U, A, B, R): third operand W, mask
   (__mmask8)(U). */
2083#define _mm_mask_mul_round_sd(W, U, A, B, R) \
2084 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2085 (__v2df)(__m128d)(B), \
2086 (__v2df)(__m128d)(W), \
2087 (__mmask8)(U), (int)(R)))
/* _mm_maskz_mul_round_sd(U, A, B, R): third operand _mm_setzero_pd(),
   mask (__mmask8)(U). */
2089#define _mm_maskz_mul_round_sd(U, A, B, R) \
2090 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2091 (__v2df)(__m128d)(B), \
2092 (__v2df)_mm_setzero_pd(), \
2093 (__mmask8)(U), (int)(R)))
2097 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2104 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2111 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2118 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* _mm512_mul_round_pd(A, B, R): expands to __builtin_ia32_mulpd512
   applied to A and B (each cast via __m512d to __v8df) and (int)(R);
   the result is cast to __m512d. */
2123#define _mm512_mul_round_pd(A, B, R) \
2124 ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
2125 (__v8df)(__m512d)(B), (int)(R)))
/* _mm512_mask_mul_round_pd(W, U, A, B, R): expands to
   __builtin_ia32_selectpd_512 with the mask (__mmask8)(U), the value of
   _mm512_mul_round_pd(A, B, R), and W. Presumably W supplies the lanes
   whose mask bit is clear -- confirm select semantics. */
2127#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
2128 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2129 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2130 (__v8df)(__m512d)(W)))
/* _mm512_maskz_mul_round_pd(U, A, B, R): as the mask form, but the
   third select operand is _mm512_setzero_pd(). */
2132#define _mm512_maskz_mul_round_pd(U, A, B, R) \
2133 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2134 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2135 (__v8df)_mm512_setzero_pd()))
/* _mm512_mul_round_ps(A, B, R): expands to __builtin_ia32_mulps512
   applied to A and B (each cast via __m512 to __v16sf) and (int)(R);
   the result is cast to __m512. */
2137#define _mm512_mul_round_ps(A, B, R) \
2138 ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
2139 (__v16sf)(__m512)(B), (int)(R)))
/* _mm512_mask_mul_round_ps(W, U, A, B, R): expands to
   __builtin_ia32_selectps_512 with the mask (__mmask16)(U), the value
   of _mm512_mul_round_ps(A, B, R), and W. Presumably W supplies the
   lanes whose mask bit is clear -- confirm select semantics. */
2141#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
2142 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2143 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2144 (__v16sf)(__m512)(W)))
/* _mm512_maskz_mul_round_ps(U, A, B, R): as the mask form, but the
   third select operand is _mm512_setzero_ps(). */
2146#define _mm512_maskz_mul_round_ps(U, A, B, R) \
2147 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2148 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2149 (__v16sf)_mm512_setzero_ps()))
2154 return __builtin_ia32_selectss_128(__U, __A, __W);
/* _mm_div_round_ss(A, B, R): expands to
   __builtin_ia32_divss_round_mask with A and B (each cast via __m128 to
   __v4sf), _mm_setzero_ps() as the third operand, an all-ones mask
   (__mmask8)-1, and (int)(R). The result is cast to __m128. */
2163#define _mm_div_round_ss(A, B, R) \
2164 ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2165 (__v4sf)(__m128)(B), \
2166 (__v4sf)_mm_setzero_ps(), \
2167 (__mmask8)-1, (int)(R)))
2169#define _mm_mask_div_round_ss(W, U, A, B, R) \
2170 ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2171 (__v4sf)(__m128)(B), \
2172 (__v4sf)(__m128)(W), (__mmask8)(U), \
/* _mm_maskz_div_round_ss(U, A, B, R): same builtin call as
   _mm_div_round_ss, except that the mask operand is the caller's
   (__mmask8)(U) instead of all-ones; the third operand is still
   _mm_setzero_ps(). */
2175#define _mm_maskz_div_round_ss(U, A, B, R) \
2176 ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2177 (__v4sf)(__m128)(B), \
2178 (__v4sf)_mm_setzero_ps(), \
2179 (__mmask8)(U), (int)(R)))
2184 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* The three macros below all expand to __builtin_ia32_divsd_round_mask
   with A and B (each cast via __m128d to __v2df) as the first two
   operands and (int)(R) as the last; they differ only in the third
   operand and the mask. The result is cast to __m128d. */
/* _mm_div_round_sd(A, B, R): third operand _mm_setzero_pd(), mask
   (__mmask8)-1 (all ones). */
2193#define _mm_div_round_sd(A, B, R) \
2194 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2195 (__v2df)(__m128d)(B), \
2196 (__v2df)_mm_setzero_pd(), \
2197 (__mmask8)-1, (int)(R)))
/* _mm_mask_div_round_sd(W, U, A, B, R): third operand W, mask
   (__mmask8)(U). */
2199#define _mm_mask_div_round_sd(W, U, A, B, R) \
2200 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2201 (__v2df)(__m128d)(B), \
2202 (__v2df)(__m128d)(W), \
2203 (__mmask8)(U), (int)(R)))
/* _mm_maskz_div_round_sd(U, A, B, R): third operand _mm_setzero_pd(),
   mask (__mmask8)(U). */
2205#define _mm_maskz_div_round_sd(U, A, B, R) \
2206 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2207 (__v2df)(__m128d)(B), \
2208 (__v2df)_mm_setzero_pd(), \
2209 (__mmask8)(U), (int)(R)))
2211static __inline __m512d
2213 return (__m512d)((__v8df)
__a/(__v8df)
__b);
2218 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2225 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2232 return (__m512)((__v16sf)
__a/(__v16sf)
__b);
2237 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2244 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* _mm512_div_round_pd(A, B, R): expands to __builtin_ia32_divpd512
   applied to A and B (each cast via __m512d to __v8df) and (int)(R);
   the result is cast to __m512d. */
2249#define _mm512_div_round_pd(A, B, R) \
2250 ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
2251 (__v8df)(__m512d)(B), (int)(R)))
/* _mm512_mask_div_round_pd(W, U, A, B, R): expands to
   __builtin_ia32_selectpd_512 with the mask (__mmask8)(U), the value of
   _mm512_div_round_pd(A, B, R), and W. Presumably W supplies the lanes
   whose mask bit is clear -- confirm select semantics. */
2253#define _mm512_mask_div_round_pd(W, U, A, B, R) \
2254 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2255 (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2256 (__v8df)(__m512d)(W)))
/* _mm512_maskz_div_round_pd(U, A, B, R): as the mask form, but the
   third select operand is _mm512_setzero_pd(). */
2258#define _mm512_maskz_div_round_pd(U, A, B, R) \
2259 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2260 (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2261 (__v8df)_mm512_setzero_pd()))
/* _mm512_div_round_ps(A, B, R): expands to __builtin_ia32_divps512
   applied to A and B (each cast via __m512 to __v16sf) and (int)(R);
   the result is cast to __m512. */
2263#define _mm512_div_round_ps(A, B, R) \
2264 ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
2265 (__v16sf)(__m512)(B), (int)(R)))
/* _mm512_mask_div_round_ps(W, U, A, B, R): expands to
   __builtin_ia32_selectps_512 with the mask (__mmask16)(U), the value
   of _mm512_div_round_ps(A, B, R), and W. Presumably W supplies the
   lanes whose mask bit is clear -- confirm select semantics. */
2267#define _mm512_mask_div_round_ps(W, U, A, B, R) \
2268 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2269 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2270 (__v16sf)(__m512)(W)))
/* _mm512_maskz_div_round_ps(U, A, B, R): as the mask form, but the
   third select operand is _mm512_setzero_ps(). */
2272#define _mm512_maskz_div_round_ps(U, A, B, R) \
2273 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2274 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2275 (__v16sf)_mm512_setzero_ps()))
2277#define _mm512_roundscale_ps(A, B) \
2278 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
2279 (__v16sf)_mm512_undefined_ps(), \
2281 _MM_FROUND_CUR_DIRECTION))
/* _mm512_mask_roundscale_ps(A, B, C, imm): note the parameter roles, which
 * the casts make explicit: C is the __v16sf input operand, imm the int
 * immediate, A the third (vector) operand and B the __mmask16.
 * The final builtin argument is fixed to _MM_FROUND_CUR_DIRECTION.
 * NOTE(review): A is presumably the pass-through vector for lanes masked
 * off by B -- confirm against the Intel Intrinsics Guide. */
2283#define _mm512_mask_roundscale_ps(A, B, C, imm) \
2284 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2285 (__v16sf)(__m512)(A), (__mmask16)(B), \
2286 _MM_FROUND_CUR_DIRECTION))
2288#define _mm512_maskz_roundscale_ps(A, B, imm) \
2289 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2290 (__v16sf)_mm512_setzero_ps(), \
2292 _MM_FROUND_CUR_DIRECTION))
2294#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
2295 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2296 (__v16sf)(__m512)(A), (__mmask16)(B), \
/* _mm512_maskz_roundscale_round_ps(A, B, imm, R): here A is the __mmask16
 * and B the __v16sf input operand (see the casts). The third builtin operand
 * is _mm512_setzero_ps(); imm and R are forwarded as ints.
 * NOTE(review): R is presumably an _MM_FROUND_* control -- confirm. */
2299#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
2300 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2301 (__v16sf)_mm512_setzero_ps(), \
2302 (__mmask16)(A), (int)(R)))
/* _mm512_roundscale_round_ps(A, imm, R): unmasked form. A is the __v16sf
 * input, the third operand is _mm512_undefined_ps() and the mask is all-ones. */
2304#define _mm512_roundscale_round_ps(A, imm, R) \
2305 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
2306 (__v16sf)_mm512_undefined_ps(), \
2307 (__mmask16)-1, (int)(R)))
2309#define _mm512_roundscale_pd(A, B) \
2310 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
2311 (__v8df)_mm512_undefined_pd(), \
2313 _MM_FROUND_CUR_DIRECTION))
/* _mm512_mask_roundscale_pd(A, B, C, imm): the casts fix the parameter
 * roles: C is the __v8df input operand, imm the int immediate, A the third
 * (vector) operand and B the __mmask8. The final builtin argument is fixed
 * to _MM_FROUND_CUR_DIRECTION.
 * NOTE(review): A is presumably the pass-through vector for lanes masked
 * off by B -- confirm against the Intel Intrinsics Guide. */
2315#define _mm512_mask_roundscale_pd(A, B, C, imm) \
2316 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2317 (__v8df)(__m512d)(A), (__mmask8)(B), \
2318 _MM_FROUND_CUR_DIRECTION))
2320#define _mm512_maskz_roundscale_pd(A, B, imm) \
2321 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2322 (__v8df)_mm512_setzero_pd(), \
2324 _MM_FROUND_CUR_DIRECTION))
2326#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
2327 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2328 (__v8df)(__m512d)(A), (__mmask8)(B), \
/* _mm512_maskz_roundscale_round_pd(A, B, imm, R): here A is the __mmask8
 * and B the __v8df input operand (see the casts). The third builtin operand
 * is _mm512_setzero_pd(); imm and R are forwarded as ints.
 * NOTE(review): R is presumably an _MM_FROUND_* control -- confirm. */
2331#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
2332 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2333 (__v8df)_mm512_setzero_pd(), \
2334 (__mmask8)(A), (int)(R)))
/* _mm512_roundscale_round_pd(A, imm, R): unmasked form. A is the __v8df
 * input, the third operand is _mm512_undefined_pd() and the mask is all-ones. */
2336#define _mm512_roundscale_round_pd(A, imm, R) \
2337 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
2338 (__v8df)_mm512_undefined_pd(), \
2339 (__mmask8)-1, (int)(R)))
/* fmadd with explicit int R, double precision. Every variant forwards
 * A, B, C unmodified as __v8df, then a __mmask8 and (int)R, to one of the
 * __builtin_ia32_vfmaddpd512_{mask,mask3,maskz} builtins and casts the result
 * to __m512d.
 * NOTE(review): the three builtin suffixes presumably differ in what masked-off
 * lanes receive (first operand / third operand / zero) -- confirm against the
 * Intel Intrinsics Guide; R is presumably an _MM_FROUND_* control. */
/* Unmasked: _mask builtin with an all-ones mask. */
2341#define _mm512_fmadd_round_pd(A, B, C, R) \
2342 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2343 (__v8df)(__m512d)(B), \
2344 (__v8df)(__m512d)(C), \
2345 (__mmask8)-1, (int)(R)))
/* Mask is the second macro parameter (U); uses the _mask builtin. */
2348#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
2349 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2350 (__v8df)(__m512d)(B), \
2351 (__v8df)(__m512d)(C), \
2352 (__mmask8)(U), (int)(R)))
/* Mask is the fourth macro parameter (U); uses the _mask3 builtin. */
2355#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
2356 ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
2357 (__v8df)(__m512d)(B), \
2358 (__v8df)(__m512d)(C), \
2359 (__mmask8)(U), (int)(R)))
/* Mask is the first macro parameter (U); uses the _maskz builtin. */
2362#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
2363 ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2364 (__v8df)(__m512d)(B), \
2365 (__v8df)(__m512d)(C), \
2366 (__mmask8)(U), (int)(R)))
/* fmsub with explicit int R, double precision. Implemented on the vfmadd
 * builtins by applying vector unary minus to C after its cast, so the
 * builtin sees (A, B, -C). Result is cast to __m512d. */
/* Unmasked: _mask builtin with an all-ones mask. */
2369#define _mm512_fmsub_round_pd(A, B, C, R) \
2370 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2371 (__v8df)(__m512d)(B), \
2372 -(__v8df)(__m512d)(C), \
2373 (__mmask8)-1, (int)(R)))
/* _mask builtin with caller mask U (second macro parameter). */
2376#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
2377 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2378 (__v8df)(__m512d)(B), \
2379 -(__v8df)(__m512d)(C), \
2380 (__mmask8)(U), (int)(R)))
/* _maskz builtin with caller mask U (first macro parameter). */
2383#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
2384 ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2385 (__v8df)(__m512d)(B), \
2386 -(__v8df)(__m512d)(C), \
2387 (__mmask8)(U), (int)(R)))
/* fnmadd with explicit int R, double precision. Implemented on the vfmadd
 * builtins by negating A (vector unary minus after the cast), so the builtin
 * sees (-A, B, C). Result is cast to __m512d. */
/* Unmasked: _mask builtin with an all-ones mask. */
2390#define _mm512_fnmadd_round_pd(A, B, C, R) \
2391 ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2392 (__v8df)(__m512d)(B), \
2393 (__v8df)(__m512d)(C), \
2394 (__mmask8)-1, (int)(R)))
/* _mask3 builtin with caller mask U (fourth macro parameter). */
2397#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
2398 ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
2399 (__v8df)(__m512d)(B), \
2400 (__v8df)(__m512d)(C), \
2401 (__mmask8)(U), (int)(R)))
/* _maskz builtin with caller mask U (first macro parameter). */
2404#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
2405 ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2406 (__v8df)(__m512d)(B), \
2407 (__v8df)(__m512d)(C), \
2408 (__mmask8)(U), (int)(R)))
/* fnmsub with explicit int R, double precision. Implemented on the vfmadd
 * builtins by negating both A and C, so the builtin sees (-A, B, -C).
 * Result is cast to __m512d. */
/* Unmasked: _mask builtin with an all-ones mask. */
2411#define _mm512_fnmsub_round_pd(A, B, C, R) \
2412 ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2413 (__v8df)(__m512d)(B), \
2414 -(__v8df)(__m512d)(C), \
2415 (__mmask8)-1, (int)(R)))
/* _maskz builtin with caller mask U (first macro parameter). */
2418#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
2419 ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2420 (__v8df)(__m512d)(B), \
2421 -(__v8df)(__m512d)(C), \
2422 (__mmask8)(U), (int)(R)))
2426 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2432 return (__m512d)__builtin_ia32_selectpd_512(
2438 return (__m512d)__builtin_ia32_selectpd_512(
2444 return (__m512d)__builtin_ia32_selectpd_512(
2451 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2457 return (__m512d)__builtin_ia32_selectpd_512(
2463 return (__m512d)__builtin_ia32_selectpd_512(
2469 return (__m512d)__builtin_ia32_selectpd_512(
2476 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2482 return (__m512d)__builtin_ia32_selectpd_512(
2488 return (__m512d)__builtin_ia32_selectpd_512(
2494 return (__m512d)__builtin_ia32_selectpd_512(
2501 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2507 return (__m512d)__builtin_ia32_selectpd_512(
2513 return (__m512d)__builtin_ia32_selectpd_512(
2519 return (__m512d)__builtin_ia32_selectpd_512(
/* fmadd with explicit int R, single precision. Every variant forwards
 * A, B, C unmodified as __v16sf, then a __mmask16 and (int)R, to one of the
 * __builtin_ia32_vfmaddps512_{mask,mask3,maskz} builtins and casts the result
 * to __m512.
 * NOTE(review): the three builtin suffixes presumably differ in what masked-off
 * lanes receive (first operand / third operand / zero) -- confirm against the
 * Intel Intrinsics Guide; R is presumably an _MM_FROUND_* control. */
/* Unmasked: _mask builtin with an all-ones mask. */
2524#define _mm512_fmadd_round_ps(A, B, C, R) \
2525 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2526 (__v16sf)(__m512)(B), \
2527 (__v16sf)(__m512)(C), \
2528 (__mmask16)-1, (int)(R)))
/* Mask is the second macro parameter (U); uses the _mask builtin. */
2531#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
2532 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2533 (__v16sf)(__m512)(B), \
2534 (__v16sf)(__m512)(C), \
2535 (__mmask16)(U), (int)(R)))
/* Mask is the fourth macro parameter (U); uses the _mask3 builtin. */
2538#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
2539 ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
2540 (__v16sf)(__m512)(B), \
2541 (__v16sf)(__m512)(C), \
2542 (__mmask16)(U), (int)(R)))
/* Mask is the first macro parameter (U); uses the _maskz builtin. */
2545#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
2546 ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2547 (__v16sf)(__m512)(B), \
2548 (__v16sf)(__m512)(C), \
2549 (__mmask16)(U), (int)(R)))
/* fmsub with explicit int R, single precision. Implemented on the vfmadd
 * builtins by applying vector unary minus to C after its cast, so the
 * builtin sees (A, B, -C). Result is cast to __m512. */
/* Unmasked: _mask builtin with an all-ones mask. */
2552#define _mm512_fmsub_round_ps(A, B, C, R) \
2553 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2554 (__v16sf)(__m512)(B), \
2555 -(__v16sf)(__m512)(C), \
2556 (__mmask16)-1, (int)(R)))
/* _mask builtin with caller mask U (second macro parameter). */
2559#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
2560 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2561 (__v16sf)(__m512)(B), \
2562 -(__v16sf)(__m512)(C), \
2563 (__mmask16)(U), (int)(R)))
/* _maskz builtin with caller mask U (first macro parameter). */
2566#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
2567 ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2568 (__v16sf)(__m512)(B), \
2569 -(__v16sf)(__m512)(C), \
2570 (__mmask16)(U), (int)(R)))
/* fnmadd with explicit int R, single precision, built on the vfmadd builtins
 * by negating one multiplicand. Result is cast to __m512.
 * Note the unmasked form negates B while the mask3/maskz forms negate A.
 * NOTE(review): either choice flips the sign of the product A*B, so the
 * variants are presumably numerically equivalent -- confirm if NaN sign or
 * payload propagation matters to a caller. */
/* Unmasked: builtin sees (A, -B, C) with an all-ones mask. */
2573#define _mm512_fnmadd_round_ps(A, B, C, R) \
2574 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2575 -(__v16sf)(__m512)(B), \
2576 (__v16sf)(__m512)(C), \
2577 (__mmask16)-1, (int)(R)))
/* _mask3 builtin sees (-A, B, C) with caller mask U. */
2580#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
2581 ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
2582 (__v16sf)(__m512)(B), \
2583 (__v16sf)(__m512)(C), \
2584 (__mmask16)(U), (int)(R)))
/* _maskz builtin sees (-A, B, C) with caller mask U. */
2587#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
2588 ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2589 (__v16sf)(__m512)(B), \
2590 (__v16sf)(__m512)(C), \
2591 (__mmask16)(U), (int)(R)))
/* fnmsub with explicit int R, single precision, built on the vfmadd builtins
 * by negating one multiplicand and C. Result is cast to __m512.
 * The unmasked form negates B; the maskz form negates A.
 * NOTE(review): both flip the sign of A*B, so they are presumably
 * numerically equivalent -- confirm if NaN sign/payload matters. */
/* Unmasked: builtin sees (A, -B, -C) with an all-ones mask. */
2594#define _mm512_fnmsub_round_ps(A, B, C, R) \
2595 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2596 -(__v16sf)(__m512)(B), \
2597 -(__v16sf)(__m512)(C), \
2598 (__mmask16)-1, (int)(R)))
/* _maskz builtin sees (-A, B, -C) with caller mask U. */
2601#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
2602 ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2603 (__v16sf)(__m512)(B), \
2604 -(__v16sf)(__m512)(C), \
2605 (__mmask16)(U), (int)(R)))
2609 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2615 return (__m512)__builtin_ia32_selectps_512(
2621 return (__m512)__builtin_ia32_selectps_512(
2627 return (__m512)__builtin_ia32_selectps_512(
2634 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2640 return (__m512)__builtin_ia32_selectps_512(
2646 return (__m512)__builtin_ia32_selectps_512(
2652 return (__m512)__builtin_ia32_selectps_512(
2659 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2665 return (__m512)__builtin_ia32_selectps_512(
2671 return (__m512)__builtin_ia32_selectps_512(
2677 return (__m512)__builtin_ia32_selectps_512(
2684 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2690 return (__m512)__builtin_ia32_selectps_512(
2696 return (__m512)__builtin_ia32_selectps_512(
2702 return (__m512)__builtin_ia32_selectps_512(
/* fmaddsub with explicit int R, double precision. Every variant forwards
 * A, B, C unmodified as __v8df, then a __mmask8 and (int)R, to one of the
 * __builtin_ia32_vfmaddsubpd512_{mask,mask3,maskz} builtins; the result is
 * cast to __m512d.
 * NOTE(review): which lanes add and which subtract is decided inside the
 * builtin and is not visible here -- see the Intel Intrinsics Guide. */
/* Unmasked: _mask builtin with an all-ones mask. */
2707#define _mm512_fmaddsub_round_pd(A, B, C, R) \
2708 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2709 (__v8df)(__m512d)(B), \
2710 (__v8df)(__m512d)(C), \
2711 (__mmask8)-1, (int)(R)))
/* _mask builtin with caller mask U (second macro parameter). */
2714#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
2715 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2716 (__v8df)(__m512d)(B), \
2717 (__v8df)(__m512d)(C), \
2718 (__mmask8)(U), (int)(R)))
/* _mask3 builtin with caller mask U (fourth macro parameter). */
2721#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
2722 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
2723 (__v8df)(__m512d)(B), \
2724 (__v8df)(__m512d)(C), \
2725 (__mmask8)(U), (int)(R)))
/* _maskz builtin with caller mask U (first macro parameter). */
2728#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
2729 ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2730 (__v8df)(__m512d)(B), \
2731 (__v8df)(__m512d)(C), \
2732 (__mmask8)(U), (int)(R)))
/* fmsubadd with explicit int R, double precision. Implemented on the
 * vfmaddsub builtins by negating C (vector unary minus after the cast), so
 * the builtin sees (A, B, -C). Result is cast to __m512d. */
/* Unmasked: _mask builtin with an all-ones mask. */
2735#define _mm512_fmsubadd_round_pd(A, B, C, R) \
2736 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2737 (__v8df)(__m512d)(B), \
2738 -(__v8df)(__m512d)(C), \
2739 (__mmask8)-1, (int)(R)))
/* _mask builtin with caller mask U (second macro parameter). */
2742#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
2743 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2744 (__v8df)(__m512d)(B), \
2745 -(__v8df)(__m512d)(C), \
2746 (__mmask8)(U), (int)(R)))
/* _maskz builtin with caller mask U (first macro parameter). */
2749#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
2750 ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2751 (__v8df)(__m512d)(B), \
2752 -(__v8df)(__m512d)(C), \
2753 (__mmask8)(U), (int)(R)))
2759 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2769 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2779 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2789 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2799 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2809 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2819 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
/* fmaddsub with explicit int R, single precision. Every variant forwards
 * A, B, C unmodified as __v16sf, then a __mmask16 and (int)R, to one of the
 * __builtin_ia32_vfmaddsubps512_{mask,mask3,maskz} builtins; the result is
 * cast to __m512.
 * NOTE(review): which lanes add and which subtract is decided inside the
 * builtin and is not visible here -- see the Intel Intrinsics Guide. */
/* Unmasked: _mask builtin with an all-ones mask. */
2826#define _mm512_fmaddsub_round_ps(A, B, C, R) \
2827 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2828 (__v16sf)(__m512)(B), \
2829 (__v16sf)(__m512)(C), \
2830 (__mmask16)-1, (int)(R)))
/* _mask builtin with caller mask U (second macro parameter). */
2833#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
2834 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2835 (__v16sf)(__m512)(B), \
2836 (__v16sf)(__m512)(C), \
2837 (__mmask16)(U), (int)(R)))
/* _mask3 builtin with caller mask U (fourth macro parameter). */
2840#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
2841 ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
2842 (__v16sf)(__m512)(B), \
2843 (__v16sf)(__m512)(C), \
2844 (__mmask16)(U), (int)(R)))
/* _maskz builtin with caller mask U (first macro parameter). */
2847#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
2848 ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2849 (__v16sf)(__m512)(B), \
2850 (__v16sf)(__m512)(C), \
2851 (__mmask16)(U), (int)(R)))
/* fmsubadd with explicit int R, single precision. Implemented on the
 * vfmaddsub builtins by negating C (vector unary minus after the cast), so
 * the builtin sees (A, B, -C). Result is cast to __m512. */
/* Unmasked: _mask builtin with an all-ones mask. */
2854#define _mm512_fmsubadd_round_ps(A, B, C, R) \
2855 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2856 (__v16sf)(__m512)(B), \
2857 -(__v16sf)(__m512)(C), \
2858 (__mmask16)-1, (int)(R)))
/* _mask builtin with caller mask U (second macro parameter). */
2861#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
2862 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2863 (__v16sf)(__m512)(B), \
2864 -(__v16sf)(__m512)(C), \
2865 (__mmask16)(U), (int)(R)))
/* _maskz builtin with caller mask U (first macro parameter). */
2868#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
2869 ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2870 (__v16sf)(__m512)(B), \
2871 -(__v16sf)(__m512)(C), \
2872 (__mmask16)(U), (int)(R)))
2878 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2888 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2898 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2908 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2918 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2928 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2938 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
/* mask3 forms of fmsub / fmsubadd. Unlike the other fmsub macros in this
 * file, these do not negate C; they pass A, B, C unmodified to dedicated
 * vfmsub*/vfmsubadd* _mask3 builtins, with U as the mask and (int)R last.
 * NOTE(review): presumably C doubles as the pass-through for masked-off
 * lanes in the _mask3 builtins, which is why it cannot be pre-negated --
 * confirm against the Intel Intrinsics Guide. */
/* Double precision fmsub; result cast to __m512d. */
2945#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
2946 ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
2947 (__v8df)(__m512d)(B), \
2948 (__v8df)(__m512d)(C), \
2949 (__mmask8)(U), (int)(R)))
/* Single precision fmsub; result cast to __m512. */
2951#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
2952 ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
2953 (__v16sf)(__m512)(B), \
2954 (__v16sf)(__m512)(C), \
2955 (__mmask16)(U), (int)(R)))
/* Double precision fmsubadd; result cast to __m512d. */
2957#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
2958 ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
2959 (__v8df)(__m512d)(B), \
2960 (__v8df)(__m512d)(C), \
2961 (__mmask8)(U), (int)(R)))
2967 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
/* _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R): passes A, B, C unmodified
 * as __v16sf to __builtin_ia32_vfmsubaddps512_mask3 with U as the __mmask16
 * and (int)R last; the result is cast to __m512.
 * NOTE(review): C is presumably also the pass-through for lanes masked off
 * by U -- confirm against the Intel Intrinsics Guide. */
2974#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
2975 ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
2976 (__v16sf)(__m512)(B), \
2977 (__v16sf)(__m512)(C), \
2978 (__mmask16)(U), (int)(R)))
2984 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
/* Merge-masked fnmadd with explicit int R. Both macros call the vfmadd
 * _mask builtin with (A, -B, C): the negation is applied to B and A is
 * passed through untouched.
 * NOTE(review): A is presumably also the pass-through for lanes masked off
 * by U, which would explain negating B rather than A -- confirm. */
/* Double precision; U cast to __mmask8, result cast to __m512d. */
2991#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
2992 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2993 -(__v8df)(__m512d)(B), \
2994 (__v8df)(__m512d)(C), \
2995 (__mmask8)(U), (int)(R)))
/* Single precision; U cast to __mmask16, result cast to __m512. */
2997#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
2998 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2999 -(__v16sf)(__m512)(B), \
3000 (__v16sf)(__m512)(C), \
3001 (__mmask16)(U), (int)(R)))
/* Masked fnmsub with explicit int R, in two flavours:
 *  - _mask_ forms call the vfmadd _mask builtin with (A, -B, -C), leaving A
 *    unmodified;
 *  - _mask3_ forms call the vfmsub _mask3 builtin with (-A, B, C), leaving C
 *    unmodified.
 * NOTE(review): the untouched operand is presumably the pass-through for
 * lanes masked off by U in each builtin -- confirm against the Intel
 * Intrinsics Guide. */
/* Double precision, _mask flavour. */
3003#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3004 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3005 -(__v8df)(__m512d)(B), \
3006 -(__v8df)(__m512d)(C), \
3007 (__mmask8)(U), (int)(R)))
/* Double precision, _mask3 flavour. */
3010#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3011 ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
3012 (__v8df)(__m512d)(B), \
3013 (__v8df)(__m512d)(C), \
3014 (__mmask8)(U), (int)(R)))
/* Single precision, _mask flavour. */
3016#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3017 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3018 -(__v16sf)(__m512)(B), \
3019 -(__v16sf)(__m512)(C), \
3020 (__mmask16)(U), (int)(R)))
/* Single precision, _mask3 flavour. */
3023#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3024 ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3025 (__v16sf)(__m512)(B), \
3026 (__v16sf)(__m512)(C), \
3027 (__mmask16)(U), (int)(R)))
3033 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3040 return (__m512i)__builtin_ia32_selectd_512(__U,
3048 return (__m512i)__builtin_ia32_selectd_512(__U,
3056 return (__m512i)__builtin_ia32_selectd_512(__U,
3063 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3070 return (__m512i)__builtin_ia32_selectq_512(__U,
3078 return (__m512i)__builtin_ia32_selectq_512(__U,
3086 return (__m512i)__builtin_ia32_selectq_512(__U,
/* alignr on 64-bit elements. The unmasked macro calls
 * __builtin_ia32_alignq512 with A and B as __v8di and I as int. The masked
 * macros wrap its result in __builtin_ia32_selectq_512 with mask U and a
 * fallback vector (W, or _mm512_setzero_si512() for maskz).
 * NOTE(review): the select presumably takes the fallback for lanes whose
 * mask bit is clear; I/imm presumably must be a compile-time constant. */
3091#define _mm512_alignr_epi64(A, B, I) \
3092 ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
3093 (__v8di)(__m512i)(B), (int)(I)))
/* Merge-masked: fallback vector is W. */
3095#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
3096 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3097 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3098 (__v8di)(__m512i)(W)))
/* Zero-masked: fallback vector is all zeros. */
3100#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
3101 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3102 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3103 (__v8di)_mm512_setzero_si512()))
/* alignr on 32-bit elements. The unmasked macro calls
 * __builtin_ia32_alignd512 with A and B as __v16si and I as int. The masked
 * macros wrap its result in __builtin_ia32_selectd_512 with mask U and a
 * fallback vector (W, or _mm512_setzero_si512() for maskz).
 * NOTE(review): the select presumably takes the fallback for lanes whose
 * mask bit is clear; I/imm presumably must be a compile-time constant. */
3105#define _mm512_alignr_epi32(A, B, I) \
3106 ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
3107 (__v16si)(__m512i)(B), (int)(I)))
/* Merge-masked: fallback vector is W. */
3109#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
3110 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3111 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3112 (__v16si)(__m512i)(W)))
/* Zero-masked: fallback vector is all zeros. */
3114#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
3115 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3116 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3117 (__v16si)_mm512_setzero_si512()))
3120#define _mm512_extractf64x4_pd(A, I) \
3121 ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
3122 (__v4df)_mm256_setzero_pd(), \
3125#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
3126 ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3127 (__v4df)(__m256d)(W), \
3130#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
3131 ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3132 (__v4df)_mm256_setzero_pd(), \
3135#define _mm512_extractf32x4_ps(A, I) \
3136 ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
3137 (__v4sf)_mm_setzero_ps(), \
3140#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
3141 ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3142 (__v4sf)(__m128)(W), \
3145#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
3146 ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3147 (__v4sf)_mm_setzero_ps(), \
3154 return (__m512d) __builtin_ia32_selectpd_512 ((
__mmask8) __U,
3161 return (__m512) __builtin_ia32_selectps_512 ((
__mmask16) __U,
3168 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
3175 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
/* Single-precision compare-to-mask core macros. All reduce to
 * __builtin_ia32_cmpps512_mask(A, B, (int)P, mask, (int)R) and yield a
 * __mmask16. P is the comparison predicate (callers in this file pass
 * _CMP_* constants). */
/* Explicit R, all-ones input mask. */
3182#define _mm512_cmp_round_ps_mask(A, B, P, R) \
3183 ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3184 (__v16sf)(__m512)(B), (int)(P), \
3185 (__mmask16)-1, (int)(R)))
/* Explicit R, caller-supplied input mask U. */
3187#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
3188 ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3189 (__v16sf)(__m512)(B), (int)(P), \
3190 (__mmask16)(U), (int)(R)))
/* Convenience forms that fix R to _MM_FROUND_CUR_DIRECTION. */
3192#define _mm512_cmp_ps_mask(A, B, P) \
3193 _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3194#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
3195 _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
/* Named-predicate shorthands for single-precision compares. Each pair
 * forwards to _mm512_cmp_ps_mask / _mm512_mask_cmp_ps_mask with a fixed
 * _CMP_* predicate; in the _mask_ forms k is the input mask.
 *   eq -> _CMP_EQ_OQ     lt -> _CMP_LT_OS     le -> _CMP_LE_OS
 *   unord -> _CMP_UNORD_Q  neq -> _CMP_NEQ_UQ  nlt -> _CMP_NLT_US
 *   nle -> _CMP_NLE_US   ord -> _CMP_ORD_Q */
3197#define _mm512_cmpeq_ps_mask(A, B) \
3198 _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
3199#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
3200 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
3202#define _mm512_cmplt_ps_mask(A, B) \
3203 _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
3204#define _mm512_mask_cmplt_ps_mask(k, A, B) \
3205 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
3207#define _mm512_cmple_ps_mask(A, B) \
3208 _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
3209#define _mm512_mask_cmple_ps_mask(k, A, B) \
3210 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
3212#define _mm512_cmpunord_ps_mask(A, B) \
3213 _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
3214#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
3215 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
3217#define _mm512_cmpneq_ps_mask(A, B) \
3218 _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
3219#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
3220 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
3222#define _mm512_cmpnlt_ps_mask(A, B) \
3223 _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
3224#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
3225 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
3227#define _mm512_cmpnle_ps_mask(A, B) \
3228 _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
3229#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
3230 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
3232#define _mm512_cmpord_ps_mask(A, B) \
3233 _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
3234#define _mm512_mask_cmpord_ps_mask(k, A, B) \
3235 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
/* Double-precision compare-to-mask core macros. All reduce to
 * __builtin_ia32_cmppd512_mask(A, B, (int)P, mask, (int)R) and yield a
 * __mmask8. P is the comparison predicate (callers in this file pass
 * _CMP_* constants). */
/* Explicit R, all-ones input mask. */
3237#define _mm512_cmp_round_pd_mask(A, B, P, R) \
3238 ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3239 (__v8df)(__m512d)(B), (int)(P), \
3240 (__mmask8)-1, (int)(R)))
/* Explicit R, caller-supplied input mask U. */
3242#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
3243 ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3244 (__v8df)(__m512d)(B), (int)(P), \
3245 (__mmask8)(U), (int)(R)))
/* Convenience forms that fix R to _MM_FROUND_CUR_DIRECTION. */
3247#define _mm512_cmp_pd_mask(A, B, P) \
3248 _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3249#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
3250 _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
/* Named-predicate shorthands for double-precision compares. Each pair
 * forwards to _mm512_cmp_pd_mask / _mm512_mask_cmp_pd_mask with a fixed
 * _CMP_* predicate; in the _mask_ forms k is the input mask.
 *   eq -> _CMP_EQ_OQ     lt -> _CMP_LT_OS     le -> _CMP_LE_OS
 *   unord -> _CMP_UNORD_Q  neq -> _CMP_NEQ_UQ  nlt -> _CMP_NLT_US
 *   nle -> _CMP_NLE_US   ord -> _CMP_ORD_Q */
3252#define _mm512_cmpeq_pd_mask(A, B) \
3253 _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
3254#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
3255 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
3257#define _mm512_cmplt_pd_mask(A, B) \
3258 _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
3259#define _mm512_mask_cmplt_pd_mask(k, A, B) \
3260 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
3262#define _mm512_cmple_pd_mask(A, B) \
3263 _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
3264#define _mm512_mask_cmple_pd_mask(k, A, B) \
3265 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
3267#define _mm512_cmpunord_pd_mask(A, B) \
3268 _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
3269#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
3270 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
3272#define _mm512_cmpneq_pd_mask(A, B) \
3273 _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
3274#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
3275 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
3277#define _mm512_cmpnlt_pd_mask(A, B) \
3278 _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
3279#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
3280 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
3282#define _mm512_cmpnle_pd_mask(A, B) \
3283 _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
3284#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
3285 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
3287#define _mm512_cmpord_pd_mask(A, B) \
3288 _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
3289#define _mm512_mask_cmpord_pd_mask(k, A, B) \
3290 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
/* Macros over __builtin_ia32_cvttps2udq512_mask: A is passed as __v16sf,
 * followed by a __v16si second operand, a __mmask16 and (int)R; the result
 * is cast to __m512i.
 * NOTE(review): by name this is a truncating float -> unsigned 32-bit
 * conversion, with the second operand presumably supplying masked-off lanes
 * -- confirm against the Intel Intrinsics Guide. */
/* Unmasked: second operand is _mm512_undefined_epi32(), mask all-ones. */
3294#define _mm512_cvtt_roundps_epu32(A, R) \
3295 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3296 (__v16si)_mm512_undefined_epi32(), \
3297 (__mmask16)-1, (int)(R)))
/* Merge-masked: second operand is W, mask is U. */
3299#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3300 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3301 (__v16si)(__m512i)(W), \
3302 (__mmask16)(U), (int)(R)))
/* Zero-masked: second operand is _mm512_setzero_si512(), mask is U. */
3304#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3305 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3306 (__v16si)_mm512_setzero_si512(), \
3307 (__mmask16)(U), (int)(R)))
3313 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3323 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3332 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
/* Macros over __builtin_ia32_cvtdq2ps512_mask: A is passed as __v16si,
 * followed by a __v16sf second operand, a __mmask16 and (int)R; the result
 * is cast to __m512.
 * NOTE(review): by name this is a signed 32-bit integer -> float conversion,
 * with the second operand presumably supplying masked-off lanes and R an
 * _MM_FROUND_* control -- confirm against the Intel Intrinsics Guide. */
/* Unmasked: second operand is _mm512_setzero_ps(), mask all-ones. */
3338#define _mm512_cvt_roundepi32_ps(A, R) \
3339 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3340 (__v16sf)_mm512_setzero_ps(), \
3341 (__mmask16)-1, (int)(R)))
/* Merge-masked: second operand is W, mask is U. */
3343#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
3344 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3345 (__v16sf)(__m512)(W), \
3346 (__mmask16)(U), (int)(R)))
/* Zero-masked: second operand is _mm512_setzero_ps(), mask is U. */
3348#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
3349 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3350 (__v16sf)_mm512_setzero_ps(), \
3351 (__mmask16)(U), (int)(R)))
/* Macros over __builtin_ia32_cvtudq2ps512_mask: A is passed as __v16si (the
 * signed vector type is used for the cast even though the builtin name says
 * "udq"), followed by a __v16sf second operand, a __mmask16 and (int)R; the
 * result is cast to __m512.
 * NOTE(review): by name this is an unsigned 32-bit integer -> float
 * conversion; second operand presumably supplies masked-off lanes -- confirm. */
/* Unmasked: second operand is _mm512_setzero_ps(), mask all-ones. */
3353#define _mm512_cvt_roundepu32_ps(A, R) \
3354 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3355 (__v16sf)_mm512_setzero_ps(), \
3356 (__mmask16)-1, (int)(R)))
/* Merge-masked: second operand is W, mask is U. */
3358#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
3359 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3360 (__v16sf)(__m512)(W), \
3361 (__mmask16)(U), (int)(R)))
/* Zero-masked: second operand is _mm512_setzero_ps(), mask is U. */
3363#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
3364 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3365 (__v16sf)_mm512_setzero_ps(), \
3366 (__mmask16)(U), (int)(R)))
3368static __inline__ __m512
3370 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3375 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3382 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3389 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3394 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3401 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3418 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3423 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3430 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3437 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3442 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3449 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
/* _mm512_cvt_roundpd_ps(A, R): calls __builtin_ia32_cvtpd2ps512_mask with A
 * as __v8df, _mm256_setzero_ps() as the __v8sf second operand, an all-ones
 * __mmask8 and (int)R. The result is a 256-bit __m256 (eight floats from
 * eight doubles, per the vector types used in the casts).
 * NOTE(review): R is presumably an _MM_FROUND_* rounding control -- confirm. */
3464#define _mm512_cvt_roundpd_ps(A, R) \
3465 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3466 (__v8sf)_mm256_setzero_ps(), \
3467 (__mmask8)-1, (int)(R)))
3469#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3470 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3471 (__v8sf)(__m256)(W), (__mmask8)(U), \
/* _mm512_maskz_cvt_roundpd_ps(U, A, R): calls
 * __builtin_ia32_cvtpd2ps512_mask with A as __v8df, _mm256_setzero_ps() as
 * the __v8sf second operand, U as the __mmask8 and (int)R; result is __m256.
 * NOTE(review): the zero vector presumably supplies lanes masked off by U
 * -- confirm against the Intel Intrinsics Guide. */
3474#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3475 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3476 (__v8sf)_mm256_setzero_ps(), \
3477 (__mmask8)(U), (int)(R)))
3479static __inline__ __m256
3481 return (__m256)__builtin_ia32_cvtpd2ps512_mask(
3488 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3496 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3504 return (__m512) __builtin_shufflevector((__v8sf)
_mm512_cvtpd_ps(__A),
3506 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3511 return (__m512) __builtin_shufflevector (
3515 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3518#define _mm512_cvt_roundps_ph(A, I) \
3519 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3520 (__v16hi)_mm256_undefined_si256(), \
3523#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
3524 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3525 (__v16hi)(__m256i)(U), \
3528#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
3529 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3530 (__v16hi)_mm256_setzero_si256(), \
/* Object-like aliases: the cvtps_ph names expand to the corresponding
 * cvt_roundps_ph macros, so they take exactly the same argument lists. */
3533#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
3534#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
3535#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
/* Macros over __builtin_ia32_vcvtph2ps512_mask: A is a 256-bit vector passed
 * as __v16hi, followed by a __v16sf second operand, a __mmask16 and (int)R;
 * the result is cast to __m512.
 * NOTE(review): by name this widens 16 half-precision values to float, with
 * the second operand presumably supplying masked-off lanes -- confirm
 * against the Intel Intrinsics Guide. */
/* Unmasked: second operand is _mm512_undefined_ps(), mask all-ones. */
3537#define _mm512_cvt_roundph_ps(A, R) \
3538 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3539 (__v16sf)_mm512_undefined_ps(), \
3540 (__mmask16)-1, (int)(R)))
/* Merge-masked: second operand is W, mask is U. */
3542#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
3543 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3544 (__v16sf)(__m512)(W), \
3545 (__mmask16)(U), (int)(R)))
/* Zero-masked: second operand is _mm512_setzero_ps(), mask is U. */
3547#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
3548 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3549 (__v16sf)_mm512_setzero_ps(), \
3550 (__mmask16)(U), (int)(R)))
3556 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3566 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3575 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
/* Macros over __builtin_ia32_cvttpd2dq512_mask: A is passed as __v8df,
 * followed by a __v8si second operand, a __mmask8 and (int)R; the result is
 * a 256-bit __m256i.
 * NOTE(review): by name this is a truncating double -> signed 32-bit
 * conversion, with the second operand presumably supplying masked-off lanes
 * -- confirm against the Intel Intrinsics Guide. */
/* Unmasked: second operand is _mm256_setzero_si256(), mask all-ones. */
3581#define _mm512_cvtt_roundpd_epi32(A, R) \
3582 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3583 (__v8si)_mm256_setzero_si256(), \
3584 (__mmask8)-1, (int)(R)))
/* Merge-masked: second operand is W, mask is U. */
3586#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3587 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3588 (__v8si)(__m256i)(W), \
3589 (__mmask8)(U), (int)(R)))
/* Zero-masked: second operand is _mm256_setzero_si256(), mask is U. */
3591#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3592 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3593 (__v8si)_mm256_setzero_si256(), \
3594 (__mmask8)(U), (int)(R)))
3599 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)
__a,
3608 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3617 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
/* Macros over __builtin_ia32_cvttps2dq512_mask: A is passed as __v16sf,
 * followed by a __v16si second operand, a __mmask16 and (int)R; the result
 * is cast to __m512i.
 * NOTE(review): by name this is a truncating float -> signed 32-bit
 * conversion, with the second operand presumably supplying masked-off lanes
 * -- confirm against the Intel Intrinsics Guide. */
/* Unmasked: second operand is _mm512_setzero_si512(), mask all-ones. */
3623#define _mm512_cvtt_roundps_epi32(A, R) \
3624 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3625 (__v16si)_mm512_setzero_si512(), \
3626 (__mmask16)-1, (int)(R)))
/* Merge-masked: second operand is W, mask is U. */
3628#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3629 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3630 (__v16si)(__m512i)(W), \
3631 (__mmask16)(U), (int)(R)))
/* Zero-masked: second operand is _mm512_setzero_si512(), mask is U. */
3633#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3634 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3635 (__v16si)_mm512_setzero_si512(), \
3636 (__mmask16)(U), (int)(R)))
3642 __builtin_ia32_cvttps2dq512_mask((__v16sf)
__a,
3650 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3659 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
/* Macros over __builtin_ia32_cvtps2dq512_mask: A is passed as __v16sf,
 * followed by a __v16si second operand, a __mmask16 and (int)R; the result
 * is cast to __m512i.
 * NOTE(review): by name this is a float -> signed 32-bit conversion using
 * rounding control R, with the second operand presumably supplying
 * masked-off lanes -- confirm against the Intel Intrinsics Guide. */
/* Unmasked: second operand is _mm512_setzero_si512(), mask all-ones. */
3665#define _mm512_cvt_roundps_epi32(A, R) \
3666 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3667 (__v16si)_mm512_setzero_si512(), \
3668 (__mmask16)-1, (int)(R)))
/* Merge-masked: second operand is W, mask is U. */
3670#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
3671 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3672 (__v16si)(__m512i)(W), \
3673 (__mmask16)(U), (int)(R)))
/* Zero-masked: second operand is _mm512_setzero_si512(), mask is U. */
3675#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
3676 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3677 (__v16si)_mm512_setzero_si512(), \
3678 (__mmask16)(U), (int)(R)))
3683 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3692 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3701 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
/* Macros over __builtin_ia32_cvtpd2dq512_mask: A is passed as __v8df,
 * followed by a __v8si second operand, a __mmask8 and (int)R; the result is
 * a 256-bit __m256i.
 * NOTE(review): by name this is a double -> signed 32-bit conversion using
 * rounding control R, with the second operand presumably supplying
 * masked-off lanes -- confirm against the Intel Intrinsics Guide. */
/* Unmasked: second operand is _mm256_setzero_si256(), mask all-ones. */
3708#define _mm512_cvt_roundpd_epi32(A, R) \
3709 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3710 (__v8si)_mm256_setzero_si256(), \
3711 (__mmask8)-1, (int)(R)))
/* Merge-masked: second operand is W, mask is U. */
3713#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
3714 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3715 (__v8si)(__m256i)(W), \
3716 (__mmask8)(U), (int)(R)))
/* Zero-masked: second operand is _mm256_setzero_si256(), mask is U. */
3718#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
3719 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3720 (__v8si)_mm256_setzero_si256(), \
3721 (__mmask8)(U), (int)(R)))
3726 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3736 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3745 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
/* Macros over __builtin_ia32_cvtps2udq512_mask: A is passed as __v16sf,
 * followed by a __v16si second operand, a __mmask16 and (int)R; the result
 * is cast to __m512i.
 * NOTE(review): by name this is a float -> unsigned 32-bit conversion using
 * rounding control R, with the second operand presumably supplying
 * masked-off lanes -- confirm against the Intel Intrinsics Guide. */
/* Unmasked: second operand is _mm512_setzero_si512(), mask all-ones. */
3752#define _mm512_cvt_roundps_epu32(A, R) \
3753 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
3754 (__v16si)_mm512_setzero_si512(), \
3755 (__mmask16)-1, (int)(R)))
/* Merge-masked: second operand is W, mask is U. */
3757#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
3758 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
3759 (__v16si)(__m512i)(W), \
3760 (__mmask16)(U), (int)(R)))
/* Zero-masked: second operand is _mm512_setzero_si512(), mask is U. */
3762#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
3763 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
3764 (__v16si)_mm512_setzero_si512(), \
3765 (__mmask16)(U), (int)(R)))
3770 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
3780 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3789 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
/* Macros over __builtin_ia32_cvtpd2udq512_mask: A is passed as __v8df,
 * followed by a __v8si second operand, a __mmask8 and (int)R; the result is
 * a 256-bit __m256i.
 * NOTE(review): by name this is a double -> unsigned 32-bit conversion using
 * rounding control R, with the second operand presumably supplying
 * masked-off lanes -- confirm against the Intel Intrinsics Guide. */
/* Unmasked: second operand is _mm256_setzero_si256(), mask all-ones. */
3796#define _mm512_cvt_roundpd_epu32(A, R) \
3797 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
3798 (__v8si)_mm256_setzero_si256(), \
3799 (__mmask8)-1, (int)(R)))
/* Merge-masked: second operand is W, mask is U. */
3801#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
3802 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
3803 (__v8si)(__m256i)(W), \
3804 (__mmask8)(U), (int)(R)))
/* Zero-masked: second operand is _mm256_setzero_si256(), mask is U. */
3806#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
3807 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
3808 (__v8si)_mm256_setzero_si256(), \
3809 (__mmask8)(U), (int)(R)))
3814 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3824 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3833 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3856 return (__m512d)__builtin_shufflevector((__v8df)
__a, (__v8df)
__b,
3857 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
3863 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3871 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3878 return (__m512d)__builtin_shufflevector((__v8df)
__a, (__v8df)
__b,
3879 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
3885 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3893 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3900 return (__m512)__builtin_shufflevector((__v16sf)
__a, (__v16sf)
__b,
3902 2+4, 18+4, 3+4, 19+4,
3903 2+8, 18+8, 3+8, 19+8,
3904 2+12, 18+12, 3+12, 19+12);
3910 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
3918 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
3925 return (__m512)__builtin_shufflevector((__v16sf)
__a, (__v16sf)
__b,
3927 0+4, 16+4, 1+4, 17+4,
3928 0+8, 16+8, 1+8, 17+8,
3929 0+12, 16+12, 1+12, 17+12);
3935 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
3943 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
3950 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3952 2+4, 18+4, 3+4, 19+4,
3953 2+8, 18+8, 3+8, 19+8,
3954 2+12, 18+12, 3+12, 19+12);
3960 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
3968 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
3975 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3977 0+4, 16+4, 1+4, 17+4,
3978 0+8, 16+8, 1+8, 17+8,
3979 0+12, 16+12, 1+12, 17+12);
3985 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
3993 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4000 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4001 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4007 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4015 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4022 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4023 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4029 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4037 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4048 struct __loadu_si512 {
4051 return ((
const struct __loadu_si512*)
__P)->__v;
4057 struct __loadu_epi32 {
4060 return ((
const struct __loadu_epi32*)
__P)->__v;
4066 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)
__P,
4075 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)
__P,
4084 struct __loadu_epi64 {
4087 return ((
const struct __loadu_epi64*)
__P)->__v;
4093 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)
__P,
4101 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)
__P,
4110 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)
__P,
4118 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)
__P,
4127 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)
__P,
4135 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)
__P,
4147 return ((
const struct __loadu_pd*)
__p)->__v;
4156 return ((
const struct __loadu_ps*)
__p)->__v;
4162 return *(
const __m512*)
__p;
4168 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)
__P,
4176 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)
__P,
4185 return *(
const __m512d*)
__p;
4191 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)
__P,
4199 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)
__P,
4208 return *(
const __m512i *)
__P;
4214 return *(
const __m512i *)
__P;
4220 return *(
const __m512i *)
__P;
4228 struct __storeu_epi64 {
4231 ((
struct __storeu_epi64*)
__P)->
__v = __A;
4237 __builtin_ia32_storedqudi512_mask ((
long long *)
__P, (__v8di) __A,
4244 struct __storeu_si512 {
4247 ((
struct __storeu_si512*)
__P)->
__v = __A;
4253 struct __storeu_epi32 {
4256 ((
struct __storeu_epi32*)
__P)->
__v = __A;
4262 __builtin_ia32_storedqusi512_mask ((
int *)
__P, (__v16si) __A,
4269 __builtin_ia32_storeupd512_mask ((
double *)
__P, (__v8df) __A, (
__mmask8) __U);
4275 struct __storeu_pd {
4278 ((
struct __storeu_pd*)
__P)->
__v = __A;
4284 __builtin_ia32_storeups512_mask ((
float *)
__P, (__v16sf) __A,
4291 struct __storeu_ps {
4294 ((
struct __storeu_ps*)
__P)->
__v = __A;
4300 __builtin_ia32_storeapd512_mask ((__v8df *)
__P, (__v8df) __A, (
__mmask8) __U);
4306 *(__m512d*)
__P = __A;
4312 __builtin_ia32_storeaps512_mask ((__v16sf *)
__P, (__v16sf) __A,
4319 *(__m512*)
__P = __A;
4325 *(__m512i *)
__P = __A;
4331 *(__m512i *)
__P = __A;
4337 *(__m512i *)
__P = __A;
4344 return __builtin_ia32_knothi(__M);
/* Shorthand comparison macros for the epi32 compare-to-mask macros.
 * Each one forwards its operands unchanged to _mm512_cmp_epi32_mask (or, for
 * the mask_ forms, to _mm512_mask_cmp_epi32_mask with k as the first
 * argument) and supplies a fixed _MM_CMPINT_* predicate:
 *   cmpeq -> EQ, cmpge -> GE, cmpgt -> GT, cmple -> LE, cmplt -> LT,
 *   cmpneq -> NE.
 * Note that _MM_CMPINT_GE and _MM_CMPINT_GT are themselves aliases of
 * _MM_CMPINT_NLT and _MM_CMPINT_NLE, defined near the top of this header. */
4349#define _mm512_cmpeq_epi32_mask(A, B) \
4350 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
4351#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
4352 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
4353#define _mm512_cmpge_epi32_mask(A, B) \
4354 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
4355#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
4356 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
4357#define _mm512_cmpgt_epi32_mask(A, B) \
4358 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
4359#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
4360 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
4361#define _mm512_cmple_epi32_mask(A, B) \
4362 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
4363#define _mm512_mask_cmple_epi32_mask(k, A, B) \
4364 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
4365#define _mm512_cmplt_epi32_mask(A, B) \
4366 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
4367#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
4368 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
4369#define _mm512_cmpneq_epi32_mask(A, B) \
4370 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
4371#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
4372 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Shorthand comparison macros for the epu32 compare-to-mask macros.
 * Same layout as the epi32 family: every macro forwards to
 * _mm512_cmp_epu32_mask / _mm512_mask_cmp_epu32_mask with one fixed
 * _MM_CMPINT_* predicate (EQ, GE, GT, LE, LT, NE). */
4374#define _mm512_cmpeq_epu32_mask(A, B) \
4375 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
4376#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
4377 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
4378#define _mm512_cmpge_epu32_mask(A, B) \
4379 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
4380#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
4381 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
4382#define _mm512_cmpgt_epu32_mask(A, B) \
4383 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
4384#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
4385 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
4386#define _mm512_cmple_epu32_mask(A, B) \
4387 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
4388#define _mm512_mask_cmple_epu32_mask(k, A, B) \
4389 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
4390#define _mm512_cmplt_epu32_mask(A, B) \
4391 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
4392#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
4393 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
4394#define _mm512_cmpneq_epu32_mask(A, B) \
4395 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
4396#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
4397 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Shorthand comparison macros for the epi64 compare-to-mask macros.
 * Every macro forwards to _mm512_cmp_epi64_mask / _mm512_mask_cmp_epi64_mask
 * with one fixed _MM_CMPINT_* predicate (EQ, GE, GT, LE, LT, NE); the mask_
 * forms pass k through as the leading argument. */
4399#define _mm512_cmpeq_epi64_mask(A, B) \
4400 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
4401#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
4402 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
4403#define _mm512_cmpge_epi64_mask(A, B) \
4404 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
4405#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
4406 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
4407#define _mm512_cmpgt_epi64_mask(A, B) \
4408 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
4409#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
4410 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
4411#define _mm512_cmple_epi64_mask(A, B) \
4412 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
4413#define _mm512_mask_cmple_epi64_mask(k, A, B) \
4414 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
4415#define _mm512_cmplt_epi64_mask(A, B) \
4416 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
4417#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
4418 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
4419#define _mm512_cmpneq_epi64_mask(A, B) \
4420 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
4421#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
4422 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Shorthand comparison macros for the epu64 compare-to-mask macros.
 * Every macro forwards to _mm512_cmp_epu64_mask / _mm512_mask_cmp_epu64_mask
 * with one fixed _MM_CMPINT_* predicate (EQ, GE, GT, LE, LT, NE); the mask_
 * forms pass k through as the leading argument. */
4424#define _mm512_cmpeq_epu64_mask(A, B) \
4425 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
4426#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
4427 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
4428#define _mm512_cmpge_epu64_mask(A, B) \
4429 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
4430#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
4431 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
4432#define _mm512_cmpgt_epu64_mask(A, B) \
4433 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
4434#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
4435 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
4436#define _mm512_cmple_epu64_mask(A, B) \
4437 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
4438#define _mm512_mask_cmple_epu64_mask(k, A, B) \
4439 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
4440#define _mm512_cmplt_epu64_mask(A, B) \
4441 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
4442#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
4443 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
4444#define _mm512_cmpneq_epu64_mask(A, B) \
4445 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
4446#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
4447 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4453 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4458 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4465 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4474 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4479 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4486 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4493 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4498 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4505 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4512 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4517 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4524 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4531 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4536 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4543 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4550 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4555 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4562 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4569 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4574 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4581 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4588 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4593 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4600 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4607 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4612 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4619 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4626 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4631 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4638 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4646 return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
4652 return (__m512i)__builtin_ia32_selectd_512(__U,
4660 return (__m512i)__builtin_ia32_selectd_512(__U,
4668 return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
4674 return (__m512i)__builtin_ia32_selectq_512(__U,
4682 return (__m512i)__builtin_ia32_selectq_512(__U,
4689#define _mm512_cmp_epi32_mask(a, b, p) \
4690 ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
4691 (__v16si)(__m512i)(b), (int)(p), \
4694#define _mm512_cmp_epu32_mask(a, b, p) \
4695 ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
4696 (__v16si)(__m512i)(b), (int)(p), \
4699#define _mm512_cmp_epi64_mask(a, b, p) \
4700 ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
4701 (__v8di)(__m512i)(b), (int)(p), \
4704#define _mm512_cmp_epu64_mask(a, b, p) \
4705 ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
4706 (__v8di)(__m512i)(b), (int)(p), \
4709#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
4710 ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
4711 (__v16si)(__m512i)(b), (int)(p), \
4714#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
4715 ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
4716 (__v16si)(__m512i)(b), (int)(p), \
4719#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
4720 ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
4721 (__v8di)(__m512i)(b), (int)(p), \
4724#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
4725 ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
4726 (__v8di)(__m512i)(b), (int)(p), \
/* _mm512_rol_epi32 / _mm512_rol_epi64 family (count supplied as b).
 * The unmasked macros expand to __builtin_ia32_prold512 (32-bit lanes, operand
 * cast to __v16si) or __builtin_ia32_prolq512 (64-bit lanes, operand cast to
 * __v8di), with b cast to int.
 * The mask_ and maskz_ macros evaluate the unmasked macro and hand the result
 * to __builtin_ia32_selectd_512 / __builtin_ia32_selectq_512 together with
 * the mask U and a second vector: W for mask_, _mm512_setzero_si512() for
 * maskz_. */
4729#define _mm512_rol_epi32(a, b) \
4730 ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))
4732#define _mm512_mask_rol_epi32(W, U, a, b) \
4733 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
4734 (__v16si)_mm512_rol_epi32((a), (b)), \
4735 (__v16si)(__m512i)(W)))
4737#define _mm512_maskz_rol_epi32(U, a, b) \
4738 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
4739 (__v16si)_mm512_rol_epi32((a), (b)), \
4740 (__v16si)_mm512_setzero_si512()))
4742#define _mm512_rol_epi64(a, b) \
4743 ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))
4745#define _mm512_mask_rol_epi64(W, U, a, b) \
4746 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
4747 (__v8di)_mm512_rol_epi64((a), (b)), \
4748 (__v8di)(__m512i)(W)))
4750#define _mm512_maskz_rol_epi64(U, a, b) \
4751 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
4752 (__v8di)_mm512_rol_epi64((a), (b)), \
4753 (__v8di)_mm512_setzero_si512()))
4758 return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
4764 return (__m512i)__builtin_ia32_selectd_512(__U,
4772 return (__m512i)__builtin_ia32_selectd_512(__U,
4780 return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
4786 return (__m512i)__builtin_ia32_selectq_512(__U,
4794 return (__m512i)__builtin_ia32_selectq_512(__U,
/* _mm512_ror_epi32 / _mm512_ror_epi64 family (count supplied as B).
 * The unmasked macros expand to __builtin_ia32_prord512 (operand cast to
 * __v16si) or __builtin_ia32_prorq512 (operand cast to __v8di), with B cast
 * to int.
 * The mask_ and maskz_ macros wrap the unmasked macro in
 * __builtin_ia32_selectd_512 / __builtin_ia32_selectq_512, passing the mask U
 * and either W (mask_) or _mm512_setzero_si512() (maskz_) as the other
 * vector operand. */
4799#define _mm512_ror_epi32(A, B) \
4800 ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))
4802#define _mm512_mask_ror_epi32(W, U, A, B) \
4803 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
4804 (__v16si)_mm512_ror_epi32((A), (B)), \
4805 (__v16si)(__m512i)(W)))
4807#define _mm512_maskz_ror_epi32(U, A, B) \
4808 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
4809 (__v16si)_mm512_ror_epi32((A), (B)), \
4810 (__v16si)_mm512_setzero_si512()))
4812#define _mm512_ror_epi64(A, B) \
4813 ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))
4815#define _mm512_mask_ror_epi64(W, U, A, B) \
4816 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
4817 (__v8di)_mm512_ror_epi64((A), (B)), \
4818 (__v8di)(__m512i)(W)))
4820#define _mm512_maskz_ror_epi64(U, A, B) \
4821 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
4822 (__v8di)_mm512_ror_epi64((A), (B)), \
4823 (__v8di)_mm512_setzero_si512()))
4827 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (
int)__B);
4833 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4840 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4847 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (
int)__B);
4853 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4860 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4867 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (
int)__B);
4873 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4880 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4887 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (
int)__B);
4893 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4900 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4908 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *)
__P,
4916 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *)
__P,
4925 __builtin_ia32_movdqa32store512_mask ((__v16si *)
__P, (__v16si) __A,
4932 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
4940 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
4948 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
4956 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
4964 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *)
__P,
4972 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *)
__P,
4981 __builtin_ia32_movdqa64store512_mask ((__v8di *)
__P, (__v8di) __A,
4988 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
4989 0, 0, 2, 2, 4, 4, 6, 6);
4995 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
5003 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
/* 512-bit double-precision fixupimm with an explicit rounding argument R.
 * Both macros expand to __builtin_ia32_fixupimmpd512_mask with A and B cast
 * to __v8df, C cast to __v8di, and imm / R cast to int. They differ only in
 * the mask operand: the unmasked form passes (__mmask8)-1 (all bits set),
 * the mask_ form passes the caller's U. */
5008#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
5009 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5010 (__v8df)(__m512d)(B), \
5011 (__v8di)(__m512i)(C), (int)(imm), \
5012 (__mmask8)-1, (int)(R)))
5014#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
5015 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5016 (__v8df)(__m512d)(B), \
5017 (__v8di)(__m512i)(C), (int)(imm), \
5018 (__mmask8)(U), (int)(R)))
5020#define _mm512_fixupimm_pd(A, B, C, imm) \
5021 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5022 (__v8df)(__m512d)(B), \
5023 (__v8di)(__m512i)(C), (int)(imm), \
5025 _MM_FROUND_CUR_DIRECTION))
5027#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
5028 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5029 (__v8df)(__m512d)(B), \
5030 (__v8di)(__m512i)(C), (int)(imm), \
5032 _MM_FROUND_CUR_DIRECTION))
5034#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
5035 ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5036 (__v8df)(__m512d)(B), \
5037 (__v8di)(__m512i)(C), \
5038 (int)(imm), (__mmask8)(U), \
/* maskz_ form of the 512-bit double-precision fixupimm without a rounding
 * parameter: expands to __builtin_ia32_fixupimmpd512_maskz with mask U and
 * _MM_FROUND_CUR_DIRECTION as the trailing argument. */
5041#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
5042 ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5043 (__v8df)(__m512d)(B), \
5044 (__v8di)(__m512i)(C), \
5045 (int)(imm), (__mmask8)(U), \
5046 _MM_FROUND_CUR_DIRECTION))
/* 512-bit single-precision fixupimm with an explicit rounding argument R.
 * Both macros expand to __builtin_ia32_fixupimmps512_mask with A and B cast
 * to __v16sf, C cast to __v16si, and imm / R cast to int. The unmasked form
 * passes (__mmask16)-1 (all bits set); the mask_ form passes U. */
5048#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
5049 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5050 (__v16sf)(__m512)(B), \
5051 (__v16si)(__m512i)(C), (int)(imm), \
5052 (__mmask16)-1, (int)(R)))
5054#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
5055 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5056 (__v16sf)(__m512)(B), \
5057 (__v16si)(__m512i)(C), (int)(imm), \
5058 (__mmask16)(U), (int)(R)))
5060#define _mm512_fixupimm_ps(A, B, C, imm) \
5061 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5062 (__v16sf)(__m512)(B), \
5063 (__v16si)(__m512i)(C), (int)(imm), \
5065 _MM_FROUND_CUR_DIRECTION))
5067#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
5068 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5069 (__v16sf)(__m512)(B), \
5070 (__v16si)(__m512i)(C), (int)(imm), \
5072 _MM_FROUND_CUR_DIRECTION))
5074#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
5075 ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5076 (__v16sf)(__m512)(B), \
5077 (__v16si)(__m512i)(C), \
5078 (int)(imm), (__mmask16)(U), \
/* maskz_ form of the 512-bit single-precision fixupimm without a rounding
 * parameter: expands to __builtin_ia32_fixupimmps512_maskz with mask U and
 * _MM_FROUND_CUR_DIRECTION as the trailing argument. */
5081#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
5082 ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5083 (__v16sf)(__m512)(B), \
5084 (__v16si)(__m512i)(C), \
5085 (int)(imm), (__mmask16)(U), \
5086 _MM_FROUND_CUR_DIRECTION))
/* 128-bit (__m128d) fixupimm_sd with an explicit rounding argument R.
 * Both macros expand to __builtin_ia32_fixupimmsd_mask with A and B cast to
 * __v2df, C cast to __v2di, and imm / R cast to int. The unmasked form
 * passes (__mmask8)-1; the mask_ form passes U. */
5088#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
5089 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5090 (__v2df)(__m128d)(B), \
5091 (__v2di)(__m128i)(C), (int)(imm), \
5092 (__mmask8)-1, (int)(R)))
5094#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
5095 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5096 (__v2df)(__m128d)(B), \
5097 (__v2di)(__m128i)(C), (int)(imm), \
5098 (__mmask8)(U), (int)(R)))
5100#define _mm_fixupimm_sd(A, B, C, imm) \
5101 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5102 (__v2df)(__m128d)(B), \
5103 (__v2di)(__m128i)(C), (int)(imm), \
5105 _MM_FROUND_CUR_DIRECTION))
5107#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
5108 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5109 (__v2df)(__m128d)(B), \
5110 (__v2di)(__m128i)(C), (int)(imm), \
5112 _MM_FROUND_CUR_DIRECTION))
/* maskz_ form of fixupimm_round_sd: same operand casts as the mask form but
 * routed to __builtin_ia32_fixupimmsd_maskz, with mask U and rounding R. */
5114#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
5115 ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5116 (__v2df)(__m128d)(B), \
5117 (__v2di)(__m128i)(C), (int)(imm), \
5118 (__mmask8)(U), (int)(R)))
5120#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
5121 ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5122 (__v2df)(__m128d)(B), \
5123 (__v2di)(__m128i)(C), (int)(imm), \
5125 _MM_FROUND_CUR_DIRECTION))
/* 128-bit (__m128) fixupimm_ss with an explicit rounding argument R.
 * Both macros expand to __builtin_ia32_fixupimmss_mask with A and B cast to
 * __v4sf, C cast to __v4si, and imm / R cast to int. The unmasked form
 * passes (__mmask8)-1; the mask_ form passes U. */
5127#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
5128 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5129 (__v4sf)(__m128)(B), \
5130 (__v4si)(__m128i)(C), (int)(imm), \
5131 (__mmask8)-1, (int)(R)))
5133#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
5134 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5135 (__v4sf)(__m128)(B), \
5136 (__v4si)(__m128i)(C), (int)(imm), \
5137 (__mmask8)(U), (int)(R)))
5139#define _mm_fixupimm_ss(A, B, C, imm) \
5140 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5141 (__v4sf)(__m128)(B), \
5142 (__v4si)(__m128i)(C), (int)(imm), \
5144 _MM_FROUND_CUR_DIRECTION))
5146#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
5147 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5148 (__v4sf)(__m128)(B), \
5149 (__v4si)(__m128i)(C), (int)(imm), \
5151 _MM_FROUND_CUR_DIRECTION))
/* maskz_ form of fixupimm_round_ss: same operand casts as the mask form but
 * routed to __builtin_ia32_fixupimmss_maskz, with mask U and rounding R. */
5153#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
5154 ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5155 (__v4sf)(__m128)(B), \
5156 (__v4si)(__m128i)(C), (int)(imm), \
5157 (__mmask8)(U), (int)(R)))
5159#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
5160 ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5161 (__v4sf)(__m128)(B), \
5162 (__v4si)(__m128i)(C), (int)(imm), \
5164 _MM_FROUND_CUR_DIRECTION))
/* Unmasked getexp_round_sd: expands to __builtin_ia32_getexpsd128_round_mask
 * with A and B cast to __v2df, _mm_setzero_pd() as the third vector operand,
 * an all-ones mask ((__mmask8)-1) and R cast to int. */
5166#define _mm_getexp_round_sd(A, B, R) \
5167 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5168 (__v2df)(__m128d)(B), \
5169 (__v2df)_mm_setzero_pd(), \
5170 (__mmask8)-1, (int)(R)))
5176 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5183 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
/* mask_ form of getexp_round_sd: same builtin as the unmasked macro, but the
 * third vector operand is the caller's W and the mask operand is U. */
5190#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5191 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5192 (__v2df)(__m128d)(B), \
5193 (__v2df)(__m128d)(W), \
5194 (__mmask8)(U), (int)(R)))
5199 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
/* maskz_ form of getexp_round_sd: passes _mm_setzero_pd() as the third vector
 * operand and the caller's U as the mask. */
5206#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5207 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5208 (__v2df)(__m128d)(B), \
5209 (__v2df)_mm_setzero_pd(), \
5210 (__mmask8)(U), (int)(R)))
/* Unmasked getexp_round_ss: expands to __builtin_ia32_getexpss128_round_mask
 * with A and B cast to __v4sf, _mm_setzero_ps() as the third vector operand,
 * an all-ones mask ((__mmask8)-1) and R cast to int. */
5212#define _mm_getexp_round_ss(A, B, R) \
5213 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5214 (__v4sf)(__m128)(B), \
5215 (__v4sf)_mm_setzero_ps(), \
5216 (__mmask8)-1, (int)(R)))
5221 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5228 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
/* mask_ form of getexp_round_ss: calls __builtin_ia32_getexpss128_round_mask
 * with the caller's W as the third vector operand and U as the mask. */
5235#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5236 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5237 (__v4sf)(__m128)(B), \
5238 (__v4sf)(__m128)(W), \
5239 (__mmask8)(U), (int)(R)))
5244 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
/* maskz_ form of getexp_round_ss: passes _mm_setzero_ps() as the third vector
 * operand and the caller's U as the mask. */
5251#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5252 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5253 (__v4sf)(__m128)(B), \
5254 (__v4sf)_mm_setzero_ps(), \
5255 (__mmask8)(U), (int)(R)))
/* Unmasked getmant_round_sd: expands to __builtin_ia32_getmantsd_round_mask.
 * C and D are packed into a single int immediate as ((D) << 2) | (C);
 * _mm_setzero_pd() is the vector operand that follows the immediate, the mask
 * is all ones ((__mmask8)-1) and R is cast to int. */
5257#define _mm_getmant_round_sd(A, B, C, D, R) \
5258 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5259 (__v2df)(__m128d)(B), \
5260 (int)(((D)<<2) | (C)), \
5261 (__v2df)_mm_setzero_pd(), \
5262 (__mmask8)-1, (int)(R)))
5264#define _mm_getmant_sd(A, B, C, D) \
5265 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5266 (__v2df)(__m128d)(B), \
5267 (int)(((D)<<2) | (C)), \
5268 (__v2df)_mm_setzero_pd(), \
5270 _MM_FROUND_CUR_DIRECTION))
5272#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
5273 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5274 (__v2df)(__m128d)(B), \
5275 (int)(((D)<<2) | (C)), \
5276 (__v2df)(__m128d)(W), \
5278 _MM_FROUND_CUR_DIRECTION))
/* mask_ form of getmant_round_sd: same builtin and ((D) << 2) | (C) immediate
 * packing as the unmasked macro, with the caller's W as the vector operand
 * after the immediate and U as the mask. */
5280#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
5281 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5282 (__v2df)(__m128d)(B), \
5283 (int)(((D)<<2) | (C)), \
5284 (__v2df)(__m128d)(W), \
5285 (__mmask8)(U), (int)(R)))
5287#define _mm_maskz_getmant_sd(U, A, B, C, D) \
5288 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5289 (__v2df)(__m128d)(B), \
5290 (int)(((D)<<2) | (C)), \
5291 (__v2df)_mm_setzero_pd(), \
5293 _MM_FROUND_CUR_DIRECTION))
/* maskz_ form of getmant_round_sd: passes _mm_setzero_pd() as the vector
 * operand after the ((D) << 2) | (C) immediate, and U as the mask. */
5295#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
5296 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5297 (__v2df)(__m128d)(B), \
5298 (int)(((D)<<2) | (C)), \
5299 (__v2df)_mm_setzero_pd(), \
5300 (__mmask8)(U), (int)(R)))
/* Unmasked getmant_round_ss: expands to __builtin_ia32_getmantss_round_mask
 * with A and B cast to __v4sf, the immediate packed as ((D) << 2) | (C),
 * _mm_setzero_ps() as the following vector operand, an all-ones mask and R
 * cast to int. */
5302#define _mm_getmant_round_ss(A, B, C, D, R) \
5303 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5304 (__v4sf)(__m128)(B), \
5305 (int)(((D)<<2) | (C)), \
5306 (__v4sf)_mm_setzero_ps(), \
5307 (__mmask8)-1, (int)(R)))
5309#define _mm_getmant_ss(A, B, C, D) \
5310 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5311 (__v4sf)(__m128)(B), \
5312 (int)(((D)<<2) | (C)), \
5313 (__v4sf)_mm_setzero_ps(), \
5315 _MM_FROUND_CUR_DIRECTION))
5317#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
5318 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5319 (__v4sf)(__m128)(B), \
5320 (int)(((D)<<2) | (C)), \
5321 (__v4sf)(__m128)(W), \
5323 _MM_FROUND_CUR_DIRECTION))
/* mask_ form of getmant_round_ss: same builtin and ((D) << 2) | (C) immediate
 * packing as the unmasked macro, with the caller's W as the vector operand
 * after the immediate and U as the mask. */
5325#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
5326 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5327 (__v4sf)(__m128)(B), \
5328 (int)(((D)<<2) | (C)), \
5329 (__v4sf)(__m128)(W), \
5330 (__mmask8)(U), (int)(R)))
5332#define _mm_maskz_getmant_ss(U, A, B, C, D) \
5333 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5334 (__v4sf)(__m128)(B), \
5335 (int)(((D)<<2) | (C)), \
5336 (__v4sf)_mm_setzero_ps(), \
5338 _MM_FROUND_CUR_DIRECTION))
/* maskz_ form of getmant_round_ss: passes _mm_setzero_ps() as the vector
 * operand after the ((D) << 2) | (C) immediate, and U as the mask. */
5340#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
5341 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5342 (__v4sf)(__m128)(B), \
5343 (int)(((D)<<2) | (C)), \
5344 (__v4sf)_mm_setzero_ps(), \
5345 (__mmask8)(U), (int)(R)))
/* Scalar compare macros yielding int: forward A and B (cast to __v2df for the
 * sd form, __v4sf for the ss form) plus predicate P and rounding argument R,
 * both cast to int, to __builtin_ia32_vcomisd / __builtin_ia32_vcomiss. */
5352#define _mm_comi_round_sd(A, B, P, R) \
5353 ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
5354 (int)(P), (int)(R)))
5356#define _mm_comi_round_ss(A, B, P, R) \
5357 ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
5358 (int)(P), (int)(R)))
/* Converts via __builtin_ia32_vcvtsd2si64 with rounding argument R; the result
 * is cast to long long. */
5361#define _mm_cvt_roundsd_si64(A, R) \
5362 ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5365static __inline__ __m512i
5367 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5372 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5379 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5386 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5391 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5398 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5405 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)
__Y);
5410 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5417 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5425 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)
__Y);
5431 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5439 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5446 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5451 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5458 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5465 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5470 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5477 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5484 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)
__Y);
5489 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5496 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5504 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)
__Y);
5510 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5518 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5525 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5530 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5537 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5544 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5549 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5556 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5563 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)
__Y);
5568 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5575 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5583 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)
__Y);
5589 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5597 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* ternarylogic family for 32-bit (epi32) and 64-bit (epi64) lanes.
 * Each macro casts A, B and C to __v16si (epi32) or __v8di (epi64) and casts
 * imm to unsigned char before calling the builtin:
 *   - unmasked: __builtin_ia32_pternlog{d,q}512_mask with an all-ones mask
 *     ((__mmask16)-1 / (__mmask8)-1);
 *   - mask_:    the same _mask builtin with the caller's U;
 *   - maskz_:   __builtin_ia32_pternlog{d,q}512_maskz with the caller's U.
 * Note the parameter order differs between mask_ (A, U, B, C, imm) and
 * maskz_ (U, A, B, C, imm). */
5611#define _mm512_ternarylogic_epi32(A, B, C, imm) \
5612 ((__m512i)__builtin_ia32_pternlogd512_mask( \
5613 (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
5614 (unsigned char)(imm), (__mmask16)-1))
5616#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
5617 ((__m512i)__builtin_ia32_pternlogd512_mask( \
5618 (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
5619 (unsigned char)(imm), (__mmask16)(U)))
5621#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
5622 ((__m512i)__builtin_ia32_pternlogd512_maskz( \
5623 (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
5624 (unsigned char)(imm), (__mmask16)(U)))
5626#define _mm512_ternarylogic_epi64(A, B, C, imm) \
5627 ((__m512i)__builtin_ia32_pternlogq512_mask( \
5628 (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
5629 (unsigned char)(imm), (__mmask8)-1))
5631#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
5632 ((__m512i)__builtin_ia32_pternlogq512_mask( \
5633 (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
5634 (unsigned char)(imm), (__mmask8)(U)))
5636#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
5637 ((__m512i)__builtin_ia32_pternlogq512_maskz( \
5638 (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
5639 (unsigned char)(imm), (__mmask8)(U)))
/* Scalar double -> integer conversions taking a rounding argument R.
 * A is cast to __v2df and R to int. The _si32 and _i32 spellings expand to
 * the identical builtin call (__builtin_ia32_vcvtsd2si32); _i64 uses
 * __builtin_ia32_vcvtsd2si64 and _u32 uses __builtin_ia32_vcvtsd2usi32. */
5642#define _mm_cvt_roundsd_i64(A, R) \
5643 ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5646#define _mm_cvt_roundsd_si32(A, R) \
5647 ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5649#define _mm_cvt_roundsd_i32(A, R) \
5650 ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5652#define _mm_cvt_roundsd_u32(A, R) \
5653 ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5658 return (
unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5663#define _mm_cvt_roundsd_u64(A, R) \
5664 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5668_mm_cvtsd_u64 (__m128d __A)
5670 return (
unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
/* Scalar float -> integer conversions taking a rounding argument R.
 * A is cast to __v4sf and R to int. The _si32/_i32 pair both expand to
 * __builtin_ia32_vcvtss2si32, the _si64/_i64 pair both expand to
 * __builtin_ia32_vcvtss2si64, and _u32 uses __builtin_ia32_vcvtss2usi32. */
5676#define _mm_cvt_roundss_si32(A, R) \
5677 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5679#define _mm_cvt_roundss_i32(A, R) \
5680 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5683#define _mm_cvt_roundss_si64(A, R) \
5684 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5686#define _mm_cvt_roundss_i64(A, R) \
5687 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5690#define _mm_cvt_roundss_u32(A, R) \
5691 ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5696 return (
unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5701#define _mm_cvt_roundss_u64(A, R) \
5702 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
5706_mm_cvtss_u64 (__m128 __A)
5708 return (
unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
/* cvtt (vcvttsd2si32) scalar double -> int with argument R. The _i32 and
 * _si32 spellings expand to the identical builtin call. */
5714#define _mm_cvtt_roundsd_i32(A, R) \
5715 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5717#define _mm_cvtt_roundsd_si32(A, R) \
5718 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5723 return (
int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
/* cvtt (vcvttsd2si64) scalar double -> long long with argument R. The _si64
 * and _i64 spellings expand to the identical builtin call. */
5728#define _mm_cvtt_roundsd_si64(A, R) \
5729 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5731#define _mm_cvtt_roundsd_i64(A, R) \
5732 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5735_mm_cvttsd_i64 (__m128d __A)
5737 return (
long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
/* cvtt scalar double -> unsigned int with argument R, via
 * __builtin_ia32_vcvttsd2usi32. */
5742#define _mm_cvtt_roundsd_u32(A, R) \
5743 ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5748 return (
unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
5753#define _mm_cvtt_roundsd_u64(A, R) \
5754 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
5758_mm_cvttsd_u64 (__m128d __A)
5760 return (
unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
/* Forwards to __builtin_ia32_vcvttss2si32: A is cast to __m128 then __v4sf,
 * R to int, and the result to int.
 * The "cvtt" spelling presumably denotes a truncating float -> signed
 * 32-bit conversion -- confirm against the ISA reference. */
5766#define _mm_cvtt_roundss_i32(A, R) \
5767 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
/* Alternate spelling; the expansion is identical to _mm_cvtt_roundss_i32. */
5769#define _mm_cvtt_roundss_si32(A, R) \
5770 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5775 return (
int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
/* Forwards to __builtin_ia32_vcvttss2si64 with A cast to __v4sf and R cast
 * to int; the result is cast to long long.
 * Presumably the 64-bit counterpart of _mm_cvtt_roundss_i32 -- confirm. */
5780#define _mm_cvtt_roundss_i64(A, R) \
5781 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
/* Alternate spelling; the expansion is identical to _mm_cvtt_roundss_i64. */
5783#define _mm_cvtt_roundss_si64(A, R) \
5784 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5787_mm_cvttss_i64 (__m128 __A)
5789 return (
long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
/* Forwards to __builtin_ia32_vcvttss2usi32 with A cast to __v4sf and R cast
 * to int; the result is cast to unsigned int.
 * Presumably the unsigned variant of _mm_cvtt_roundss_i32 -- confirm. */
5794#define _mm_cvtt_roundss_u32(A, R) \
5795 ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
5800 return (
unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
5805#define _mm_cvtt_roundss_u64(A, R) \
5806 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
5810_mm_cvttss_u64 (__m128 __A)
5812 return (
unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
/* Unmasked form: forwards X (as __v8df) and the control C (as int) to
 * __builtin_ia32_vpermilpd512 and casts the result to __m512d. */
5818#define _mm512_permute_pd(X, C) \
5819 ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))
/* Merge-masking form: the result of _mm512_permute_pd(X, C) and W are
 * handed to __builtin_ia32_selectpd_512 together with the 8-bit mask U.
 * The select builtin presumably keeps the permuted element where the mask
 * bit is set and the W element otherwise -- confirm. */
5821#define _mm512_mask_permute_pd(W, U, X, C) \
5822 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
5823 (__v8df)_mm512_permute_pd((X), (C)), \
5824 (__v8df)(__m512d)(W)))
/* Zero-masking form: same select, but the alternative operand is
 * _mm512_setzero_pd() instead of a caller-supplied vector. */
5826#define _mm512_maskz_permute_pd(U, X, C) \
5827 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
5828 (__v8df)_mm512_permute_pd((X), (C)), \
5829 (__v8df)_mm512_setzero_pd()))
/* Unmasked form: forwards X (as __v16sf) and the control C (as int) to
 * __builtin_ia32_vpermilps512 and casts the result to __m512. */
5831#define _mm512_permute_ps(X, C) \
5832 ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))
/* Merge-masking form: the result of _mm512_permute_ps(X, C) and W are
 * handed to __builtin_ia32_selectps_512 together with the 16-bit mask U.
 * The select builtin presumably keeps the permuted element where the mask
 * bit is set and the W element otherwise -- confirm. */
5834#define _mm512_mask_permute_ps(W, U, X, C) \
5835 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
5836 (__v16sf)_mm512_permute_ps((X), (C)), \
5837 (__v16sf)(__m512)(W)))
/* Zero-masking form: same select, but the alternative operand is
 * _mm512_setzero_ps(). */
5839#define _mm512_maskz_permute_ps(U, X, C) \
5840 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
5841 (__v16sf)_mm512_permute_ps((X), (C)), \
5842 (__v16sf)_mm512_setzero_ps()))
5846 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
5851 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
5858 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
5865 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
5870 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
5877 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
5884 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
5891 return (__m512d)__builtin_ia32_selectpd_512(__U,
5899 return (__m512d)__builtin_ia32_selectpd_512(__U,
5901 (__v8df)(__m512d)__I);
5907 return (__m512d)__builtin_ia32_selectpd_512(__U,
5914 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
5921 return (__m512)__builtin_ia32_selectps_512(__U,
5929 return (__m512)__builtin_ia32_selectps_512(__U,
5931 (__v16sf)(__m512)__I);
5937 return (__m512)__builtin_ia32_selectps_512(__U,
/* Unmasked form: forwards A (as __v8df) to __builtin_ia32_cvttpd2udq512_mask
 * with an undefined 256-bit pass-through vector, an all-ones mask
 * ((__mmask8)-1) and R as the final int argument; result cast to __m256i.
 * By its name the builtin presumably truncates doubles to unsigned 32-bit
 * integers -- confirm against the ISA reference. */
5942#define _mm512_cvtt_roundpd_epu32(A, R) \
5943 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
5944 (__v8si)_mm256_undefined_si256(), \
5945 (__mmask8)-1, (int)(R)))
/* Merge-masking form: W is the pass-through vector and U the mask. */
5947#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
5948 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
5949 (__v8si)(__m256i)(W), \
5950 (__mmask8)(U), (int)(R)))
/* Zero-masking form: a zero vector is the pass-through and U the mask. */
5952#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
5953 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
5954 (__v8si)_mm256_setzero_si256(), \
5955 (__mmask8)(U), (int)(R)))
5960 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5970 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5979 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5986#define _mm_roundscale_round_sd(A, B, imm, R) \
5987 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
5988 (__v2df)(__m128d)(B), \
5989 (__v2df)_mm_setzero_pd(), \
5990 (__mmask8)-1, (int)(imm), \
/* Unmasked form: forwards A and B (as __v2df) to
 * __builtin_ia32_rndscalesd_round_mask with a zero pass-through vector, an
 * all-ones mask ((__mmask8)-1), the immediate imm, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
5993#define _mm_roundscale_sd(A, B, imm) \
5994 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
5995 (__v2df)(__m128d)(B), \
5996 (__v2df)_mm_setzero_pd(), \
5997 (__mmask8)-1, (int)(imm), \
5998 _MM_FROUND_CUR_DIRECTION))
/* Merge-masking form: W is the pass-through vector and U the mask; the
 * last argument is still _MM_FROUND_CUR_DIRECTION. */
6000#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
6001 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6002 (__v2df)(__m128d)(B), \
6003 (__v2df)(__m128d)(W), \
6004 (__mmask8)(U), (int)(imm), \
6005 _MM_FROUND_CUR_DIRECTION))
6007#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
6008 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6009 (__v2df)(__m128d)(B), \
6010 (__v2df)(__m128d)(W), \
6011 (__mmask8)(U), (int)(I), \
/* Zero-masking form: forwards A and B (as __v2df) to
 * __builtin_ia32_rndscalesd_round_mask with a zero pass-through vector,
 * mask U, immediate I, and _MM_FROUND_CUR_DIRECTION as the last argument. */
6014#define _mm_maskz_roundscale_sd(U, A, B, I) \
6015 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6016 (__v2df)(__m128d)(B), \
6017 (__v2df)_mm_setzero_pd(), \
6018 (__mmask8)(U), (int)(I), \
6019 _MM_FROUND_CUR_DIRECTION))
6021#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
6022 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6023 (__v2df)(__m128d)(B), \
6024 (__v2df)_mm_setzero_pd(), \
6025 (__mmask8)(U), (int)(I), \
6028#define _mm_roundscale_round_ss(A, B, imm, R) \
6029 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6030 (__v4sf)(__m128)(B), \
6031 (__v4sf)_mm_setzero_ps(), \
6032 (__mmask8)-1, (int)(imm), \
/* Unmasked form: forwards A and B (as __v4sf) to
 * __builtin_ia32_rndscaless_round_mask with a zero pass-through vector, an
 * all-ones mask ((__mmask8)-1), the immediate imm, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
6035#define _mm_roundscale_ss(A, B, imm) \
6036 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6037 (__v4sf)(__m128)(B), \
6038 (__v4sf)_mm_setzero_ps(), \
6039 (__mmask8)-1, (int)(imm), \
6040 _MM_FROUND_CUR_DIRECTION))
/* Merge-masking form: W is the pass-through vector and U the mask; the
 * last argument is still _MM_FROUND_CUR_DIRECTION. */
6042#define _mm_mask_roundscale_ss(W, U, A, B, I) \
6043 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6044 (__v4sf)(__m128)(B), \
6045 (__v4sf)(__m128)(W), \
6046 (__mmask8)(U), (int)(I), \
6047 _MM_FROUND_CUR_DIRECTION))
6049#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
6050 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6051 (__v4sf)(__m128)(B), \
6052 (__v4sf)(__m128)(W), \
6053 (__mmask8)(U), (int)(I), \
/* Zero-masking form: forwards A and B (as __v4sf) to
 * __builtin_ia32_rndscaless_round_mask with a zero pass-through vector,
 * mask U, immediate I, and _MM_FROUND_CUR_DIRECTION as the last argument. */
6056#define _mm_maskz_roundscale_ss(U, A, B, I) \
6057 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6058 (__v4sf)(__m128)(B), \
6059 (__v4sf)_mm_setzero_ps(), \
6060 (__mmask8)(U), (int)(I), \
6061 _MM_FROUND_CUR_DIRECTION))
6063#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
6064 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6065 (__v4sf)(__m128)(B), \
6066 (__v4sf)_mm_setzero_ps(), \
6067 (__mmask8)(U), (int)(I), \
/* Unmasked form: forwards A and B (as __v8df) to
 * __builtin_ia32_scalefpd512_mask with an undefined pass-through vector, an
 * all-ones mask ((__mmask8)-1) and R as the final int argument.
 * R is presumably a _MM_FROUND_* rounding selector -- confirm. */
6070#define _mm512_scalef_round_pd(A, B, R) \
6071 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6072 (__v8df)(__m512d)(B), \
6073 (__v8df)_mm512_undefined_pd(), \
6074 (__mmask8)-1, (int)(R)))
/* Merge-masking form: W is the pass-through vector and U the mask. */
6076#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6077 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6078 (__v8df)(__m512d)(B), \
6079 (__v8df)(__m512d)(W), \
6080 (__mmask8)(U), (int)(R)))
/* Zero-masking form: a zero vector is the pass-through and U the mask. */
6082#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6083 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6084 (__v8df)(__m512d)(B), \
6085 (__v8df)_mm512_setzero_pd(), \
6086 (__mmask8)(U), (int)(R)))
6091 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6102 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6112 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
/* Unmasked form: forwards A and B (as __v16sf) to
 * __builtin_ia32_scalefps512_mask with an undefined pass-through vector, an
 * all-ones mask ((__mmask16)-1) and R as the final int argument.
 * R is presumably a _MM_FROUND_* rounding selector -- confirm. */
6120#define _mm512_scalef_round_ps(A, B, R) \
6121 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6122 (__v16sf)(__m512)(B), \
6123 (__v16sf)_mm512_undefined_ps(), \
6124 (__mmask16)-1, (int)(R)))
/* Merge-masking form: W is the pass-through vector and U the mask. */
6126#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6127 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6128 (__v16sf)(__m512)(B), \
6129 (__v16sf)(__m512)(W), \
6130 (__mmask16)(U), (int)(R)))
/* Zero-masking form: a zero vector is the pass-through and U the mask. */
6132#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6133 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6134 (__v16sf)(__m512)(B), \
6135 (__v16sf)_mm512_setzero_ps(), \
6136 (__mmask16)(U), (int)(R)))
6141 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6152 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6162 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
/* Unmasked form: forwards A and B (as __v2df) to
 * __builtin_ia32_scalefsd_round_mask with a zero pass-through vector, an
 * all-ones mask ((__mmask8)-1) and R as the final int argument. */
6170#define _mm_scalef_round_sd(A, B, R) \
6171 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6172 (__v2df)(__m128d)(B), \
6173 (__v2df)_mm_setzero_pd(), \
6174 (__mmask8)-1, (int)(R)))
6179 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6188 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
/* Merge-masking form: forwards A and B (as __v2df) to
 * __builtin_ia32_scalefsd_round_mask with W as the pass-through vector,
 * U as the mask and R as the final int argument. */
6195#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6196 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6197 (__v2df)(__m128d)(B), \
6198 (__v2df)(__m128d)(W), \
6199 (__mmask8)(U), (int)(R)))
6204 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
/* Zero-masking form: forwards A and B (as __v2df) to
 * __builtin_ia32_scalefsd_round_mask with a zero pass-through vector,
 * U as the mask and R as the final int argument. */
6211#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6212 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6213 (__v2df)(__m128d)(B), \
6214 (__v2df)_mm_setzero_pd(), \
6215 (__mmask8)(U), (int)(R)))
/* Unmasked form: forwards A and B (as __v4sf) to
 * __builtin_ia32_scalefss_round_mask with a zero pass-through vector, an
 * all-ones mask ((__mmask8)-1) and R as the final int argument. */
6217#define _mm_scalef_round_ss(A, B, R) \
6218 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6219 (__v4sf)(__m128)(B), \
6220 (__v4sf)_mm_setzero_ps(), \
6221 (__mmask8)-1, (int)(R)))
6226 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6235 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
/* Merge-masking form: forwards A and B (as __v4sf) to
 * __builtin_ia32_scalefss_round_mask with W as the pass-through vector,
 * U as the mask and R as the final int argument. */
6242#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6243 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6244 (__v4sf)(__m128)(B), \
6245 (__v4sf)(__m128)(W), \
6246 (__mmask8)(U), (int)(R)))
6251 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6258#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6259 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6260 (__v4sf)(__m128)(B), \
6261 (__v4sf)_mm_setzero_ps(), \
6267 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (
int)__B);
6273 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
6280 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
6287 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (
int)__B);
6293 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
6300 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* Unmasked form: forwards A and B (as __v16sf) and imm (as int) to
 * __builtin_ia32_shuf_f32x4; result cast to __m512. */
6305#define _mm512_shuffle_f32x4(A, B, imm) \
6306 ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
6307 (__v16sf)(__m512)(B), (int)(imm)))
/* Merge-masking form: the unmasked shuffle result and W are passed with
 * the 16-bit mask U to __builtin_ia32_selectps_512 (presumably a
 * per-element choice between the two -- confirm). */
6309#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
6310 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6311 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
6312 (__v16sf)(__m512)(W)))
/* Zero-masking form: same select with _mm512_setzero_ps() as the
 * alternative operand. */
6314#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
6315 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6316 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
6317 (__v16sf)_mm512_setzero_ps()))
/* Unmasked form: forwards A and B (as __v8df) and imm (as int) to
 * __builtin_ia32_shuf_f64x2; result cast to __m512d. */
6319#define _mm512_shuffle_f64x2(A, B, imm) \
6320 ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
6321 (__v8df)(__m512d)(B), (int)(imm)))
/* Merge-masking form: the unmasked shuffle result and W are passed with
 * the 8-bit mask U to __builtin_ia32_selectpd_512 (presumably a
 * per-element choice between the two -- confirm). */
6323#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
6324 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6325 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
6326 (__v8df)(__m512d)(W)))
/* Zero-masking form: same select with _mm512_setzero_pd() as the
 * alternative operand. */
6328#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
6329 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6330 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
6331 (__v8df)_mm512_setzero_pd()))
/* Unmasked form: forwards A and B (as __v16si) and imm (as int) to
 * __builtin_ia32_shuf_i32x4; result cast to __m512i. */
6333#define _mm512_shuffle_i32x4(A, B, imm) \
6334 ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
6335 (__v16si)(__m512i)(B), (int)(imm)))
/* Merge-masking form: the unmasked shuffle result and W are passed with
 * the 16-bit mask U to __builtin_ia32_selectd_512 (presumably a
 * per-element choice between the two -- confirm). */
6337#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
6338 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
6339 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
6340 (__v16si)(__m512i)(W)))
/* Zero-masking form: same select with _mm512_setzero_si512() as the
 * alternative operand. */
6342#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
6343 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
6344 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
6345 (__v16si)_mm512_setzero_si512()))
/* Unmasked form: forwards A and B (as __v8di) and imm (as int) to
 * __builtin_ia32_shuf_i64x2; result cast to __m512i. */
6347#define _mm512_shuffle_i64x2(A, B, imm) \
6348 ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
6349 (__v8di)(__m512i)(B), (int)(imm)))
/* Merge-masking form: the unmasked shuffle result and W are passed with
 * the 8-bit mask U to __builtin_ia32_selectq_512 (presumably a
 * per-element choice between the two -- confirm). */
6351#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
6352 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
6353 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
6354 (__v8di)(__m512i)(W)))
/* Zero-masking form: same select with _mm512_setzero_si512() as the
 * alternative operand. */
6356#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
6357 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
6358 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
6359 (__v8di)_mm512_setzero_si512()))
/* Unmasked form: forwards A and B (as __v8df) and the control M (as int)
 * to __builtin_ia32_shufpd512; result cast to __m512d. */
6361#define _mm512_shuffle_pd(A, B, M) \
6362 ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
6363 (__v8df)(__m512d)(B), (int)(M)))
/* Merge-masking form: the unmasked shuffle result and W are passed with
 * the 8-bit mask U to __builtin_ia32_selectpd_512 (presumably a
 * per-element choice between the two -- confirm). */
6365#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
6366 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6367 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
6368 (__v8df)(__m512d)(W)))
/* Zero-masking form: same select with _mm512_setzero_pd() as the
 * alternative operand. */
6370#define _mm512_maskz_shuffle_pd(U, A, B, M) \
6371 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6372 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
6373 (__v8df)_mm512_setzero_pd()))
/* Unmasked form: forwards A and B (as __v16sf) and the control M (as int)
 * to __builtin_ia32_shufps512; result cast to __m512. */
6375#define _mm512_shuffle_ps(A, B, M) \
6376 ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
6377 (__v16sf)(__m512)(B), (int)(M)))
/* Merge-masking form: the unmasked shuffle result and W are passed with
 * the 16-bit mask U to __builtin_ia32_selectps_512 (presumably a
 * per-element choice between the two -- confirm). */
6379#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
6380 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6381 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
6382 (__v16sf)(__m512)(W)))
/* Zero-masking form: same select with _mm512_setzero_ps() as the
 * alternative operand. */
6384#define _mm512_maskz_shuffle_ps(U, A, B, M) \
6385 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6386 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
6387 (__v16sf)_mm512_setzero_ps()))
/* Unmasked form: forwards A and B (as __v2df) to
 * __builtin_ia32_sqrtsd_round_mask with a zero pass-through vector, an
 * all-ones mask ((__mmask8)-1) and R as the final int argument. */
6389#define _mm_sqrt_round_sd(A, B, R) \
6390 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6391 (__v2df)(__m128d)(B), \
6392 (__v2df)_mm_setzero_pd(), \
6393 (__mmask8)-1, (int)(R)))
6398 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
/* Merge-masking form: forwards A and B (as __v2df) to
 * __builtin_ia32_sqrtsd_round_mask with W as the pass-through vector,
 * U as the mask and R as the final int argument. */
6405#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6406 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6407 (__v2df)(__m128d)(B), \
6408 (__v2df)(__m128d)(W), \
6409 (__mmask8)(U), (int)(R)))
6414 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
/* Zero-masking form: forwards A and B (as __v2df) to
 * __builtin_ia32_sqrtsd_round_mask with a zero pass-through vector,
 * U as the mask and R as the final int argument. */
6421#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6422 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6423 (__v2df)(__m128d)(B), \
6424 (__v2df)_mm_setzero_pd(), \
6425 (__mmask8)(U), (int)(R)))
/* Unmasked form: forwards A and B (as __v4sf) to
 * __builtin_ia32_sqrtss_round_mask with a zero pass-through vector, an
 * all-ones mask ((__mmask8)-1) and R as the final int argument. */
6427#define _mm_sqrt_round_ss(A, B, R) \
6428 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6429 (__v4sf)(__m128)(B), \
6430 (__v4sf)_mm_setzero_ps(), \
6431 (__mmask8)-1, (int)(R)))
6436 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6443#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6444 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6445 (__v4sf)(__m128)(B), \
6446 (__v4sf)(__m128)(W), (__mmask8)(U), \
6452 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
/* Zero-masking form: forwards A and B (as __v4sf) to
 * __builtin_ia32_sqrtss_round_mask with a zero pass-through vector,
 * U as the mask and R as the final int argument. */
6459#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6460 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6461 (__v4sf)(__m128)(B), \
6462 (__v4sf)_mm_setzero_ps(), \
6463 (__mmask8)(U), (int)(R)))
6467 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6468 0, 1, 2, 3, 0, 1, 2, 3,
6469 0, 1, 2, 3, 0, 1, 2, 3);
6474 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
6481 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
6488 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6489 0, 1, 2, 3, 0, 1, 2, 3);
6495 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
6503 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
6510 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6511 0, 1, 2, 3, 0, 1, 2, 3,
6512 0, 1, 2, 3, 0, 1, 2, 3);
6517 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
6524 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
6531 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6532 0, 1, 2, 3, 0, 1, 2, 3);
6538 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
6546 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
6553 return (__m512d)__builtin_ia32_selectpd_512(__M,
6560 return (__m512d)__builtin_ia32_selectpd_512(__M,
6567 return (__m512)__builtin_ia32_selectps_512(__M,
6574 return (__m512)__builtin_ia32_selectps_512(__M,
6582 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6590 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6591 (__v16qi) __O, __M);
6597 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6605 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
6611 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6619 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6620 (__v16hi) __O, __M);
6626 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6634 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*)
__P, (__v16si) __A, __M);
6640 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6648 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6649 (__v16qi) __O, __M);
6655 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6663 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
6669 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6677 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6684 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6692 __builtin_ia32_pmovsqd512mem_mask ((__v8si *)
__P, (__v8di) __A, __M);
6698 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6706 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6713 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6721 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *)
__P, (__v8di) __A, __M);
6727 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6735 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6743 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6751 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
6757 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6765 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6773 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6781 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*)
__P, (__v16si) __A, __M);
6787 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6795 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6803 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6811 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
6817 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6825 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6832 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6840 __builtin_ia32_pmovusqd512mem_mask ((__v8si*)
__P, (__v8di) __A, __M);
6846 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6854 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6861 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6869 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*)
__P, (__v8di) __A, __M);
6875 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6883 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6884 (__v16qi) __O, __M);
6890 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6898 __builtin_ia32_pmovdb512mem_mask ((__v16qi *)
__P, (__v16si) __A, __M);
6904 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6912 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6913 (__v16hi) __O, __M);
6919 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6927 __builtin_ia32_pmovdw512mem_mask ((__v16hi *)
__P, (__v16si) __A, __M);
6933 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6941 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6942 (__v16qi) __O, __M);
6948 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6956 __builtin_ia32_pmovqb512mem_mask ((__v16qi *)
__P, (__v8di) __A, __M);
6962 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6970 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6977 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6985 __builtin_ia32_pmovqd512mem_mask ((__v8si *)
__P, (__v8di) __A, __M);
6991 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6999 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7006 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7014 __builtin_ia32_pmovqw512mem_mask ((__v8hi *)
__P, (__v8di) __A, __M);
7017#define _mm512_extracti32x4_epi32(A, imm) \
7018 ((__m128i)__builtin_ia32_extracti32x4_mask( \
7019 (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
7022#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
7023 ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7024 (__v4si)(__m128i)(W), \
7027#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
7028 ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7029 (__v4si)_mm_setzero_si128(), \
7032#define _mm512_extracti64x4_epi64(A, imm) \
7033 ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7034 (__v4di)_mm256_setzero_si256(), \
7037#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
7038 ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7039 (__v4di)(__m256i)(W), \
7042#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
7043 ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7044 (__v4di)_mm256_setzero_si256(), \
/* Unmasked form: forwards the 512-bit vector A (as __v8df), the 256-bit
 * vector B (as __v4df) and imm (as int) to __builtin_ia32_insertf64x4.
 * imm presumably selects which 256-bit half of A is replaced by B --
 * confirm against the ISA reference. */
7047#define _mm512_insertf64x4(A, B, imm) \
7048 ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
7049 (__v4df)(__m256d)(B), (int)(imm)))
/* Merge-masking form: the unmasked insert result and W are passed with the
 * 8-bit mask U to __builtin_ia32_selectpd_512. */
7051#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
7052 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7053 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
7054 (__v8df)(__m512d)(W)))
/* Zero-masking form: same select with _mm512_setzero_pd() as the
 * alternative operand. */
7056#define _mm512_maskz_insertf64x4(U, A, B, imm) \
7057 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7058 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
7059 (__v8df)_mm512_setzero_pd()))
/* Unmasked form: forwards the 512-bit vector A (as __v8di), the 256-bit
 * vector B (as __v4di) and imm (as int) to __builtin_ia32_inserti64x4.
 * imm presumably selects which 256-bit half of A is replaced by B --
 * confirm against the ISA reference. */
7061#define _mm512_inserti64x4(A, B, imm) \
7062 ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
7063 (__v4di)(__m256i)(B), (int)(imm)))
/* Merge-masking form: the unmasked insert result and W are passed with the
 * 8-bit mask U to __builtin_ia32_selectq_512. */
7065#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
7066 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
7067 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
7068 (__v8di)(__m512i)(W)))
/* Zero-masking form: same select with _mm512_setzero_si512() as the
 * alternative operand. */
7070#define _mm512_maskz_inserti64x4(U, A, B, imm) \
7071 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
7072 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
7073 (__v8di)_mm512_setzero_si512()))
/* Unmasked form: forwards the 512-bit vector A (as __v16sf), the 128-bit
 * vector B (as __v4sf) and imm (as int) to __builtin_ia32_insertf32x4.
 * imm presumably selects which 128-bit lane of A is replaced by B --
 * confirm against the ISA reference. */
7075#define _mm512_insertf32x4(A, B, imm) \
7076 ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
7077 (__v4sf)(__m128)(B), (int)(imm)))
/* Merge-masking form: the unmasked insert result and W are passed with the
 * 16-bit mask U to __builtin_ia32_selectps_512. */
7079#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
7080 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
7081 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
7082 (__v16sf)(__m512)(W)))
/* Zero-masking form: same select with _mm512_setzero_ps() as the
 * alternative operand. */
7084#define _mm512_maskz_insertf32x4(U, A, B, imm) \
7085 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
7086 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
7087 (__v16sf)_mm512_setzero_ps()))
/* Unmasked form: forwards the 512-bit vector A (as __v16si), the 128-bit
 * vector B (as __v4si) and imm (as int) to __builtin_ia32_inserti32x4.
 * imm presumably selects which 128-bit lane of A is replaced by B --
 * confirm against the ISA reference. */
7089#define _mm512_inserti32x4(A, B, imm) \
7090 ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
7091 (__v4si)(__m128i)(B), (int)(imm)))
/* Merge-masking form: the unmasked insert result and W are passed with the
 * 16-bit mask U to __builtin_ia32_selectd_512. */
7093#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
7094 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
7095 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
7096 (__v16si)(__m512i)(W)))
/* Zero-masking form: same select with _mm512_setzero_si512() as the
 * alternative operand. */
7098#define _mm512_maskz_inserti32x4(U, A, B, imm) \
7099 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
7100 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
7101 (__v16si)_mm512_setzero_si512()))
/* Unmasked form: forwards A (as __v8df) to __builtin_ia32_getmantpd512_mask.
 * B and C are packed into one int immediate as ((C) << 2) | (B); the
 * pass-through vector is undefined, the mask is all ones ((__mmask8)-1)
 * and R is the final int argument.
 * B is presumably the normalization interval and C the sign control --
 * confirm against the ISA reference. */
7103#define _mm512_getmant_round_pd(A, B, C, R) \
7104 ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7105 (int)(((C)<<2) | (B)), \
7106 (__v8df)_mm512_undefined_pd(), \
7107 (__mmask8)-1, (int)(R)))
/* Merge-masking form: W is the pass-through vector and U the mask. */
7109#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
7110 ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7111 (int)(((C)<<2) | (B)), \
7112 (__v8df)(__m512d)(W), \
7113 (__mmask8)(U), (int)(R)))
/* Zero-masking form: a zero vector is the pass-through and U the mask. */
7115#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
7116 ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7117 (int)(((C)<<2) | (B)), \
7118 (__v8df)_mm512_setzero_pd(), \
7119 (__mmask8)(U), (int)(R)))
/* Non-rounding getmant wrappers: A goes to __builtin_ia32_getmantpd512_mask
 * as __v8df, B and C are packed as ((C) << 2) | (B), and the last argument
 * is _MM_FROUND_CUR_DIRECTION.
 * NOTE(review): _mm512_getmant_round_pd passes five arguments to the same
 * builtin (source, immediate, pass-through, mask, rounding) but only four
 * are visible in each macro below -- the mask argument between the
 * pass-through vector and _MM_FROUND_CUR_DIRECTION appears to be missing.
 * Confirm against the upstream header before relying on these macros. */
7121#define _mm512_getmant_pd(A, B, C) \
7122 ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7123 (int)(((C)<<2) | (B)), \
7124 (__v8df)_mm512_setzero_pd(), \
7126 _MM_FROUND_CUR_DIRECTION))
/* Merge-masking form: W is the pass-through vector.
 * NOTE(review): parameter U is never used in the visible expansion. */
7128#define _mm512_mask_getmant_pd(W, U, A, B, C) \
7129 ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7130 (int)(((C)<<2) | (B)), \
7131 (__v8df)(__m512d)(W), \
7133 _MM_FROUND_CUR_DIRECTION))
/* Zero-masking form: a zero vector is the pass-through.
 * NOTE(review): parameter U is never used in the visible expansion. */
7135#define _mm512_maskz_getmant_pd(U, A, B, C) \
7136 ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7137 (int)(((C)<<2) | (B)), \
7138 (__v8df)_mm512_setzero_pd(), \
7140 _MM_FROUND_CUR_DIRECTION))
/* Unmasked form: forwards A (as __v16sf) to __builtin_ia32_getmantps512_mask.
 * B and C are packed into one int immediate as ((C) << 2) | (B); the
 * pass-through vector is undefined, the mask is all ones ((__mmask16)-1)
 * and R is the final int argument.
 * B is presumably the normalization interval and C the sign control --
 * confirm against the ISA reference. */
7142#define _mm512_getmant_round_ps(A, B, C, R) \
7143 ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7144 (int)(((C)<<2) | (B)), \
7145 (__v16sf)_mm512_undefined_ps(), \
7146 (__mmask16)-1, (int)(R)))
/* Merge-masking form: W is the pass-through vector and U the mask. */
7148#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
7149 ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7150 (int)(((C)<<2) | (B)), \
7151 (__v16sf)(__m512)(W), \
7152 (__mmask16)(U), (int)(R)))
/* Zero-masking form: a zero vector is the pass-through and U the mask. */
7154#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
7155 ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7156 (int)(((C)<<2) | (B)), \
7157 (__v16sf)_mm512_setzero_ps(), \
7158 (__mmask16)(U), (int)(R)))
/* Non-rounding getmant wrappers: A goes to __builtin_ia32_getmantps512_mask
 * as __v16sf, B and C are packed as ((C) << 2) | (B), and the last argument
 * is _MM_FROUND_CUR_DIRECTION.
 * NOTE(review): _mm512_getmant_round_ps passes five arguments to the same
 * builtin (source, immediate, pass-through, mask, rounding) but only four
 * are visible in each macro below -- the mask argument between the
 * pass-through vector and _MM_FROUND_CUR_DIRECTION appears to be missing.
 * Confirm against the upstream header before relying on these macros. */
7160#define _mm512_getmant_ps(A, B, C) \
7161 ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7162 (int)(((C)<<2)|(B)), \
7163 (__v16sf)_mm512_undefined_ps(), \
7165 _MM_FROUND_CUR_DIRECTION))
/* Merge-masking form: W is the pass-through vector.
 * NOTE(review): parameter U is never used in the visible expansion. */
7167#define _mm512_mask_getmant_ps(W, U, A, B, C) \
7168 ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7169 (int)(((C)<<2)|(B)), \
7170 (__v16sf)(__m512)(W), \
7172 _MM_FROUND_CUR_DIRECTION))
/* Zero-masking form: a zero vector is the pass-through.
 * NOTE(review): parameter U is never used in the visible expansion. */
7174#define _mm512_maskz_getmant_ps(U, A, B, C) \
7175 ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7176 (int)(((C)<<2)|(B)), \
7177 (__v16sf)_mm512_setzero_ps(), \
7179 _MM_FROUND_CUR_DIRECTION))
/* Unmasked form: forwards A (as __v8df) to __builtin_ia32_getexppd512_mask
 * with an undefined pass-through vector, an all-ones mask ((__mmask8)-1)
 * and R as the final int argument. */
7181#define _mm512_getexp_round_pd(A, R) \
7182 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7183 (__v8df)_mm512_undefined_pd(), \
7184 (__mmask8)-1, (int)(R)))
/* Merge-masking form: W is the pass-through vector and U the mask. */
7186#define _mm512_mask_getexp_round_pd(W, U, A, R) \
7187 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7188 (__v8df)(__m512d)(W), \
7189 (__mmask8)(U), (int)(R)))
/* Zero-masking form: a zero vector is the pass-through and U the mask. */
7191#define _mm512_maskz_getexp_round_pd(U, A, R) \
7192 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7193 (__v8df)_mm512_setzero_pd(), \
7194 (__mmask8)(U), (int)(R)))
7199 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7208 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7217 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
/* Unmasked form: forwards A (as __v16sf) to __builtin_ia32_getexpps512_mask
 * with an undefined pass-through vector, an all-ones mask ((__mmask16)-1)
 * and R as the final int argument. */
7223#define _mm512_getexp_round_ps(A, R) \
7224 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7225 (__v16sf)_mm512_undefined_ps(), \
7226 (__mmask16)-1, (int)(R)))
/* Merge-masking form: W is the pass-through vector and U the mask. */
7228#define _mm512_mask_getexp_round_ps(W, U, A, R) \
7229 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7230 (__v16sf)(__m512)(W), \
7231 (__mmask16)(U), (int)(R)))
/* Zero-masking form: a zero vector is the pass-through and U the mask. */
7233#define _mm512_maskz_getexp_round_ps(U, A, R) \
7234 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7235 (__v16sf)_mm512_setzero_ps(), \
7236 (__mmask16)(U), (int)(R)))
7241 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7250 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7259 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7265#define _mm512_i64gather_ps(index, addr, scale) \
7266 ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
7267 (void const *)(addr), \
7268 (__v8di)(__m512i)(index), (__mmask8)-1, \
/* Masked gather through __builtin_ia32_gatherdiv16sf: v1_old (as __v8sf) is
 * the first argument, addr is passed as void const *, index as __v8di,
 * mask as __mmask8 and scale as int. Result cast to __m256.
 * Elements whose mask bit is clear presumably keep the v1_old value --
 * confirm against the ISA reference. */
7271#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
7272 ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
7273 (void const *)(addr), \
7274 (__v8di)(__m512i)(index), \
7275 (__mmask8)(mask), (int)(scale)))
/* Unmasked gather through __builtin_ia32_gatherdiv16si: the first argument
 * is an undefined 256-bit vector and the mask is all ones ((__mmask8)-1).
 * Result cast to __m256i. */
7277#define _mm512_i64gather_epi32(index, addr, scale) \
7278 ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
7279 (void const *)(addr), \
7280 (__v8di)(__m512i)(index), \
7281 (__mmask8)-1, (int)(scale)))
/* Masked counterpart: v1_old (as __v8si) replaces the undefined vector and
 * mask replaces the all-ones mask. */
7283#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
7284 ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
7285 (void const *)(addr), \
7286 (__v8di)(__m512i)(index), \
7287 (__mmask8)(mask), (int)(scale)))
7289#define _mm512_i64gather_pd(index, addr, scale) \
7290 ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
7291 (void const *)(addr), \
7292 (__v8di)(__m512i)(index), (__mmask8)-1, \
/* Masked gather through __builtin_ia32_gatherdiv8df: v1_old (as __v8df) is
 * the first argument, addr is passed as void const *, index as __v8di,
 * mask as __mmask8 and scale as int. Result cast to __m512d. */
7295#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
7296 ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
7297 (void const *)(addr), \
7298 (__v8di)(__m512i)(index), \
7299 (__mmask8)(mask), (int)(scale)))
7301#define _mm512_i64gather_epi64(index, addr, scale) \
7302 ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
7303 (void const *)(addr), \
7304 (__v8di)(__m512i)(index), (__mmask8)-1, \
/* Masked gather through __builtin_ia32_gatherdiv8di: v1_old (as __v8di) is
 * the first argument, addr is passed as void const *, index as __v8di,
 * mask as __mmask8 and scale as int. Result cast to __m512i. */
7307#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7308 ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
7309 (void const *)(addr), \
7310 (__v8di)(__m512i)(index), \
7311 (__mmask8)(mask), (int)(scale)))
/* Unmasked gather through __builtin_ia32_gathersiv16sf: the first argument
 * is an undefined vector, addr is passed as void const *, index as __v16si,
 * the mask is all ones ((__mmask16)-1) and scale is passed as int.
 * Result cast to __m512.
 * index is an integer vector, so it is cast through __m512i (matching
 * _mm512_i32gather_epi32 and _mm512_i32scatter_ps) rather than through the
 * float vector type __m512; both are same-size vector bitcasts, so the
 * value handed to the builtin is unchanged. */
7313#define _mm512_i32gather_ps(index, addr, scale) \
7314 ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
7315 (void const *)(addr), \
7316 (__v16si)(__m512i)(index), \
7317 (__mmask16)-1, (int)(scale)))
/* Masked counterpart: v1_old (as __v16sf) replaces the undefined vector and
 * mask (as __mmask16) replaces the all-ones mask. The index is likewise
 * cast through __m512i. */
7319#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
7320 ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
7321 (void const *)(addr), \
7322 (__v16si)(__m512i)(index), \
7323 (__mmask16)(mask), (int)(scale)))
/* Unmasked gather through __builtin_ia32_gathersiv16si: the first argument
 * is an undefined vector, addr is passed as void const *, index as __v16si,
 * the mask is all ones ((__mmask16)-1) and scale is passed as int.
 * Result cast to __m512i. */
7325#define _mm512_i32gather_epi32(index, addr, scale) \
7326 ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
7327 (void const *)(addr), \
7328 (__v16si)(__m512i)(index), \
7329 (__mmask16)-1, (int)(scale)))
/* Masked counterpart: v1_old (as __v16si) replaces the undefined vector and
 * mask (as __mmask16) replaces the all-ones mask. */
7331#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
7332 ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
7333 (void const *)(addr), \
7334 (__v16si)(__m512i)(index), \
7335 (__mmask16)(mask), (int)(scale)))
7337#define _mm512_i32gather_pd(index, addr, scale) \
7338 ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
7339 (void const *)(addr), \
7340 (__v8si)(__m256i)(index), (__mmask8)-1, \
/* Masked gather through __builtin_ia32_gathersiv8df: v1_old (as __v8df) is
 * the first argument, addr is passed as void const *, the 256-bit index as
 * __v8si, mask as __mmask8 and scale as int. Result cast to __m512d. */
7343#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
7344 ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
7345 (void const *)(addr), \
7346 (__v8si)(__m256i)(index), \
7347 (__mmask8)(mask), (int)(scale)))
7349#define _mm512_i32gather_epi64(index, addr, scale) \
7350 ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
7351 (void const *)(addr), \
7352 (__v8si)(__m256i)(index), (__mmask8)-1, \
/* Masked gather through __builtin_ia32_gathersiv8di: v1_old (as __v8di) is
 * the first argument, addr is passed as void const *, the 256-bit index as
 * __v8si, mask as __mmask8 and scale as int. Result cast to __m512i. */
7355#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
7356 ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
7357 (void const *)(addr), \
7358 (__v8si)(__m256i)(index), \
7359 (__mmask8)(mask), (int)(scale)))
/*
 * _mm512_i64scatter_ps(addr, index, v1, scale)
 *
 * Unmasked form: calls __builtin_ia32_scatterdiv16sf with an all-ones
 * __mmask8. addr becomes `void *`, index is reinterpreted as __v8di,
 * v1 is a 256-bit value reinterpreted as __v8sf, scale becomes int.
 * The expansion is a bare call expression (no result cast).
 */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
/*
 * _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale)
 *
 * Masked form: calls __builtin_ia32_scatterdiv16sf with mask converted to
 * __mmask8. addr becomes `void *`, index is reinterpreted as __v8di,
 * v1 is a 256-bit value reinterpreted as __v8sf, scale becomes int.
 */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
/*
 * _mm512_i64scatter_epi32(addr, index, v1, scale)
 *
 * Unmasked form: calls __builtin_ia32_scatterdiv16si with an all-ones
 * __mmask8. addr becomes `void *`, index is reinterpreted as __v8di,
 * v1 is a 256-bit value reinterpreted as __v8si, scale becomes int.
 */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
/*
 * _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale)
 *
 * Masked form: calls __builtin_ia32_scatterdiv16si with mask converted to
 * __mmask8. addr becomes `void *`, index is reinterpreted as __v8di,
 * v1 is a 256-bit value reinterpreted as __v8si, scale becomes int.
 */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
/*
 * _mm512_i64scatter_pd(addr, index, v1, scale)
 *
 * Unmasked form: calls __builtin_ia32_scatterdiv8df with an all-ones
 * __mmask8. addr becomes `void *`, index is reinterpreted as __v8di,
 * v1 as __v8df, scale becomes int.
 */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
/*
 * _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale)
 *
 * Masked form: calls __builtin_ia32_scatterdiv8df with mask converted to
 * __mmask8. addr becomes `void *`, index is reinterpreted as __v8di,
 * v1 as __v8df, scale becomes int.
 */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
/*
 * _mm512_i64scatter_epi64(addr, index, v1, scale)
 *
 * Unmasked form: calls __builtin_ia32_scatterdiv8di with an all-ones
 * __mmask8. addr becomes `void *`, index and v1 are both reinterpreted
 * as __v8di, scale becomes int.
 */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
/*
 * _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale)
 *
 * Masked form: calls __builtin_ia32_scatterdiv8di with mask converted to
 * __mmask8. addr becomes `void *`, index and v1 are both reinterpreted
 * as __v8di, scale becomes int.
 */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
/*
 * _mm512_i32scatter_ps(addr, index, v1, scale)
 *
 * Unmasked form: calls __builtin_ia32_scattersiv16sf with an all-ones
 * __mmask16. addr becomes `void *`, index is reinterpreted as __v16si,
 * v1 as __v16sf, scale becomes int.
 */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (void *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale))
/*
 * _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale)
 *
 * Masked form: calls __builtin_ia32_scattersiv16sf with mask converted to
 * __mmask16. addr becomes `void *`, index is reinterpreted as __v16si,
 * v1 as __v16sf, scale becomes int.
 */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (void *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale))
/*
 * _mm512_i32scatter_epi32(addr, index, v1, scale)
 *
 * Unmasked form: calls __builtin_ia32_scattersiv16si with an all-ones
 * __mmask16. addr becomes `void *`, index and v1 are both reinterpreted
 * as __v16si, scale becomes int.
 */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (void *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale))
/*
 * _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale)
 *
 * Masked form: calls __builtin_ia32_scattersiv16si with mask converted to
 * __mmask16. addr becomes `void *`, index and v1 are both reinterpreted
 * as __v16si, scale becomes int.
 */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (void *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale))
/*
 * _mm512_i32scatter_pd(addr, index, v1, scale)
 *
 * Unmasked form: calls __builtin_ia32_scattersiv8df with an all-ones
 * __mmask8. addr becomes `void *`, index is a 256-bit value reinterpreted
 * as __v8si, v1 is reinterpreted as __v8df, scale becomes int.
 */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
/*
 * _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale)
 *
 * Masked form: calls __builtin_ia32_scattersiv8df with mask converted to
 * __mmask8. addr becomes `void *`, index is a 256-bit value reinterpreted
 * as __v8si, v1 is reinterpreted as __v8df, scale becomes int.
 */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale))
/*
 * _mm512_i32scatter_epi64(addr, index, v1, scale)
 *
 * Unmasked form: calls __builtin_ia32_scattersiv8di with an all-ones
 * __mmask8. addr becomes `void *`, index is a 256-bit value reinterpreted
 * as __v8si, v1 is reinterpreted as __v8di, scale becomes int.
 */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (void *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
/*
 * _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale)
 *
 * Masked form: calls __builtin_ia32_scattersiv8di with mask converted to
 * __mmask8. addr becomes `void *`, index is a 256-bit value reinterpreted
 * as __v8si, v1 is reinterpreted as __v8di, scale becomes int.
 */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale))
7444 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7451#define _mm_fmadd_round_ss(A, B, C, R) \
7452 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7453 (__v4sf)(__m128)(B), \
7454 (__v4sf)(__m128)(C), (__mmask8)-1, \
7457#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7458 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7459 (__v4sf)(__m128)(A), \
7460 (__v4sf)(__m128)(B), (__mmask8)(U), \
7466 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7473#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7474 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7475 (__v4sf)(__m128)(B), \
7476 (__v4sf)(__m128)(C), (__mmask8)(U), \
7482 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7489#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7490 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7491 (__v4sf)(__m128)(X), \
7492 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7498 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7505#define _mm_fmsub_round_ss(A, B, C, R) \
7506 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7507 (__v4sf)(__m128)(B), \
7508 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7511#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7512 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7513 (__v4sf)(__m128)(A), \
7514 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7520 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7527#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7528 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7529 (__v4sf)(__m128)(B), \
7530 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7536 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7543#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7544 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7545 (__v4sf)(__m128)(X), \
7546 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7552 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7559#define _mm_fnmadd_round_ss(A, B, C, R) \
7560 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7561 -(__v4sf)(__m128)(B), \
7562 (__v4sf)(__m128)(C), (__mmask8)-1, \
7565#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7566 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7567 -(__v4sf)(__m128)(A), \
7568 (__v4sf)(__m128)(B), (__mmask8)(U), \
7574 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7581#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7582 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7583 -(__v4sf)(__m128)(B), \
7584 (__v4sf)(__m128)(C), (__mmask8)(U), \
7590 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7597#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7598 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7599 -(__v4sf)(__m128)(X), \
7600 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7606 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7613#define _mm_fnmsub_round_ss(A, B, C, R) \
7614 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7615 -(__v4sf)(__m128)(B), \
7616 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7619#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7620 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7621 -(__v4sf)(__m128)(A), \
7622 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7628 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7635#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7636 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7637 -(__v4sf)(__m128)(B), \
7638 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7644 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7651#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7652 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7653 -(__v4sf)(__m128)(X), \
7654 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7660 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7667#define _mm_fmadd_round_sd(A, B, C, R) \
7668 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7669 (__v2df)(__m128d)(B), \
7670 (__v2df)(__m128d)(C), (__mmask8)-1, \
7673#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
7674 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7675 (__v2df)(__m128d)(A), \
7676 (__v2df)(__m128d)(B), (__mmask8)(U), \
7682 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7689#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
7690 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7691 (__v2df)(__m128d)(B), \
7692 (__v2df)(__m128d)(C), (__mmask8)(U), \
7698 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7705#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
7706 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7707 (__v2df)(__m128d)(X), \
7708 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7714 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7721#define _mm_fmsub_round_sd(A, B, C, R) \
7722 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7723 (__v2df)(__m128d)(B), \
7724 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7727#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
7728 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7729 (__v2df)(__m128d)(A), \
7730 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7736 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/*
 * _mm_maskz_fmsub_round_sd(U, A, B, C, R)
 *
 * Implemented on top of the fused multiply-ADD builtin
 * __builtin_ia32_vfmaddsd3_maskz: A and B are passed through as __v2df and
 * the third operand is the negation of C. U is converted to __mmask8 and
 * R to int. The result is cast to __m128d.
 */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      -(__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)))
7752 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
/*
 * _mm_mask3_fmsub_round_sd(W, X, Y, U, R)
 *
 * Forwards W, X and Y (each as __v2df, none negated) to the dedicated
 * __builtin_ia32_vfmsubsd3_mask3 builtin, with U converted to __mmask8 and
 * R to int. The result is cast to __m128d.
 */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)))
7768 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7775#define _mm_fnmadd_round_sd(A, B, C, R) \
7776 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7777 -(__v2df)(__m128d)(B), \
7778 (__v2df)(__m128d)(C), (__mmask8)-1, \
7781#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
7782 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7783 -(__v2df)(__m128d)(A), \
7784 (__v2df)(__m128d)(B), (__mmask8)(U), \
7790 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7797#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
7798 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7799 -(__v2df)(__m128d)(B), \
7800 (__v2df)(__m128d)(C), (__mmask8)(U), \
7806 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7813#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
7814 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7815 -(__v2df)(__m128d)(X), \
7816 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7822 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7829#define _mm_fnmsub_round_sd(A, B, C, R) \
7830 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7831 -(__v2df)(__m128d)(B), \
7832 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7835#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
7836 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7837 -(__v2df)(__m128d)(A), \
7838 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7844 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7851#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
7852 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7853 -(__v2df)(__m128d)(B), \
7854 -(__v2df)(__m128d)(C), \
7861 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
/*
 * _mm_mask3_fnmsub_round_sd(W, X, Y, U, R)
 *
 * Uses __builtin_ia32_vfmsubsd3_mask3 with the second operand negated:
 * W and Y are passed as __v2df unchanged, X is passed as -(__v2df)X.
 * U is converted to __mmask8 and R to int. The result is cast to __m128d.
 */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)))
/*
 * _mm512_permutex_pd(X, C)
 *
 * Expands to __builtin_ia32_permdf512 applied to X (as __v8df) and C
 * (as int); the result is cast to __m512d.
 */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512( \
      (__v8df)(__m512d)(X), \
      (int)(C)))
/*
 * _mm512_mask_permutex_pd(W, U, X, C)
 *
 * Computes _mm512_permutex_pd(X, C) and feeds it, together with W, to
 * __builtin_ia32_selectpd_512 under mask U (as __mmask8): the permuted
 * vector is the first select operand and W is the second.
 */
#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)(__m512d)(W)))
/*
 * _mm512_maskz_permutex_pd(U, X, C)
 *
 * Computes _mm512_permutex_pd(X, C) and feeds it to
 * __builtin_ia32_selectpd_512 under mask U (as __mmask8), with
 * _mm512_setzero_pd() as the second select operand.
 */
#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()))
/*
 * _mm512_permutex_epi64(X, C)
 *
 * Expands to __builtin_ia32_permdi512 applied to X (as __v8di) and C
 * (as int); the result is cast to __m512i.
 */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512( \
      (__v8di)(__m512i)(X), \
      (int)(C)))
/*
 * _mm512_mask_permutex_epi64(W, U, X, C)
 *
 * Computes _mm512_permutex_epi64(X, C) and feeds it, together with W, to
 * __builtin_ia32_selectq_512 under mask U (as __mmask8): the permuted
 * vector is the first select operand and W is the second.
 */
#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)(__m512i)(W)))
/*
 * _mm512_maskz_permutex_epi64(U, X, C)
 *
 * Computes _mm512_permutex_epi64(X, C) and feeds it to
 * __builtin_ia32_selectq_512 under mask U (as __mmask8), with
 * _mm512_setzero_si512() as the second select operand.
 */
#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)_mm512_setzero_si512()))
7902 return (__m512d)__builtin_ia32_permvardf512((__v8df)
__Y, (__v8di) __X);
7908 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
7915 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
7922 return (__m512i)__builtin_ia32_permvardi512((__v8di)
__Y, (__v8di)__X);
7927 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
7935 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
7942 return (__m512)__builtin_ia32_permvarsf512((__v16sf)
__Y, (__v16si)__X);
7947 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
7954 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
7961 return (__m512i)__builtin_ia32_permvarsi512((__v16si)
__Y, (__v16si)__X);
/* Alternate spelling: plain macro alias for _mm512_permutexvar_epi32. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
7968 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
7976 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
/* Alternate spelling: plain macro alias for _mm512_mask_permutexvar_epi32. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8010 return (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8015 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8020 *__C = (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8021 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
/*
 * _k<op>_mask16 spellings of the 16-bit mask operations.
 * Each name is a plain macro alias for the matching _mm512_k<op> identifier.
 */
#define _kand_mask16   _mm512_kand
#define _kandn_mask16  _mm512_kandn
#define _knot_mask16   _mm512_knot
#define _kor_mask16    _mm512_kor
#define _kxnor_mask16  _mm512_kxnor
#define _kxor_mask16   _mm512_kxor
/*
 * _kshiftli_mask16(A, I)
 *
 * Expands to __builtin_ia32_kshiftlihi with A converted to __mmask16 and
 * I converted to unsigned int; the result is cast to __mmask16.
 */
#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi( \
      (__mmask16)(A), \
      (unsigned int)(I)))
/*
 * _kshiftri_mask16(A, I)
 *
 * Expands to __builtin_ia32_kshiftrihi with A converted to __mmask16 and
 * I converted to unsigned int; the result is cast to __mmask16.
 */
#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi( \
      (__mmask16)(A), \
      (unsigned int)(I)))
8052static __inline__
unsigned int
8054 return (
unsigned int)__builtin_ia32_kmovw((
__mmask16)__A);
8076 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)
__P);
8083 return (__m512i) __builtin_nontemporal_load((
const __v8di_aligned *)
__P);
8090 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)
__P);
8096 typedef __v16sf __v16sf_aligned
__attribute__((aligned(64)));
8097 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)
__P);
8102 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8109 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8117 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8124 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8132 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8139 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8147 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8154 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
/*
 * _mm_cmp_round_ss_mask(X, Y, P, R)
 *
 * Unmasked form: calls __builtin_ia32_cmpss_mask on X and Y (as __v4sf)
 * with P and R converted to int and an all-ones __mmask8.
 * The result is cast to __mmask8.
 */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      (int)(R)))
/*
 * _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)
 *
 * Masked form: calls __builtin_ia32_cmpss_mask on X and Y (as __v4sf)
 * with P and R converted to int and M converted to __mmask8.
 * The result is cast to __mmask8.
 */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      (int)(R)))
8170#define _mm_cmp_ss_mask(X, Y, P) \
8171 ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8172 (__v4sf)(__m128)(Y), (int)(P), \
8174 _MM_FROUND_CUR_DIRECTION))
8176#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
8177 ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8178 (__v4sf)(__m128)(Y), (int)(P), \
8180 _MM_FROUND_CUR_DIRECTION))
/*
 * _mm_cmp_round_sd_mask(X, Y, P, R)
 *
 * Unmasked form: calls __builtin_ia32_cmpsd_mask on X and Y (as __v2df)
 * with P and R converted to int and an all-ones __mmask8.
 * The result is cast to __mmask8.
 */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      (int)(R)))
/*
 * _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)
 *
 * Masked form: calls __builtin_ia32_cmpsd_mask on X and Y (as __v2df)
 * with P and R converted to int and M converted to __mmask8.
 * The result is cast to __mmask8.
 */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      (int)(R)))
8192#define _mm_cmp_sd_mask(X, Y, P) \
8193 ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8194 (__v2df)(__m128d)(Y), (int)(P), \
8196 _MM_FROUND_CUR_DIRECTION))
8198#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
8199 ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8200 (__v2df)(__m128d)(Y), (int)(P), \
8202 _MM_FROUND_CUR_DIRECTION))
8257 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8258 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8263 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8270 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8278 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8279 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8284 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8291 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8298 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B), __W);
8303 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B),
8309 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B), __W);
8314 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B),
8321 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8327 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8333 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8337 return (__m128) __builtin_ia32_loadss128_mask ((
const __v4sf *) __A, src, __U & 1);
8343 return (__m128)__builtin_ia32_loadss128_mask ((
const __v4sf *) __A,
8351 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8355 return (__m128d) __builtin_ia32_loadsd128_mask ((
const __v2df *) __A, src, __U & 1);
8361 return (__m128d) __builtin_ia32_loadsd128_mask ((
const __v2df *) __A,
/*
 * _mm512_shuffle_epi32(A, I)
 *
 * Expands to __builtin_ia32_pshufd512 applied to A (as __v16si) and I
 * (as int); the result is cast to __m512i.
 */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512( \
      (__v16si)(__m512i)(A), \
      (int)(I)))
/*
 * _mm512_mask_shuffle_epi32(W, U, A, I)
 *
 * Computes _mm512_shuffle_epi32(A, I) and feeds it, together with W, to
 * __builtin_ia32_selectd_512 under mask U (as __mmask16): the shuffled
 * vector is the first select operand and W is the second.
 */
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)(__m512i)(W)))
/*
 * _mm512_maskz_shuffle_epi32(U, A, I)
 *
 * Computes _mm512_shuffle_epi32(A, I) and feeds it to
 * __builtin_ia32_selectd_512 under mask U (as __mmask16), with
 * _mm512_setzero_si512() as the second select operand.
 */
#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)_mm512_setzero_si512()))
8382 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8390 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8398 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8406 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8414 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)
__P,
8422 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)
__P,
8430 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)
__P,
8438 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)
__P,
8446 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)
__P,
8454 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)
__P,
8462 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)
__P,
8470 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)
__P,
8478 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8486 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8494 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8502 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
/*
 * _mm512_cvt_roundps_pd(A, R)
 *
 * Unmasked form: calls __builtin_ia32_cvtps2pd512_mask on A (a 256-bit
 * value as __v8sf) with an undefined pass-through vector
 * (_mm512_undefined_pd()), an all-ones __mmask8 and R converted to int.
 * The result is cast to __m512d.
 */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)))
/*
 * _mm512_mask_cvt_roundps_pd(W, U, A, R)
 *
 * Masked form: calls __builtin_ia32_cvtps2pd512_mask on A (a 256-bit value
 * as __v8sf) with W (as __v8df) as the pass-through vector, U converted to
 * __mmask8 and R converted to int. The result is cast to __m512d.
 */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)))
/*
 * _mm512_maskz_cvt_roundps_pd(U, A, R)
 *
 * Zero-source form: calls __builtin_ia32_cvtps2pd512_mask on A (a 256-bit
 * value as __v8sf) with _mm512_setzero_pd() as the pass-through vector,
 * U converted to __mmask8 and R converted to int.
 * The result is cast to __m512d.
 */
#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)))
8522static __inline__ __m512d
8524 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8529 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8536 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8553 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U, (__v8df)__A,
8559 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U, (__v8df)__A,
8565 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U, (__v16sf)__A,
8571 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U, (__v16sf)__A,
8578 __builtin_ia32_compressstoredf512_mask ((__v8df *)
__P, (__v8df) __A,
8585 __builtin_ia32_compressstoredi512_mask ((__v8di *)
__P, (__v8di) __A,
8592 __builtin_ia32_compressstoresf512_mask ((__v16sf *)
__P, (__v16sf) __A,
8599 __builtin_ia32_compressstoresi512_mask ((__v16si *)
__P, (__v16si) __A,
/*
 * _mm_cvt_roundsd_ss(A, B, R)
 *
 * Unmasked form: calls __builtin_ia32_cvtsd2ss_round_mask with A (as
 * __v4sf), B (as __v2df), an undefined pass-through vector
 * (_mm_undefined_ps()), an all-ones __mmask8 and R converted to int.
 * The result is cast to __m128.
 */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v2df)(__m128d)(B), \
      (__v4sf)_mm_undefined_ps(), \
      (__mmask8)-1, \
      (int)(R)))
/*
 * _mm_mask_cvt_roundsd_ss(W, U, A, B, R)
 *
 * Masked form: calls __builtin_ia32_cvtsd2ss_round_mask with A (as
 * __v4sf), B (as __v2df), W (as __v4sf) as the pass-through vector,
 * U converted to __mmask8 and R converted to int.
 * The result is cast to __m128.
 */
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v2df)(__m128d)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)))
/*
 * _mm_maskz_cvt_roundsd_ss(U, A, B, R)
 *
 * Zero-source form: calls __builtin_ia32_cvtsd2ss_round_mask with A (as
 * __v4sf), B (as __v2df), _mm_setzero_ps() as the pass-through vector,
 * U converted to __mmask8 and R converted to int.
 * The result is cast to __m128.
 */
#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v2df)(__m128d)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)))
8623 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8631 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
/*
 * Alternate spellings of the scalar conversion intrinsics: every `_i32` /
 * `_i64` name below is a plain macro alias for the corresponding `_si32` /
 * `_si64` identifier.
 */
#define _mm_cvtss_i32  _mm_cvtss_si32
#define _mm_cvtsd_i32  _mm_cvtsd_si32
#define _mm_cvti32_sd  _mm_cvtsi32_sd
#define _mm_cvti32_ss  _mm_cvtsi32_ss
#define _mm_cvtss_i64  _mm_cvtss_si64
#define _mm_cvtsd_i64  _mm_cvtsd_si64
#define _mm_cvti64_sd  _mm_cvtsi64_sd
#define _mm_cvti64_ss  _mm_cvtsi64_ss
8649#define _mm_cvt_roundi64_sd(A, B, R) \
8650 ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
8653#define _mm_cvt_roundsi64_sd(A, B, R) \
8654 ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
/*
 * _mm_cvt_roundsi32_ss(A, B, R)
 *
 * Expands to __builtin_ia32_cvtsi2ss32 with A as __v4sf, B converted to
 * int and R converted to int; the result is cast to __m128.
 */
#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (int)(R)))
/*
 * _mm_cvt_roundi32_ss(A, B, R)
 *
 * Same expansion as _mm_cvt_roundsi32_ss: __builtin_ia32_cvtsi2ss32 with
 * A as __v4sf, B converted to int and R converted to int; the result is
 * cast to __m128.
 */
#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (int)(R)))
8665#define _mm_cvt_roundsi64_ss(A, B, R) \
8666 ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
8669#define _mm_cvt_roundi64_ss(A, B, R) \
8670 ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
/*
 * _mm_cvt_roundss_sd(A, B, R)
 *
 * Unmasked form: calls __builtin_ia32_cvtss2sd_round_mask with A (as
 * __v2df), B (as __v4sf), an undefined pass-through vector
 * (_mm_undefined_pd()), an all-ones __mmask8 and R converted to int.
 * The result is cast to __m128d.
 */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v4sf)(__m128)(B), \
      (__v2df)_mm_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)))
/*
 * _mm_mask_cvt_roundss_sd(W, U, A, B, R)
 *
 * Masked form: calls __builtin_ia32_cvtss2sd_round_mask with A (as
 * __v2df), B (as __v4sf), W (as __v2df) as the pass-through vector,
 * U converted to __mmask8 and R converted to int.
 * The result is cast to __m128d.
 */
#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v4sf)(__m128)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)))
/*
 * _mm_maskz_cvt_roundss_sd(U, A, B, R)
 *
 * Zero-source form: calls __builtin_ia32_cvtss2sd_round_mask with A (as
 * __v2df), B (as __v4sf), _mm_setzero_pd() as the pass-through vector,
 * U converted to __mmask8 and R converted to int.
 * The result is cast to __m128d.
 */
#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v4sf)(__m128)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)))
8695 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8704 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
/*
 * _mm_cvt_roundu64_sd(A, B, R)
 *
 * Expands to __builtin_ia32_cvtusi2sd64 with A as __v2df, B converted to
 * unsigned long long and R converted to int; the result is cast to __m128d.
 */
#define _mm_cvt_roundu64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtusi2sd64( \
      (__v2df)(__m128d)(A), \
      (unsigned long long)(B), \
      (int)(R)))
8723_mm_cvtu64_sd (__m128d __A,
unsigned long long __B)
8730#define _mm_cvt_roundu32_ss(A, B, R) \
8731 ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
/*
 * _mm_cvt_roundu64_ss(A, B, R)
 *
 * Expands to __builtin_ia32_cvtusi2ss64 with A as __v4sf, B converted to
 * unsigned long long and R converted to int; the result is cast to __m128.
 */
#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64( \
      (__v4sf)(__m128)(A), \
      (unsigned long long)(B), \
      (int)(R)))
8747_mm_cvtu64_ss (__m128 __A,
unsigned long long __B)
8756 return (__m512i) __builtin_ia32_selectd_512(__M,
8763 return (__m512i) __builtin_ia32_selectq_512(__M,
8769 char __e63,
char __e62,
char __e61,
char __e60,
char __e59,
char __e58,
8770 char __e57,
char __e56,
char __e55,
char __e54,
char __e53,
char __e52,
8771 char __e51,
char __e50,
char __e49,
char __e48,
char __e47,
char __e46,
8772 char __e45,
char __e44,
char __e43,
char __e42,
char __e41,
char __e40,
8773 char __e39,
char __e38,
char __e37,
char __e36,
char __e35,
char __e34,
8774 char __e33,
char __e32,
char __e31,
char __e30,
char __e29,
char __e28,
8775 char __e27,
char __e26,
char __e25,
char __e24,
char __e23,
char __e22,
8776 char __e21,
char __e20,
char __e19,
char __e18,
char __e17,
char __e16,
8777 char __e15,
char __e14,
char __e13,
char __e12,
char __e11,
char __e10,
8778 char __e9,
char __e8,
char __e7,
char __e6,
char __e5,
char __e4,
char __e3,
8779 char __e2,
char __e1,
char __e0) {
8781 return __extension__ (__m512i)(__v64qi)
8782 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8783 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8784 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8785 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
8786 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
8787 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
8788 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
8789 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
8793 short __e31,
short __e30,
short __e29,
short __e28,
short __e27,
8794 short __e26,
short __e25,
short __e24,
short __e23,
short __e22,
8795 short __e21,
short __e20,
short __e19,
short __e18,
short __e17,
8796 short __e16,
short __e15,
short __e14,
short __e13,
short __e12,
8797 short __e11,
short __e10,
short __e9,
short __e8,
short __e7,
short __e6,
8798 short __e5,
short __e4,
short __e3,
short __e2,
short __e1,
short __e0) {
8799 return __extension__ (__m512i)(__v32hi)
8800 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8801 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8802 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8803 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
8807 int __A,
int __B,
int __C,
int __D,
int __E,
int __F,
int __G,
int __H,
8808 int __I,
int __J,
int __K,
int __L,
int __M,
int __N,
int __O,
int __P) {
8809 return __extension__ (__m512i)(__v16si)
8810 {
__P, __O, __N, __M,
__L, __K, __J, __I,
8811 __H, __G, __F, __E,
__D, __C, __B, __A };
8815 int e0,
int e1,
int e2,
int e3,
int e4,
int e5,
int e6,
int e7,
int e8,
8816 int e9,
int e10,
int e11,
int e12,
int e13,
int e14,
int e15) {
8817 return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,
8823 long long __E,
long long __F,
long long __G,
long long __H) {
8824 return __extension__ (__m512i) (__v8di)
8825 { __H, __G, __F, __E,
__D, __C, __B, __A };
8830 long long e4,
long long e5,
long long e6,
long long e7) {
8836 double __F,
double __G,
double __H) {
8837 return __extension__ (__m512d)
8838 { __H, __G, __F, __E,
__D, __C, __B, __A };
8843 double e6,
double e7) {
8849 float __G,
float __H,
float __I,
float __J,
float __K,
float __L,
8850 float __M,
float __N,
float __O,
float __P) {
8851 return __extension__ (__m512)
8852 {
__P, __O, __N, __M,
__L, __K, __J, __I,
8853 __H, __G, __F, __E,
__D, __C, __B, __A };
8858 float e6,
float e7,
float e8,
float e9,
float e10,
float e11,
8859 float e12,
float e13,
float e14,
float e15) {
8860 return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
8902 return __builtin_reduce_add((__v8di)__W);
8907 return __builtin_reduce_mul((__v8di)__W);
8912 return __builtin_reduce_and((__v8di)__W);
8917 return __builtin_reduce_or((__v8di)__W);
8923 return __builtin_reduce_add((__v8di)__W);
8929 return __builtin_reduce_mul((__v8di)__W);
8935 return __builtin_reduce_and((__v8di)__W);
8941 return __builtin_reduce_or((__v8di)__W);
8948 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8952 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
8958 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8964 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
8969 return __builtin_reduce_add((__v16si)__W);
8974 return __builtin_reduce_mul((__v16si)__W);
8979 return __builtin_reduce_and((__v16si)__W);
8984 return __builtin_reduce_or((__v16si)__W);
8990 return __builtin_reduce_add((__v16si)__W);
8996 return __builtin_reduce_mul((__v16si)__W);
9002 return __builtin_reduce_and((__v16si)__W);
9008 return __builtin_reduce_or((__v16si)__W);
9013 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9018 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9024 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9030 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9035 return __builtin_reduce_max((__v8di)__V);
9040 return __builtin_reduce_max((__v8du)__V);
9045 return __builtin_reduce_min((__v8di)__V);
9050 return __builtin_reduce_min((__v8du)__V);
9056 return __builtin_reduce_max((__v8di)__V);
9062 return __builtin_reduce_max((__v8du)__V);
9068 return __builtin_reduce_min((__v8di)__V);
9074 return __builtin_reduce_min((__v8du)__V);
9078 return __builtin_reduce_max((__v16si)__V);
9083 return __builtin_reduce_max((__v16su)__V);
9088 return __builtin_reduce_min((__v16si)__V);
9093 return __builtin_reduce_min((__v16su)__V);
9099 return __builtin_reduce_max((__v16si)__V);
9105 return __builtin_reduce_max((__v16su)__V);
9111 return __builtin_reduce_min((__v16si)__V);
9117 return __builtin_reduce_min((__v16su)__V);
9122 return __builtin_ia32_reduce_fmax_pd512(__V);
9127 return __builtin_ia32_reduce_fmin_pd512(__V);
9133 return __builtin_ia32_reduce_fmax_pd512(__V);
9139 return __builtin_ia32_reduce_fmin_pd512(__V);
9144 return __builtin_ia32_reduce_fmax_ps512(__V);
9149 return __builtin_ia32_reduce_fmin_ps512(__V);
9155 return __builtin_ia32_reduce_fmax_ps512(__V);
9161 return __builtin_ia32_reduce_fmin_ps512(__V);
9177 __v16si
__b = (__v16si)__A;
/*
 * _mm512_i32logather_pd(vindex, base_addr, scale)
 *
 * Thin wrapper over _mm512_i32gather_pd: the 512-bit vindex is passed
 * through _mm512_castsi512_si256() before being handed on as the index;
 * base_addr and scale are forwarded unchanged.
 */
#define _mm512_i32logather_pd(vindex, base_addr, scale) \
  _mm512_i32gather_pd( \
      _mm512_castsi512_si256(vindex), \
      (base_addr), \
      (scale))
/*
 * _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)
 *
 * Thin wrapper over _mm512_mask_i32gather_pd: the 512-bit vindex is passed
 * through _mm512_castsi512_si256() before being handed on as the index;
 * src, mask, base_addr and scale are forwarded unchanged.
 */
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_pd( \
      (src), \
      (mask), \
      _mm512_castsi512_si256(vindex), \
      (base_addr), \
      (scale))
/*
 * _mm512_i32logather_epi64(vindex, base_addr, scale)
 *
 * Thin wrapper over _mm512_i32gather_epi64: the 512-bit vindex is passed
 * through _mm512_castsi512_si256() before being handed on as the index;
 * base_addr and scale are forwarded unchanged.
 */
#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
  _mm512_i32gather_epi64( \
      _mm512_castsi512_si256(vindex), \
      (base_addr), \
      (scale))
/*
 * _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale)
 *
 * Thin wrapper over _mm512_mask_i32gather_epi64: the 512-bit vindex is
 * passed through _mm512_castsi512_si256() before being handed on as the
 * index; src, mask, base_addr and scale are forwarded unchanged.
 */
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_epi64( \
      (src), \
      (mask), \
      _mm512_castsi512_si256(vindex), \
      (base_addr), \
      (scale))
/*
 * _mm512_i32loscatter_pd(base_addr, vindex, v1, scale)
 *
 * Thin wrapper over _mm512_i32scatter_pd: the 512-bit vindex is passed
 * through _mm512_castsi512_si256() before being handed on as the index;
 * base_addr, v1 and scale are forwarded unchanged.
 */
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_pd( \
      (base_addr), \
      _mm512_castsi512_si256(vindex), \
      (v1), \
      (scale))
/*
 * _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale)
 *
 * Thin wrapper over _mm512_mask_i32scatter_pd: the 512-bit vindex is passed
 * through _mm512_castsi512_si256() before being handed on as the index;
 * base_addr, mask, v1 and scale are forwarded unchanged.
 */
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_pd( \
      (base_addr), \
      (mask), \
      _mm512_castsi512_si256(vindex), \
      (v1), \
      (scale))
/*
 * _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale)
 *
 * Thin wrapper over _mm512_i32scatter_epi64: the 512-bit vindex is passed
 * through _mm512_castsi512_si256() before being handed on as the index;
 * base_addr, v1 and scale are forwarded unchanged.
 */
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_epi64( \
      (base_addr), \
      _mm512_castsi512_si256(vindex), \
      (v1), \
      (scale))
/*
 * _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale)
 *
 * Thin wrapper over _mm512_mask_i32scatter_epi64: the 512-bit vindex is
 * passed through _mm512_castsi512_si256() before being handed on as the
 * index; base_addr, mask, v1 and scale are forwarded unchanged.
 */
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_epi64( \
      (base_addr), \
      (mask), \
      _mm512_castsi512_si256(vindex), \
      (v1), \
      (scale))
/*
 * Drop the header-private function-attribute helper macros so they do not
 * leak past this point. The directives are independent of one another.
 */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
static __inline__ uint32_t volatile uint32_t * __p
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8 x float].
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _cvtu32_mask16(unsigned int __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2 x double].
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8, int e9, int e10, int e11, int e12, int e13, int e14, int e15)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4 x float].
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si256(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7, float e8, float e9, float e10, float e11, float e12, float e13, float e14, float e15)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m128 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps128(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5, double e6, double e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps256(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3, long long e4, long long e5, long long e6, long long e7)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m128i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si128(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_int2mask(int __a)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m256d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd256(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_test_epi64_mask(__m512i __A, __m512i __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_knot(__mmask16 __M)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vectors A, B, and C.
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4 x double].
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi32(int e0, int e1, int e2, int e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_pd(double e0, double e1, double e2, double e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m128d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd128(__m512d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kmov(__mmask16 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_si512(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_ps(float e0, float e1, float e2, float e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_ps(__m512d __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision value of the first operand and returns the difference in the lower 64 bits of the result.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of the second operand and returns the quotient in the lower 64 bits of the result.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ void int __a
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bits of the result.
static __inline__ void short __D
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
__inline unsigned int unsigned int __Y
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding value in the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the second operand.