11 "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead."
16#ifndef __AVX10_2CONVERTINTRIN_H
17#define __AVX10_2CONVERTINTRIN_H
20#define __DEFAULT_FN_ATTRS128 \
21 __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \
22 __min_vector_width__(128)))
23#define __DEFAULT_FN_ATTRS256 \
24 __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \
25 __min_vector_width__(256)))
60 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
61 (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (
__mmask8)(-1));
103_mm_mask_cvtx2ps_ph(__m128h __W,
__mmask8 __U, __m128 __A, __m128 __B) {
104 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
105 (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (
__mmask8)__U);
145_mm_maskz_cvtx2ps_ph(
__mmask8 __U, __m128 __A, __m128 __B) {
146 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
147 (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
180 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
181 (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (
__mmask16)(-1));
223_mm256_mask_cvtx2ps_ph(__m256h __W,
__mmask16 __U, __m256 __A, __m256 __B) {
224 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
225 (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (
__mmask16)__U);
265_mm256_maskz_cvtx2ps_ph(
__mmask16 __U, __m256 __A, __m256 __B) {
266 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
267 (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (
__mmask16)__U);
295_mm_cvtbiasph_bf8(__m128i __A, __m128h __B) {
296 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
335_mm_mask_cvtbiasph_bf8(__m128i __W,
__mmask8 __U, __m128i __A, __m128h __B) {
336 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
337 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (
__mmask8)__U);
373_mm_maskz_cvtbiasph_bf8(
__mmask8 __U, __m128i __A, __m128h __B) {
374 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
403_mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) {
404 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
443 __m128i __W,
__mmask16 __U, __m256i __A, __m256h __B) {
444 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
445 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (
__mmask16)__U);
480_mm256_maskz_cvtbiasph_bf8(
__mmask16 __U, __m256i __A, __m256h __B) {
481 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
511_mm_cvts_biasph_bf8(__m128i __A, __m128h __B) {
512 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
551 __W,
__mmask8 __U, __m128i __A, __m128h __B) {
return
552 (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A,
553 (__v8hf)__B, (__v16qi)(__m128i)__W, (
__mmask8)__U); }
588_mm_maskz_cvts_biasph_bf8(
__mmask8 __U, __m128i __A, __m128h __B) {
589 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
619_mm256_cvts_biasph_bf8(__m256i __A, __m256h __B) {
620 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
659 __m128i __W,
__mmask16 __U, __m256i __A, __m256h __B) {
660 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
661 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (
__mmask16)__U);
696_mm256_maskz_cvts_biasph_bf8(
__mmask16 __U, __m256i __A, __m256h __B) {
697 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
727_mm_cvtbiasph_hf8(__m128i __A, __m128h __B) {
728 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
767_mm_mask_cvtbiasph_hf8(__m128i __W,
__mmask8 __U, __m128i __A, __m128h __B) {
768 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
769 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (
__mmask8)__U);
805_mm_maskz_cvtbiasph_hf8(
__mmask8 __U, __m128i __A, __m128h __B) {
806 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
835_mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) {
836 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
875 __m128i __W,
__mmask16 __U, __m256i __A, __m256h __B) {
876 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
877 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (
__mmask16)__U);
912_mm256_maskz_cvtbiasph_hf8(
__mmask16 __U, __m256i __A, __m256h __B) {
913 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
943_mm_cvts_biasph_hf8(__m128i __A, __m128h __B) {
944 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
983_mm_mask_cvts_biasph_hf8(__m128i __W,
__mmask8 __U, __m128i __A, __m128h __B) {
984 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
985 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (
__mmask8)__U);
1021_mm_maskz_cvts_biasph_hf8(
__mmask8 __U, __m128i __A, __m128h __B) {
1022 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
1051_mm256_cvts_biasph_hf8(__m256i __A, __m256h __B) {
1052 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
1091 __m128i __W,
__mmask16 __U, __m256i __A, __m256h __B) {
1092 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
1093 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (
__mmask16)__U);
1128_mm256_maskz_cvts_biasph_hf8(
__mmask16 __U, __m256i __A, __m256h __B) {
1129 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
1163 return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A),
1206_mm_mask_cvt2ph_bf8(__m128i __W,
__mmask16 __U, __m128h __A, __m128h __B) {
1207 return (__m128i)__builtin_ia32_selectb_128(
1208 (
__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)__W);
1248_mm_maskz_cvt2ph_bf8(
__mmask16 __U, __m128h __A, __m128h __B) {
1249 return (__m128i)__builtin_ia32_selectb_128(
1250 (
__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B),
1282_mm256_cvt2ph_bf8(__m256h __A, __m256h __B) {
1283 return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A),
1326 __m256i __W,
__mmask32 __U, __m256h __A, __m256h __B) {
1327 return (__m256i)__builtin_ia32_selectb_256(
1328 (
__mmask32)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W);
1368_mm256_maskz_cvt2ph_bf8(
__mmask32 __U, __m256h __A, __m256h __B) {
1369 return (__m256i)__builtin_ia32_selectb_256(
1370 (
__mmask32)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B),
1403_mm_cvts_2ph_bf8(__m128h __A, __m128h __B) {
1404 return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A),
1447_mm_mask_cvts_2ph_bf8(__m128i __W,
__mmask16 __U, __m128h __A, __m128h __B) {
1448 return (__m128i)__builtin_ia32_selectb_128(
1449 (
__mmask16)__U, (__v16qi)_mm_cvts_2ph_bf8(__A, __B), (__v16qi)__W);
1489_mm_maskz_cvts_2ph_bf8(
__mmask16 __U, __m128h __A, __m128h __B) {
1490 return (__m128i)__builtin_ia32_selectb_128(
1491 (
__mmask16)__U, (__v16qi)_mm_cvts_2ph_bf8(__A, __B),
1524_mm256_cvts_2ph_bf8(__m256h __A, __m256h __B) {
1525 return (__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A),
1568 __m256i __W,
__mmask32 __U, __m256h __A, __m256h __B) {
1569 return (__m256i)__builtin_ia32_selectb_256(
1570 (
__mmask32)__U, (__v32qi)_mm256_cvts_2ph_bf8(__A, __B), (__v32qi)__W);
1610_mm256_maskz_cvts_2ph_bf8(
__mmask32 __U, __m256h __A, __m256h __B) {
1611 return (__m256i)__builtin_ia32_selectb_256(
1612 (
__mmask32)__U, (__v32qi)_mm256_cvts_2ph_bf8(__A, __B),
1645 return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A),
1688_mm_mask_cvt2ph_hf8(__m128i __W,
__mmask16 __U, __m128h __A, __m128h __B) {
1689 return (__m128i)__builtin_ia32_selectb_128(
1690 (
__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)__W);
1730_mm_maskz_cvt2ph_hf8(
__mmask16 __U, __m128h __A, __m128h __B) {
1731 return (__m128i)__builtin_ia32_selectb_128(
1732 (
__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B),
1764_mm256_cvt2ph_hf8(__m256h __A, __m256h __B) {
1765 return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A),
1808 __m256i __W,
__mmask32 __U, __m256h __A, __m256h __B) {
1809 return (__m256i)__builtin_ia32_selectb_256(
1810 (
__mmask32)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W);
1850_mm256_maskz_cvt2ph_hf8(
__mmask32 __U, __m256h __A, __m256h __B) {
1851 return (__m256i)__builtin_ia32_selectb_256(
1852 (
__mmask32)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B),
1885_mm_cvts_2ph_hf8(__m128h __A, __m128h __B) {
1886 return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A),
1929_mm_mask_cvts_2ph_hf8(__m128i __W,
__mmask16 __U, __m128h __A, __m128h __B) {
1930 return (__m128i)__builtin_ia32_selectb_128(
1931 (
__mmask16)__U, (__v16qi)_mm_cvts_2ph_hf8(__A, __B), (__v16qi)__W);
1971_mm_maskz_cvts_2ph_hf8(
__mmask16 __U, __m128h __A, __m128h __B) {
1972 return (__m128i)__builtin_ia32_selectb_128(
1973 (
__mmask16)__U, (__v16qi)_mm_cvts_2ph_hf8(__A, __B),
2006_mm256_cvts_2ph_hf8(__m256h __A, __m256h __B) {
2007 return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A),
2050 __m256i __W,
__mmask32 __U, __m256h __A, __m256h __B) {
2051 return (__m256i)__builtin_ia32_selectb_256(
2052 (
__mmask32)__U, (__v32qi)_mm256_cvts_2ph_hf8(__A, __B), (__v32qi)__W);
2092_mm256_maskz_cvts_2ph_hf8(
__mmask32 __U, __m256h __A, __m256h __B) {
2093 return (__m256i)__builtin_ia32_selectb_256(
2094 (
__mmask32)__U, (__v32qi)_mm256_cvts_2ph_hf8(__A, __B),
2119 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
2120 (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (
__mmask8)-1);
2155_mm_mask_cvthf8_ph(__m128h __W,
__mmask8 __U, __m128i __A) {
2156 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
2157 (__v16qi)__A, (__v8hf)(__m128h)__W, (
__mmask8)__U);
2190_mm_maskz_cvthf8_ph(
__mmask8 __U, __m128i __A) {
2191 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
2192 (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (
__mmask8)__U);
2216 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
2217 (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (
__mmask16)-1);
2252_mm256_mask_cvthf8_ph(__m256h __W,
__mmask16 __U, __m128i __A) {
2253 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
2254 (__v16qi)__A, (__v16hf)(__m256h)__W, (
__mmask16)__U);
2287_mm256_maskz_cvthf8_ph(
__mmask16 __U, __m128i __A) {
2288 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
2289 (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (
__mmask16)__U);
2314 return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
2350_mm_mask_cvtph_bf8(__m128i __W,
__mmask8 __U, __m128h __A) {
2351 return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
2352 (__v8hf)__A, (__v16qi)(__m128i)__W, (
__mmask8)__U);
2385_mm_maskz_cvtph_bf8(
__mmask8 __U, __m128h __A) {
2386 return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
2411_mm256_cvtph_bf8(__m256h __A) {
2412 return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
2447_mm256_mask_cvtph_bf8(__m128i __W,
__mmask16 __U, __m256h __A) {
2448 return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
2449 (__v16hf)__A, (__v16qi)(__m128i)__W, (
__mmask16)__U);
2481_mm256_maskz_cvtph_bf8(
__mmask16 __U, __m256h __A) {
2482 return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
2508 return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
2544_mm_mask_cvts_ph_bf8(__m128i __W,
__mmask8 __U, __m128h __A) {
2545 return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
2546 (__v8hf)__A, (__v16qi)(__m128i)__W, (
__mmask8)__U);
2579_mm_maskz_cvts_ph_bf8(
__mmask8 __U, __m128h __A) {
2580 return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
2605_mm256_cvts_ph_bf8(__m256h __A) {
2606 return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
2642_mm256_mask_cvts_ph_bf8(__m128i __W,
__mmask16 __U, __m256h __A) {
2643 return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
2644 (__v16hf)__A, (__v16qi)(__m128i)__W, (
__mmask16)__U);
2677_mm256_maskz_cvts_ph_bf8(
__mmask16 __U, __m256h __A) {
2678 return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
2704 return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
2740_mm_mask_cvtph_hf8(__m128i __W,
__mmask8 __U, __m128h __A) {
2741 return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
2742 (__v8hf)__A, (__v16qi)(__m128i)__W, (
__mmask8)__U);
2775_mm_maskz_cvtph_hf8(
__mmask8 __U, __m128h __A) {
2776 return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
2801_mm256_cvtph_hf8(__m256h __A) {
2802 return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
2837_mm256_mask_cvtph_hf8(__m128i __W,
__mmask16 __U, __m256h __A) {
2838 return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
2839 (__v16hf)__A, (__v16qi)(__m128i)__W, (
__mmask16)__U);
2871_mm256_maskz_cvtph_hf8(
__mmask16 __U, __m256h __A) {
2872 return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
2898 return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
2934_mm_mask_cvts_ph_hf8(__m128i __W,
__mmask8 __U, __m128h __A) {
2935 return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
2936 (__v8hf)__A, (__v16qi)(__m128i)__W, (
__mmask8)__U);
2969_mm_maskz_cvts_ph_hf8(
__mmask8 __U, __m128h __A) {
2970 return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
2995_mm256_cvts_ph_hf8(__m256h __A) {
2996 return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
3032_mm256_mask_cvts_ph_hf8(__m128i __W,
__mmask16 __U, __m256h __A) {
3033 return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
3034 (__v16hf)__A, (__v16qi)(__m128i)__W, (
__mmask16)__U);
3067_mm256_maskz_cvts_ph_hf8(
__mmask16 __U, __m256h __A) {
3068 return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
3124_mm_mask_cvtbf8_ph(__m128h __W,
__mmask8 __U, __m128i __A) {
3125 return _mm_castsi128_ph(
3157_mm_maskz_cvtbf8_ph(
__mmask8 __U, __m128i __A) {
3213_mm256_mask_cvtbf8_ph(__m256h __W,
__mmask16 __U, __m128i __A) {
3214 return _mm256_castsi256_ph(
3246_mm256_maskz_cvtbf8_ph(
__mmask16 __U, __m128i __A) {
3247 return _mm256_castsi256_ph(
/* Clean up the helper attribute macros so they do not leak to includers.
 * NOTE(review): the closing #endif of the __AVX10_2CONVERTINTRIN_H guard is
 * expected just after this point but is outside the visible extract. */
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi8_epi16(__m128i __V)
Sign-extends bytes from the 128-bit integer vector in __V and returns the 16-bit values in the corresponding elements of a 256-bit vector of [16 x i16].
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_slli_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a left by __count bits, shifting in zero bits, and returns the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B)
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi8_epi16(__m128i __V)
Sign-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bit values and returns them in a 128-bit vector of [8 x i16].