11 "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead."
#ifndef __AVX10_2CONVERTINTRIN_H
#define __AVX10_2CONVERTINTRIN_H
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
                 __min_vector_width__(256)))
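
/* Comment added for clarity (not in the original header): everything below is
 * compiled for the AVX10.2/256-bit feature level via
 * __target__("avx10.2-256"), so callers must themselves be compiled for (and
 * run on) a CPU with AVX10.2 support. */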
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A,
                                                               __m128 __B) {
  return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
      (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1));
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
      (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
      (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A,
                                                                  __m256 __B) {
  return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
      (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1),
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) {
  return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
      (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) {
  return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
      (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
      _MM_FROUND_CUR_DIRECTION);
}
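
/* Illustrative example (not part of the original header): pack eight floats
 * from two __m128 sources into a single __m128h of FP16 values. Which source
 * fills which half of the result follows the VCVT2PS2PHX operand order.
 *
 *   __m128 __lo = _mm_set1_ps(1.0f);
 *   __m128 __hi = _mm_set1_ps(2.0f);
 *   __m128h __packed = _mm_cvtx2ps_ph(__lo, __hi);
 */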
#define _mm256_cvtx_round2ps_ph(A, B, R)                                       \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)_mm256_undefined_ph(),                \
      (__mmask16)(-1), (const int)(R)))

#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R)                            \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)(W), (__mmask16)(U), (const int)(R)))

#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R)                              \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()),                \
      (__mmask16)(U), (const int)(R)))
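
/* Illustrative example (not part of the original header): R must be a
 * compile-time rounding-control immediate, e.g.
 *
 *   __m256h __r = _mm256_cvtx_round2ps_ph(
 *       __a, __b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 *
 * The non-round 256-bit forms above pass _MM_FROUND_CUR_DIRECTION, i.e. they
 * round according to the current MXCSR rounding mode. */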
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_bf8(__m128i __A,
                                                                  __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtbiasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask16)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_bf8(
    __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask16)__U);
}
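
/* Comment added for clarity: the cvtbiasph intrinsics narrow packed FP16
 * elements of __B to BF8 (E5M2) bytes, applying a per-element rounding bias
 * taken from the low byte of each corresponding 16-bit lane of __A, per the
 * VCVTBIASPH2BF8 instruction definition. The 256-bit forms narrow sixteen
 * FP16 inputs into a full 128-bit result. */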
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtbiassph_bf8(__m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask16)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_bf8(
    __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask16)__U);
}
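
/* Comment added for clarity: the cvtbiassph forms are the saturating
 * counterparts (VCVTBIASPH2BF8S); out-of-range results clamp to the largest
 * finite BF8 value instead of overflowing to infinity. */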
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_hf8(__m128i __A,
                                                                  __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtbiasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask16)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_hf8(
    __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask16)__U);
}
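
/* Comment added for clarity: same bias-assisted narrowing as above, but to
 * HF8 (E4M3) via VCVTBIASPH2HF8. */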
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtbiassph_hf8(__m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask16)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_hf8(
    __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask16)__U);
}
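
/* Comment added for clarity: saturating HF8 variant (VCVTBIASPH2HF8S);
 * overflow clamps to the largest finite E4M3 value. */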
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_bf8(__m128h __A,
                                                               __m128h __B) {
  return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), (__v8hf)(__B));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvt2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B),
      (__v16qi)(__m128i)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_bf8(__m256h __A,
                                                                  __m256h __B) {
  return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A),
                                                 (__v16hf)(__B));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_selectb_256(
      (__mmask32)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_selectb_256(
      (__mmask32)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B),
      (__v32qi)(__m256i)_mm256_setzero_si256());
}
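
/* Illustrative example (not part of the original header): narrow sixteen
 * FP16 values from two vectors into one vector of BF8 (E5M2) bytes, zeroing
 * the odd result bytes under a mask.
 *
 *   __m128h __a = _mm_set1_ph((_Float16)0.5f);
 *   __m128h __b = _mm_set1_ph((_Float16)1.5f);
 *   __m128i __bytes = _mm_maskz_cvt2ph_bf8((__mmask16)0x5555, __a, __b);
 */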
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_bf8(__m128h __A,
                                                                __m128h __B) {
  return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), (__v8hf)(__B));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B),
      (__v16qi)(__m128i)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvts2ph_bf8(__m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A),
                                                  (__v16hf)(__B));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_selectb_256(
      (__mmask32)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_selectb_256(
      (__mmask32)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B),
      (__v32qi)(__m256i)_mm256_setzero_si256());
}
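
/* Comment added for clarity: saturating form of the two-source FP16 -> BF8
 * narrowing (VCVT2PH2BF8S). */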
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_hf8(__m128h __A,
                                                               __m128h __B) {
  return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), (__v8hf)(__B));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvt2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B),
      (__v16qi)(__m128i)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_hf8(__m256h __A,
                                                                  __m256h __B) {
  return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A),
                                                 (__v16hf)(__B));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_selectb_256(
      (__mmask32)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_selectb_256(
      (__mmask32)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B),
      (__v32qi)(__m256i)_mm256_setzero_si256());
}
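
/* Comment added for clarity: two-source FP16 -> HF8 (E4M3) narrowing
 * (VCVT2PH2HF8), same shape as the BF8 forms above. */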
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_hf8(__m128h __A,
                                                                __m128h __B) {
  return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), (__v8hf)(__B));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B),
      (__v16qi)(__m128i)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvts2ph_hf8(__m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A),
                                                  (__v16hf)(__B));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_selectb_256(
      (__mmask32)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_selectb_256(
      (__mmask32)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B),
      (__v32qi)(__m256i)_mm256_setzero_si256());
}
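
/* Comment added for clarity: saturating two-source FP16 -> HF8 narrowing
 * (VCVT2PH2HF8S). */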
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvthf8(__m128i __A) {
  return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
      (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvthf8(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
      (__v16qi)__A, (__v8hf)(__m128h)__W, (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvthf8(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
      (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (__mmask8)__U);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvthf8(__m128i __A) {
  return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
      (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (__mmask16)-1);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_cvthf8(__m256h __W, __mmask16 __U, __m128i __A) {
  return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
      (__v16qi)__A, (__v16hf)(__m256h)__W, (__mmask16)__U);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvthf8(__mmask16 __U, __m128i __A) {
  return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
      (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (__mmask16)__U);
}
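
/* Comment added for clarity: the cvthf8 intrinsics widen HF8 (E4M3) bytes to
 * FP16 (VCVTHF82PH). Every E4M3 value is exactly representable in FP16, so
 * this direction is exact.
 *
 * Illustrative round-trip (not part of the original header):
 *
 *   __m128h __x = _mm_set1_ph((_Float16)3.0f);
 *   __m128i __packed = _mm_cvtph_hf8(__x); // FP16 -> HF8 (defined below)
 *   __m128h __back = _mm_cvthf8(__packed); // HF8 -> FP16, exact
 */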
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_bf8(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtph_bf8(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtph_bf8(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_bf8(__m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtph_bf8(__m128i __W, __mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtph_bf8(__mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
}
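
/* Comment added for clarity: single-source FP16 -> BF8 narrowing
 * (VCVTPH2BF8). The 128-bit form writes its eight result bytes to the low
 * half of the destination and zeroes the upper half. */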
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_bf8(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtsph_bf8(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsph_bf8(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_bf8(__m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtsph_bf8(__m128i __W, __mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtsph_bf8(__mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
}
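
/* Comment added for clarity: saturating single-source FP16 -> BF8 narrowing
 * (VCVTPH2BF8S). */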
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_hf8(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtph_hf8(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtph_hf8(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_hf8(__m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtph_hf8(__m128i __W, __mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtph_hf8(__mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
}
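
/* Comment added for clarity: single-source FP16 -> HF8 (E4M3) narrowing
 * (VCVTPH2HF8). */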
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_hf8(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtsph_hf8(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsph_hf8(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_hf8(__m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtsph_hf8(__m128i __W, __mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtsph_hf8(__mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
}
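
/* Comment added for clarity: saturating single-source FP16 -> HF8 narrowing
 * (VCVTPH2HF8S). */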
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtbf8_ph(__m128i __A) {
  return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A), 8));
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvtbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) {
  return _mm_castsi128_ph(
      _mm_mask_slli_epi16((__m128i)__S, __U, _mm_cvtepi8_epi16(__A), 8));
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtbf8_ph(__mmask8 __U, __m128i __A) {
  return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U, __A), 8));
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtbf8_ph(__m128i __A) {
  return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A), 8));
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_cvtbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) {
  return _mm256_castsi256_ph(
      _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8));
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtbf8_ph(__mmask16 __U, __m128i __A) {
  return _mm256_castsi256_ph(
      _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8));
}
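
/* Comment added for clarity: BF8 (E5M2) has the same exponent width as FP16,
 * so a BF8 byte is exactly the high byte of the corresponding FP16 value.
 * The cvtbf8_ph helpers above therefore need no dedicated instruction: they
 * extend each byte to 16 bits and shift it into the high byte (the bits
 * produced by the sign extension are shifted out). */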
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif // __AVX10_2CONVERTINTRIN_H