11 "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead."
#ifndef __AVX512VLFP16INTRIN_H
#define __AVX512VLFP16INTRIN_H

#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512fp16,avx512vl"), \
                 __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512fp16,avx512vl"), \
                 __min_vector_width__(128)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
_mm_cvtsh_h(__m128h __a) {
_mm256_cvtsh_h(__m256h __a) {
  return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
  return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h};
  return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h,
                            __h, __h, __h, __h, __h, __h, __h, __h};
  return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
_mm256_set1_pch(_Float16 _Complex h) {
  return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h));
  return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
                            __h10, __h9, __h8, __h7, __h6, __h5,
                            __h4, __h3, __h2, __h1};
  return _mm_set_ph(e7, e6, e5, e4, e3, e2, e1, e0);
  return _mm256_set_ph(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
  return (__m256h)((__v16hf)__A + (__v16hf)__B);
_mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W);
_mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
  return (__m128h)((__v8hf)__A + (__v8hf)__B);
  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
      (__v8hf)_mm_setzero_ph());
  return (__m256h)((__v16hf)__A - (__v16hf)__B);
_mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W);
_mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
  return (__m128h)((__v8hf)__A - (__v8hf)__B);
  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
      (__v8hf)_mm_setzero_ph());
  return (__m256h)((__v16hf)__A * (__v16hf)__B);
_mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W);
_mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
  return (__m128h)((__v8hf)__A * (__v8hf)__B);
  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
      (__v8hf)_mm_setzero_ph());
  return (__m256h)((__v16hf)__A / (__v16hf)__B);
_mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W);
_mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
  return (__m128h)((__v8hf)__A / (__v8hf)__B);
  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
      (__v8hf)_mm_setzero_ph());
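/* Illustrative usage sketch (not part of the original header): how the
 * unmasked, merge-masked (_mm_mask_*) and zero-masked (_mm_maskz_*)
 * arithmetic forms above differ. Assumes <immintrin.h> is included and the
 * translation unit is built with -mavx512fp16 -mavx512vl; the variable names
 * are hypothetical.
 *
 *   __m128h a = _mm_set1_ph((_Float16)1.5);
 *   __m128h b = _mm_set1_ph((_Float16)0.5);
 *   __m128h plain  = _mm_add_ph(a, b);                         // all 8 lanes: 2.0
 *   __m128h merged = _mm_mask_add_ph(a, (__mmask8)0x0F, a, b); // lanes 0-3: 2.0, lanes 4-7 keep a
 *   __m128h zeroed = _mm_maskz_mul_ph((__mmask8)0x0F, a, b);   // lanes 0-3: 0.75, lanes 4-7: 0.0
 */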
_mm256_min_ph(__m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B);
_mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
_mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
      (__v16hf)_mm256_setzero_ph());
_mm_min_ph(__m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B);
_mm_mask_min_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
_mm_maskz_min_ph(__mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
      (__v8hf)_mm_setzero_ph());
_mm256_max_ph(__m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B);
_mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
_mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
      (__v16hf)_mm256_setzero_ph());
_mm_max_ph(__m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B);
_mm_mask_max_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
_mm_maskz_max_ph(__mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
      (__v8hf)_mm_setzero_ph());
_mm256_abs_ph(__m256h __A) {
_mm_abs_ph(__m128h __A) {
_mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) {
  return (__m256h)__builtin_ia32_selectps_256(
      (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W);
_mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) {
  return (__m256h)__builtin_ia32_selectps_256(
  return (__m128h)__builtin_ia32_selectps_128(
      (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W);
_mm_maskz_conj_pch(__mmask8 __U, __m128h __A) {
  return (__m128h)__builtin_ia32_selectps_128(
#define _mm256_cmp_ph_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpph256_mask( \
      (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)-1))

#define _mm256_mask_cmp_ph_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpph256_mask( \
      (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)(m)))

#define _mm_cmp_ph_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpph128_mask( \
      (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_ph_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpph128_mask( \
      (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m)))
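/* Illustrative usage sketch (not part of the original header): the
 * _mm*_cmp_ph_mask macros above yield a bit-mask, one bit per lane, which is
 * typically fed back into a masked operation. Assumes <immintrin.h> and
 * -mavx512fp16 -mavx512vl; variable names are hypothetical.
 *
 *   __m256h x = _mm256_set1_ph((_Float16)2.0);
 *   __m256h y = _mm256_set1_ph((_Float16)3.0);
 *   __mmask16 lt = _mm256_cmp_ph_mask(x, y, _CMP_LT_OQ); // bit i set when x[i] < y[i]
 *   // Per lane: x[i] where the comparison held, y[i] otherwise.
 *   __m256h smaller = _mm256_mask_blend_ph(lt, y, x);
 */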
  return (__m256h)__builtin_ia32_rcpph256_mask(
      (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
_mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
  return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W,
_mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) {
  return (__m256h)__builtin_ia32_rcpph256_mask(
      (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
  return (__m128h)__builtin_ia32_rcpph128_mask(
      (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
  return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W,
  return (__m128h)__builtin_ia32_rcpph128_mask(
      (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m256h)__builtin_ia32_rsqrtph256_mask(
      (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
_mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
  return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W,
_mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) {
  return (__m256h)__builtin_ia32_rsqrtph256_mask(
      (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
  return (__m128h)__builtin_ia32_rsqrtph128_mask(
      (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
  return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W,
_mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) {
  return (__m128h)__builtin_ia32_rsqrtph128_mask(
      (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m128h)__builtin_ia32_getexpph128_mask(
      (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
_mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) {
  return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W,
_mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) {
  return (__m128h)__builtin_ia32_getexpph128_mask(
      (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m256h)__builtin_ia32_getexpph256_mask(
      (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
_mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
  return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W,
_mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) {
  return (__m256h)__builtin_ia32_getexpph256_mask(
      (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
#define _mm_getmant_ph(A, B, C) \
  ((__m128h)__builtin_ia32_getmantph128_mask( \
      (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
#define _mm_mask_getmant_ph(W, U, A, B, C) \
  ((__m128h)__builtin_ia32_getmantph128_mask( \
      (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)(__m128h)(W), \
#define _mm_maskz_getmant_ph(U, A, B, C) \
  ((__m128h)__builtin_ia32_getmantph128_mask( \
      (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
#define _mm256_getmant_ph(A, B, C) \
  ((__m256h)__builtin_ia32_getmantph256_mask( \
      (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \
      (__v16hf)_mm256_setzero_ph(), (__mmask16)-1))
#define _mm256_mask_getmant_ph(W, U, A, B, C) \
  ((__m256h)__builtin_ia32_getmantph256_mask( \
      (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \
#define _mm256_maskz_getmant_ph(U, A, B, C) \
  ((__m256h)__builtin_ia32_getmantph256_mask( \
      (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \
      (__v16hf)_mm256_setzero_ph(), (__mmask16)(U)))
  return (__m128h)__builtin_ia32_scalefph128_mask(
      (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
_mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B,
_mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_scalefph128_mask(
      (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m256h)__builtin_ia32_scalefph256_mask(
      (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
_mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B,
_mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_scalefph256_mask(
      (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
#define _mm_roundscale_ph(A, imm) \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
      (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \
#define _mm_mask_roundscale_ph(W, U, A, imm) \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
      (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
#define _mm_maskz_roundscale_ph(U, A, imm) \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
      (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \
#define _mm256_roundscale_ph(A, imm) \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \
#define _mm256_mask_roundscale_ph(W, U, A, imm) \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \
#define _mm256_maskz_roundscale_ph(U, A, imm) \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \
#define _mm_reduce_ph(A, imm) \
  ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \
      (__v8hf)_mm_setzero_ph(), \
#define _mm_mask_reduce_ph(W, U, A, imm) \
  ((__m128h)__builtin_ia32_reduceph128_mask( \
      (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
#define _mm_maskz_reduce_ph(U, A, imm) \
  ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \
      (__v8hf)_mm_setzero_ph(), \
#define _mm256_reduce_ph(A, imm) \
  ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
      (__v16hf)_mm256_setzero_ph(), \
#define _mm256_mask_reduce_ph(W, U, A, imm) \
  ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
      (__v16hf)(__m256h)(W), \
#define _mm256_maskz_reduce_ph(U, A, imm) \
  ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
      (__v16hf)_mm256_setzero_ph(), \
  return __builtin_elementwise_sqrt(__a);
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W);
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph());
  return __builtin_elementwise_sqrt(__a);
_mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W);
_mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) {
  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
      (__v16hf)_mm256_sqrt_ph(__A),
      (__v16hf)_mm256_setzero_ph());
#define _mm_mask_fpclass_ph_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \
      (int)(imm), (__mmask8)(U)))
#define _mm_fpclass_ph_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \
      (int)(imm), (__mmask8)-1))
#define _mm256_mask_fpclass_ph_mask(U, A, imm) \
  ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \
      (int)(imm), (__mmask16)(U)))
#define _mm256_fpclass_ph_mask(A, imm) \
  ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \
      (int)(imm), (__mmask16)-1))
  return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
      (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
  return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
_mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
  return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
      (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
      (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
_mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
  return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
_mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
  return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
      (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
  return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
_mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
  return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
  return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
_mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
  return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
_mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
  return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
  return (__m128i)__builtin_ia32_vcvtph2w128_mask(
_mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
_mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2w128_mask(
_mm256_cvtph_epi16(__m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2w256_mask(
_mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
_mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2w256_mask(
  return (__m128i)__builtin_ia32_vcvttph2w128_mask(
_mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
_mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2w128_mask(
_mm256_cvttph_epi16(__m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2w256_mask(
_mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
_mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2w256_mask(
_mm_cvtepi16_ph(__m128i __A) {
  return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
_mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
_mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
_mm256_cvtepi16_ph(__m256i __A) {
  return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
_mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
_mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
      (__v16hf)_mm256_cvtepi16_ph(__A),
      (__v16hf)_mm256_setzero_ph());
  return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
_mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
_mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
_mm256_cvtph_epu16(__m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
_mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
_mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
  return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
_mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
_mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
_mm256_cvttph_epu16(__m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
_mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
_mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
_mm_cvtepu16_ph(__m128i __A) {
  return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
_mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
_mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
_mm256_cvtepu16_ph(__m256i __A) {
  return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
_mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
_mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
      (__v16hf)_mm256_cvtepu16_ph(__A),
      (__v16hf)_mm256_setzero_ph());
  return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
_mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
_mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
_mm256_cvtph_epi32(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
_mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
_mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
  return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
_mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
_mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
_mm256_cvtph_epu32(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
_mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
_mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
      (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
_mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
_mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
      (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
_mm256_cvtepi32_ph(__m256i __A) {
  return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
_mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
_mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
      (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
_mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
_mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
      (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
_mm256_cvtepu32_ph(__m256i __A) {
  return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
_mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
_mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
  return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
_mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
_mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
_mm256_cvttph_epi32(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
_mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
_mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
  return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
_mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
_mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
_mm256_cvttph_epu32(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
_mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
_mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
      (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
_mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
_mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
      (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
_mm256_cvtepi64_ph(__m256i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
      (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
_mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
_mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
      (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
_mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
_mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
_mm256_cvtph_epi64(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
_mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
_mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
      (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
_mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
_mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
      (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
_mm256_cvtepu64_ph(__m256i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
      (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
_mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
_mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
      (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
_mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
_mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
_mm256_cvtph_epu64(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
_mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
_mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
  return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
_mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
_mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
_mm256_cvttph_epi64(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
_mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
_mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
_mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
_mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
_mm256_cvttph_epu64(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
_mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
_mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
  return (__m128)__builtin_ia32_vcvtph2psx128_mask(
  return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
_mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
  return (__m128)__builtin_ia32_vcvtph2psx128_mask(
  return (__m256)__builtin_ia32_vcvtph2psx256_mask(
_mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
  return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
_mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
  return (__m256)__builtin_ia32_vcvtph2psx256_mask(
  return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
      (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
  return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
_mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
  return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
      (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
      (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
_mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
  return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
_mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
  return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
      (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
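/* Illustrative usage sketch (not part of the original header): round-tripping
 * between float and _Float16 with the cvtx conversions above. Assumes
 * <immintrin.h> and -mavx512fp16 -mavx512vl; variable names are hypothetical.
 *
 *   __m256 wide = _mm256_set1_ps(1.25f);
 *   __m128h half = _mm256_cvtxps_ph(wide);                      // 8 x float -> 8 x _Float16
 *   __m256 back = _mm256_maskz_cvtxph_ps((__mmask8)0x55, half); // widen even lanes, zero odd lanes
 */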
  return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
_mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
_mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
  return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
_mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
_mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
_mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
_mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
  return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
_mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
_mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
_mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
  return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
_mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
_mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
_mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
_mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
_mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
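/* Illustrative usage sketch (not part of the original header): the three
 * masked FMA flavours above differ only in what the inactive lanes carry.
 * Assumes <immintrin.h> and -mavx512fp16 -mavx512vl; names are hypothetical.
 *
 *   __m128h a = _mm_set1_ph((_Float16)2.0);
 *   __m128h b = _mm_set1_ph((_Float16)3.0);
 *   __m128h c = _mm_set1_ph((_Float16)1.0);
 *   __mmask8 k = 0xF0;                            // active lanes compute a*b + c = 7.0
 *   __m128h r1 = _mm_mask_fmadd_ph(a, k, b, c);   // inactive lanes keep a
 *   __m128h r2 = _mm_mask3_fmadd_ph(a, b, c, k);  // inactive lanes keep c
 *   __m128h r3 = _mm_maskz_fmadd_ph(k, a, b, c);  // inactive lanes are zeroed
 */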
  return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
_mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
_mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
_mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
  return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
_mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
_mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
_mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
_mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
_mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
_mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
_mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
_mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
_mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
_mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
_mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
_mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
_mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
  return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
_mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
  return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
_mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
  return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
_mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
_mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
  return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
_mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
_mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
  return (__m128h)__builtin_ia32_vfcmulcph128_mask(
      (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
_mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B,
_mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_vfcmulcph128_mask(
      (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m256h)__builtin_ia32_vfcmulcph256_mask(
      (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
_mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B,
_mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_vfcmulcph256_mask(
      (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
  return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
_mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectps_128(
      __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B,
_mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
_mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_vfcmaddcph128_maskz(
      (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U);
  return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
_mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectps_256(
      __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
_mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
  return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
_mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_vfcmaddcph256_maskz(
      (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U);
  return (__m128h)__builtin_ia32_vfmulcph128_mask(
      (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
  return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B,
_mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_vfmulcph128_mask(
      (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
  return (__m256h)__builtin_ia32_vfmulcph256_mask(
      (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
_mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B,
_mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
  return (__m256h)__builtin_ia32_vfmulcph256_mask(
      (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
  return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
_mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectps_128(
      __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
_mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
_mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B,
  return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
_mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectps_256(
      __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
_mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
  return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
_mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B,
_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
  return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
_mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
  return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
_mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
  return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
_mm_permutexvar_ph(__m128i __A, __m128h __B) {
  return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
_mm256_permutexvar_ph(__m256i __A, __m256h __B) {
  return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
_mm256_reduce_add_ph(__m256h __W) {
  return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W);
_mm256_reduce_mul_ph(__m256h __W) {
  return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W);
_mm256_reduce_max_ph(__m256h __V) {
  return __builtin_ia32_reduce_fmax_ph256(__V);
_mm256_reduce_min_ph(__m256h __V) {
  return __builtin_ia32_reduce_fmin_ph256(__V);
_mm_reduce_add_ph(__m128h __W) {
  return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W);
_mm_reduce_mul_ph(__m128h __W) {
  return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W);
_mm_reduce_max_ph(__m128h __V) {
  return __builtin_ia32_reduce_fmax_ph128(__V);
_mm_reduce_min_ph(__m128h __V) {
  return __builtin_ia32_reduce_fmin_ph128(__V);
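/* Illustrative usage sketch (not part of the original header): the reduce
 * helpers above fold an entire vector into a single _Float16 scalar. Assumes
 * <immintrin.h> and -mavx512fp16 -mavx512vl; variable names are hypothetical.
 *
 *   __m128h v = _mm_set_ph((_Float16)8.0, (_Float16)7.0, (_Float16)6.0,
 *                          (_Float16)5.0, (_Float16)4.0, (_Float16)3.0,
 *                          (_Float16)2.0, (_Float16)1.0);
 *   _Float16 sum = _mm_reduce_add_ph(v); // 1 + 2 + ... + 8 = 36.0
 *   _Float16 top = _mm_reduce_max_ph(v); // 8.0
 */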
#define _mm_mul_pch(A, B) _mm_fmul_pch(A, B)
#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch(W, U, A, B)
#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch(U, A, B)
#define _mm256_mul_pch(A, B) _mm256_fmul_pch(A, B)
#define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch(W, U, A, B)
#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch(U, A, B)

#define _mm_cmul_pch(A, B) _mm_fcmul_pch(A, B)
#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch(W, U, A, B)
#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch(U, A, B)
#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch(A, B)
#define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch(W, U, A, B)
#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch(U, A, B)
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR

#endif /* __AVX512VLFP16INTRIN_H */