11 "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead."
16#ifndef __AVX512VLFP16INTRIN_H
17#define __AVX512VLFP16INTRIN_H
/* Attribute list for the 256-bit variants: __always_inline__, __nodebug__,
 * __target__("avx512fp16,avx512vl") and __min_vector_width__(256).
 * Also reused by __DEFAULT_FN_ATTRS256_CONSTEXPR. */
20#define __DEFAULT_FN_ATTRS256 \
21 __attribute__((__always_inline__, __nodebug__, \
22 __target__("avx512fp16,avx512vl"), \
23 __min_vector_width__(256)))
/* Attribute list for the 128-bit variants: __always_inline__, __nodebug__,
 * __target__("avx512fp16,avx512vl") and __min_vector_width__(128).
 * Also reused by __DEFAULT_FN_ATTRS128_CONSTEXPR. */
24#define __DEFAULT_FN_ATTRS128 \
25 __attribute__((__always_inline__, __nodebug__, \
26 __target__("avx512fp16,avx512vl"), \
27 __min_vector_width__(128)))
29#if defined(__cplusplus) && (__cplusplus >= 201103L)
30#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
31#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
33#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
34#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
46 return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
51 return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h};
56 return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h,
57 __h, __h, __h, __h, __h, __h, __h, __h};
63 return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
67_mm256_set1_pch(
_Float16 _Complex h) {
73 return (__m128h)
_mm_set1_ps(__builtin_bit_cast(
float, h));
81 return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
82 __h10, __h9, __h8, __h7, __h6, __h5,
83 __h4, __h3, __h2, __h1};
89 return _mm_set_ph(e7, e6, e5, e4, e3, e2, e1, e0);
97 return _mm256_set_ph(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
103 return (__m256h)((__v16hf)__A + (__v16hf)__B);
107_mm256_mask_add_ph(__m256h __W,
__mmask16 __U, __m256h __A, __m256h __B) {
108 return (__m256h)__builtin_ia32_selectph_256(
109 __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W);
113_mm256_maskz_add_ph(
__mmask16 __U, __m256h __A, __m256h __B) {
114 return (__m256h)__builtin_ia32_selectph_256(
115 __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
120 return (__m128h)((__v8hf)__A + (__v8hf)__B);
127 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
134 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
135 (__v8hf)_mm_setzero_ph());
140 return (__m256h)((__v16hf)__A - (__v16hf)__B);
144_mm256_mask_sub_ph(__m256h __W,
__mmask16 __U, __m256h __A, __m256h __B) {
145 return (__m256h)__builtin_ia32_selectph_256(
146 __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W);
150_mm256_maskz_sub_ph(
__mmask16 __U, __m256h __A, __m256h __B) {
151 return (__m256h)__builtin_ia32_selectph_256(
152 __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
157 return (__m128h)((__v8hf)__A - (__v8hf)__B);
164 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
171 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
172 (__v8hf)_mm_setzero_ph());
177 return (__m256h)((__v16hf)__A * (__v16hf)__B);
181_mm256_mask_mul_ph(__m256h __W,
__mmask16 __U, __m256h __A, __m256h __B) {
182 return (__m256h)__builtin_ia32_selectph_256(
183 __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W);
187_mm256_maskz_mul_ph(
__mmask16 __U, __m256h __A, __m256h __B) {
188 return (__m256h)__builtin_ia32_selectph_256(
189 __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
194 return (__m128h)((__v8hf)__A * (__v8hf)__B);
201 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
208 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
209 (__v8hf)_mm_setzero_ph());
214 return (__m256h)((__v16hf)__A / (__v16hf)__B);
218_mm256_mask_div_ph(__m256h __W,
__mmask16 __U, __m256h __A, __m256h __B) {
219 return (__m256h)__builtin_ia32_selectph_256(
220 __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W);
224_mm256_maskz_div_ph(
__mmask16 __U, __m256h __A, __m256h __B) {
225 return (__m256h)__builtin_ia32_selectph_256(
226 __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
231 return (__m128h)((__v8hf)__A / (__v8hf)__B);
238 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
245 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
246 (__v8hf)_mm_setzero_ph());
251 return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B);
255_mm256_mask_min_ph(__m256h __W,
__mmask16 __U, __m256h __A, __m256h __B) {
256 return (__m256h)__builtin_ia32_selectph_256(
258 (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
263_mm256_maskz_min_ph(
__mmask16 __U, __m256h __A, __m256h __B) {
264 return (__m256h)__builtin_ia32_selectph_256(
266 (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
267 (__v16hf)_mm256_setzero_ph());
272 return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B);
279 return (__m128h)__builtin_ia32_selectph_128(
280 (
__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
287 return (__m128h)__builtin_ia32_selectph_128(
288 (
__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
289 (__v8hf)_mm_setzero_ph());
294 return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B);
298_mm256_mask_max_ph(__m256h __W,
__mmask16 __U, __m256h __A, __m256h __B) {
299 return (__m256h)__builtin_ia32_selectph_256(
301 (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
306_mm256_maskz_max_ph(
__mmask16 __U, __m256h __A, __m256h __B) {
307 return (__m256h)__builtin_ia32_selectph_256(
309 (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
310 (__v16hf)_mm256_setzero_ph());
315 return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B);
322 return (__m128h)__builtin_ia32_selectph_128(
323 (
__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
330 return (__m128h)__builtin_ia32_selectph_128(
331 (
__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
332 (__v8hf)_mm_setzero_ph());
336_mm256_abs_ph(__m256h __A) {
341_mm_abs_ph(__m128h __A) {
350_mm256_mask_conj_pch(__m256h __W,
__mmask8 __U, __m256h __A) {
351 return (__m256h)__builtin_ia32_selectps_256(
352 (
__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W);
356_mm256_maskz_conj_pch(
__mmask8 __U, __m256h __A) {
357 return (__m256h)__builtin_ia32_selectps_256(
368 return (__m128h)__builtin_ia32_selectps_128(
369 (
__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W);
373_mm_maskz_conj_pch(
__mmask8 __U, __m128h __A) {
374 return (__m128h)__builtin_ia32_selectps_128(
/* Unmasked 256-bit half-precision compare.
 * Expands to __builtin_ia32_cmpph256_mask with a and b cast to __v16hf (via
 * __m256h), p cast to int, and an all-ones __mmask16 as the incoming mask.
 * The result is cast to __mmask16.
 * NOTE(review): p is presumably a compile-time comparison predicate; confirm
 * against the builtin's definition. */
378#define _mm256_cmp_ph_mask(a, b, p) \
379 ((__mmask16)__builtin_ia32_cmpph256_mask( \
380 (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)-1))
/* Masked 256-bit half-precision compare.
 * Same builtin call as _mm256_cmp_ph_mask, except the final argument is the
 * caller-supplied m cast to __mmask16 instead of an all-ones mask.
 * NOTE(review): m presumably restricts which lanes can be set in the result;
 * confirm against the builtin's definition. */
382#define _mm256_mask_cmp_ph_mask(m, a, b, p) \
383 ((__mmask16)__builtin_ia32_cmpph256_mask( \
384 (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)(m)))
/* Unmasked 128-bit half-precision compare.
 * Expands to __builtin_ia32_cmpph128_mask with a and b cast to __v8hf (via
 * __m128h), p cast to int, and an all-ones __mmask8 as the incoming mask.
 * The result is cast to __mmask8.
 * NOTE(review): p is presumably a compile-time comparison predicate; confirm
 * against the builtin's definition. */
386#define _mm_cmp_ph_mask(a, b, p) \
387 ((__mmask8)__builtin_ia32_cmpph128_mask( \
388 (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)-1))
/* Masked 128-bit half-precision compare.
 * Same builtin call as _mm_cmp_ph_mask, except the final argument is the
 * caller-supplied m cast to __mmask8 instead of an all-ones mask.
 * NOTE(review): m presumably restricts which lanes can be set in the result;
 * confirm against the builtin's definition. */
390#define _mm_mask_cmp_ph_mask(m, a, b, p) \
391 ((__mmask8)__builtin_ia32_cmpph128_mask( \
392 (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m)))
395 return (__m256h)__builtin_ia32_rcpph256_mask(
396 (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (
__mmask16)-1);
400_mm256_mask_rcp_ph(__m256h __W,
__mmask16 __U, __m256h __A) {
401 return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W,
406_mm256_maskz_rcp_ph(
__mmask16 __U, __m256h __A) {
407 return (__m256h)__builtin_ia32_rcpph256_mask(
408 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (
__mmask16)__U);
412 return (__m128h)__builtin_ia32_rcpph128_mask(
413 (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
419 return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W,
425 return (__m128h)__builtin_ia32_rcpph128_mask(
426 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
430 return (__m256h)__builtin_ia32_rsqrtph256_mask(
431 (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (
__mmask16)-1);
435_mm256_mask_rsqrt_ph(__m256h __W,
__mmask16 __U, __m256h __A) {
436 return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W,
441_mm256_maskz_rsqrt_ph(
__mmask16 __U, __m256h __A) {
442 return (__m256h)__builtin_ia32_rsqrtph256_mask(
443 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (
__mmask16)__U);
447 return (__m128h)__builtin_ia32_rsqrtph128_mask(
448 (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
454 return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W,
459_mm_maskz_rsqrt_ph(
__mmask8 __U, __m128h __A) {
460 return (__m128h)__builtin_ia32_rsqrtph128_mask(
461 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
465 return (__m128h)__builtin_ia32_getexpph128_mask(
466 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)-1);
470_mm_mask_getexp_ph(__m128h __W,
__mmask8 __U, __m128h __A) {
471 return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W,
476_mm_maskz_getexp_ph(
__mmask8 __U, __m128h __A) {
477 return (__m128h)__builtin_ia32_getexpph128_mask(
478 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
482 return (__m256h)__builtin_ia32_getexpph256_mask(
483 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (
__mmask16)-1);
487_mm256_mask_getexp_ph(__m256h __W,
__mmask16 __U, __m256h __A) {
488 return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W,
493_mm256_maskz_getexp_ph(
__mmask16 __U, __m256h __A) {
494 return (__m256h)__builtin_ia32_getexpph256_mask(
495 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (
__mmask16)__U);
498#define _mm_getmant_ph(A, B, C) \
499 ((__m128h)__builtin_ia32_getmantph128_mask( \
500 (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
503#define _mm_mask_getmant_ph(W, U, A, B, C) \
504 ((__m128h)__builtin_ia32_getmantph128_mask( \
505 (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)(__m128h)(W), \
508#define _mm_maskz_getmant_ph(U, A, B, C) \
509 ((__m128h)__builtin_ia32_getmantph128_mask( \
510 (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
/* Unmasked 256-bit getmant.
 * Expands to __builtin_ia32_getmantph256_mask with A cast to __v16hf (via
 * __m256h), an int immediate built as ((C) << 2) | (B), a zero vector from
 * _mm256_setzero_ph() as the pass-through operand, and an all-ones __mmask16.
 * The result is cast to __m256h.
 * NOTE(review): B and C are presumably the interval and sign-control
 * selectors packed into one immediate; confirm against the ISA reference. */
513#define _mm256_getmant_ph(A, B, C) \
514 ((__m256h)__builtin_ia32_getmantph256_mask( \
515 (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \
516 (__v16hf)_mm256_setzero_ph(), (__mmask16)-1))
518#define _mm256_mask_getmant_ph(W, U, A, B, C) \
519 ((__m256h)__builtin_ia32_getmantph256_mask( \
520 (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \
/* Zero-pass-through masked 256-bit getmant.
 * Same builtin call as the unmasked form (A cast to __v16hf, immediate
 * ((C) << 2) | (B), zero vector from _mm256_setzero_ph()), except the mask
 * argument is the caller-supplied U cast to __mmask16.
 * NOTE(review): lanes whose bit in U is clear presumably take the zero
 * operand; confirm against the builtin's definition. */
523#define _mm256_maskz_getmant_ph(U, A, B, C) \
524 ((__m256h)__builtin_ia32_getmantph256_mask( \
525 (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \
526 (__v16hf)_mm256_setzero_ph(), (__mmask16)(U)))
530 return (__m128h)__builtin_ia32_scalefph128_mask(
531 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (
__mmask8)-1);
535_mm_mask_scalef_ph(__m128h __W,
__mmask8 __U, __m128h __A, __m128h __B) {
536 return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B,
541_mm_maskz_scalef_ph(
__mmask8 __U, __m128h __A, __m128h __B) {
542 return (__m128h)__builtin_ia32_scalefph128_mask(
543 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
548 return (__m256h)__builtin_ia32_scalefph256_mask(
549 (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (
__mmask16)-1);
553_mm256_mask_scalef_ph(__m256h __W,
__mmask16 __U, __m256h __A, __m256h __B) {
554 return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B,
559_mm256_maskz_scalef_ph(
__mmask16 __U, __m256h __A, __m256h __B) {
560 return (__m256h)__builtin_ia32_scalefph256_mask(
561 (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (
__mmask16)__U);
564#define _mm_roundscale_ph(A, imm) \
565 ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
566 (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \
/* Masked 128-bit roundscale.
 * Expands to __builtin_ia32_rndscaleph_128_mask with A cast to __v8hf (via
 * __m128h), imm cast to int, W cast to __v8hf as the pass-through operand,
 * and U cast to __mmask8. The result is cast to __m128h.
 * NOTE(review): lanes whose bit in U is clear presumably keep the value from
 * W; confirm against the builtin's definition. */
569#define _mm_mask_roundscale_ph(W, U, A, imm) \
570 ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
571 (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
573#define _mm_maskz_roundscale_ph(U, A, imm) \
574 ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
575 (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \
578#define _mm256_roundscale_ph(A, imm) \
579 ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
580 (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \
583#define _mm256_mask_roundscale_ph(W, U, A, imm) \
584 ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
585 (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \
588#define _mm256_maskz_roundscale_ph(U, A, imm) \
589 ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
590 (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \
593#define _mm_reduce_ph(A, imm) \
594 ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \
595 (__v8hf)_mm_setzero_ph(), \
/* Masked 128-bit reduce.
 * Expands to __builtin_ia32_reduceph128_mask with A cast to __v8hf (via
 * __m128h), imm cast to int, W cast to __v8hf as the pass-through operand,
 * and U cast to __mmask8. The result is cast to __m128h.
 * NOTE(review): lanes whose bit in U is clear presumably keep the value from
 * W; confirm against the builtin's definition. */
598#define _mm_mask_reduce_ph(W, U, A, imm) \
599 ((__m128h)__builtin_ia32_reduceph128_mask( \
600 (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
602#define _mm_maskz_reduce_ph(U, A, imm) \
603 ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \
604 (__v8hf)_mm_setzero_ph(), \
607#define _mm256_reduce_ph(A, imm) \
608 ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
609 (__v16hf)_mm256_setzero_ph(), \
612#define _mm256_mask_reduce_ph(W, U, A, imm) \
613 ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
614 (__v16hf)(__m256h)(W), \
617#define _mm256_maskz_reduce_ph(U, A, imm) \
618 ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
619 (__v16hf)_mm256_setzero_ph(), \
623 return __builtin_ia32_sqrtph((__v8hf)
__a);
629 return (__m128h)__builtin_ia32_selectph_128(
630 (
__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W);
635 return (__m128h)__builtin_ia32_selectph_128(
636 (
__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph());
640 return (__m256h)__builtin_ia32_sqrtph256((__v16hf)
__a);
644_mm256_mask_sqrt_ph(__m256h __W,
__mmask16 __U, __m256h __A) {
645 return (__m256h)__builtin_ia32_selectph_256(
646 (
__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W);
650_mm256_maskz_sqrt_ph(
__mmask16 __U, __m256h __A) {
651 return (__m256h)__builtin_ia32_selectph_256((
__mmask16)__U,
652 (__v16hf)_mm256_sqrt_ph(__A),
653 (__v16hf)_mm256_setzero_ph());
/* Masked 128-bit fpclass test.
 * Expands to __builtin_ia32_fpclassph128_mask with A cast to __v8hf (via
 * __m128h), imm cast to int, and U cast to __mmask8. The result is cast to
 * __mmask8.
 * NOTE(review): imm presumably selects the floating-point categories to
 * test for; confirm against the ISA reference. */
656#define _mm_mask_fpclass_ph_mask(U, A, imm) \
657 ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \
658 (int)(imm), (__mmask8)(U)))
/* Unmasked 128-bit fpclass test.
 * Same builtin call as _mm_mask_fpclass_ph_mask, but with an all-ones
 * __mmask8 passed as the incoming mask. */
660#define _mm_fpclass_ph_mask(A, imm) \
661 ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \
662 (int)(imm), (__mmask8)-1))
/* Masked 256-bit fpclass test.
 * Expands to __builtin_ia32_fpclassph256_mask with A cast to __v16hf (via
 * __m256h), imm cast to int, and U cast to __mmask16. The result is cast to
 * __mmask16.
 * NOTE(review): imm presumably selects the floating-point categories to
 * test for; confirm against the ISA reference. */
664#define _mm256_mask_fpclass_ph_mask(U, A, imm) \
665 ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \
666 (int)(imm), (__mmask16)(U)))
/* Unmasked 256-bit fpclass test.
 * Same builtin call as _mm256_mask_fpclass_ph_mask, but with an all-ones
 * __mmask16 passed as the incoming mask. */
668#define _mm256_fpclass_ph_mask(A, imm) \
669 ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \
670 (int)(imm), (__mmask16)-1))
673 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
674 (__v2df)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
680 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
685_mm_maskz_cvtpd_ph(
__mmask8 __U, __m128d __A) {
686 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
687 (__v2df)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
691 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
692 (__v4df)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
696_mm256_mask_cvtpd_ph(__m128h __W,
__mmask8 __U, __m256d __A) {
697 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
702_mm256_maskz_cvtpd_ph(
__mmask8 __U, __m256d __A) {
703 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
704 (__v4df)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
708 return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
715 return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
720_mm_maskz_cvtph_pd(
__mmask8 __U, __m128h __A) {
721 return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
726 return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
731_mm256_mask_cvtph_pd(__m256d __W,
__mmask8 __U, __m128h __A) {
732 return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
737_mm256_maskz_cvtph_pd(
__mmask8 __U, __m128h __A) {
738 return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
743 return (__m128i)__builtin_ia32_vcvtph2w128_mask(
748_mm_mask_cvtph_epi16(__m128i __W,
__mmask8 __U, __m128h __A) {
749 return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
754_mm_maskz_cvtph_epi16(
__mmask8 __U, __m128h __A) {
755 return (__m128i)__builtin_ia32_vcvtph2w128_mask(
760_mm256_cvtph_epi16(__m256h __A) {
761 return (__m256i)__builtin_ia32_vcvtph2w256_mask(
766_mm256_mask_cvtph_epi16(__m256i __W,
__mmask16 __U, __m256h __A) {
767 return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
772_mm256_maskz_cvtph_epi16(
__mmask16 __U, __m256h __A) {
773 return (__m256i)__builtin_ia32_vcvtph2w256_mask(
778 return (__m128i)__builtin_ia32_vcvttph2w128_mask(
783_mm_mask_cvttph_epi16(__m128i __W,
__mmask8 __U, __m128h __A) {
784 return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
789_mm_maskz_cvttph_epi16(
__mmask8 __U, __m128h __A) {
790 return (__m128i)__builtin_ia32_vcvttph2w128_mask(
795_mm256_cvttph_epi16(__m256h __A) {
796 return (__m256i)__builtin_ia32_vcvttph2w256_mask(
801_mm256_mask_cvttph_epi16(__m256i __W,
__mmask16 __U, __m256h __A) {
802 return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
807_mm256_maskz_cvttph_epi16(
__mmask16 __U, __m256h __A) {
808 return (__m256i)__builtin_ia32_vcvttph2w256_mask(
813_mm_cvtepi16_ph(__m128i __A) {
814 return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
818_mm_mask_cvtepi16_ph(__m128h __W,
__mmask8 __U, __m128i __A) {
819 return (__m128h)__builtin_ia32_selectph_128(
820 (
__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
824_mm_maskz_cvtepi16_ph(
__mmask8 __U, __m128i __A) {
825 return (__m128h)__builtin_ia32_selectph_128(
826 (
__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
830_mm256_cvtepi16_ph(__m256i __A) {
831 return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
835_mm256_mask_cvtepi16_ph(__m256h __W,
__mmask16 __U, __m256i __A) {
836 return (__m256h)__builtin_ia32_selectph_256(
837 (
__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
841_mm256_maskz_cvtepi16_ph(
__mmask16 __U, __m256i __A) {
842 return (__m256h)__builtin_ia32_selectph_256((
__mmask16)__U,
843 (__v16hf)_mm256_cvtepi16_ph(__A),
844 (__v16hf)_mm256_setzero_ph());
848 return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
853_mm_mask_cvtph_epu16(__m128i __W,
__mmask8 __U, __m128h __A) {
854 return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
859_mm_maskz_cvtph_epu16(
__mmask8 __U, __m128h __A) {
860 return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
865_mm256_cvtph_epu16(__m256h __A) {
866 return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
871_mm256_mask_cvtph_epu16(__m256i __W,
__mmask16 __U, __m256h __A) {
872 return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
877_mm256_maskz_cvtph_epu16(
__mmask16 __U, __m256h __A) {
878 return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
883 return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
888_mm_mask_cvttph_epu16(__m128i __W,
__mmask8 __U, __m128h __A) {
889 return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
894_mm_maskz_cvttph_epu16(
__mmask8 __U, __m128h __A) {
895 return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
900_mm256_cvttph_epu16(__m256h __A) {
901 return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
906_mm256_mask_cvttph_epu16(__m256i __W,
__mmask16 __U, __m256h __A) {
907 return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
912_mm256_maskz_cvttph_epu16(
__mmask16 __U, __m256h __A) {
913 return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
918_mm_cvtepu16_ph(__m128i __A) {
919 return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
923_mm_mask_cvtepu16_ph(__m128h __W,
__mmask8 __U, __m128i __A) {
924 return (__m128h)__builtin_ia32_selectph_128(
925 (
__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
929_mm_maskz_cvtepu16_ph(
__mmask8 __U, __m128i __A) {
930 return (__m128h)__builtin_ia32_selectph_128(
931 (
__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
935_mm256_cvtepu16_ph(__m256i __A) {
936 return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
940_mm256_mask_cvtepu16_ph(__m256h __W,
__mmask16 __U, __m256i __A) {
941 return (__m256h)__builtin_ia32_selectph_256(
942 (
__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
946_mm256_maskz_cvtepu16_ph(
__mmask16 __U, __m256i __A) {
947 return (__m256h)__builtin_ia32_selectph_256((
__mmask16)__U,
948 (__v16hf)_mm256_cvtepu16_ph(__A),
949 (__v16hf)_mm256_setzero_ph());
953 return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
958_mm_mask_cvtph_epi32(__m128i __W,
__mmask8 __U, __m128h __A) {
959 return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
964_mm_maskz_cvtph_epi32(
__mmask8 __U, __m128h __A) {
965 return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
970_mm256_cvtph_epi32(__m128h __A) {
971 return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
976_mm256_mask_cvtph_epi32(__m256i __W,
__mmask8 __U, __m128h __A) {
977 return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
982_mm256_maskz_cvtph_epi32(
__mmask8 __U, __m128h __A) {
983 return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
988 return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
993_mm_mask_cvtph_epu32(__m128i __W,
__mmask8 __U, __m128h __A) {
994 return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
999_mm_maskz_cvtph_epu32(
__mmask8 __U, __m128h __A) {
1000 return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
1005_mm256_cvtph_epu32(__m128h __A) {
1006 return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
1011_mm256_mask_cvtph_epu32(__m256i __W,
__mmask8 __U, __m128h __A) {
1012 return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
1017_mm256_maskz_cvtph_epu32(
__mmask8 __U, __m128h __A) {
1018 return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
1023 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1024 (__v4si)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
1028_mm_mask_cvtepi32_ph(__m128h __W,
__mmask8 __U, __m128i __A) {
1029 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
1034_mm_maskz_cvtepi32_ph(
__mmask8 __U, __m128i __A) {
1035 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1036 (__v4si)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
1040_mm256_cvtepi32_ph(__m256i __A) {
1041 return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
1045_mm256_mask_cvtepi32_ph(__m128h __W,
__mmask8 __U, __m256i __A) {
1046 return (__m128h)__builtin_ia32_selectph_128(
1047 (
__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
1051_mm256_maskz_cvtepi32_ph(
__mmask8 __U, __m256i __A) {
1052 return (__m128h)__builtin_ia32_selectph_128(
1053 (
__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
1057 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1058 (__v4su)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
1062_mm_mask_cvtepu32_ph(__m128h __W,
__mmask8 __U, __m128i __A) {
1063 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
1068_mm_maskz_cvtepu32_ph(
__mmask8 __U, __m128i __A) {
1069 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1070 (__v4su)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
1074_mm256_cvtepu32_ph(__m256i __A) {
1075 return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
1079_mm256_mask_cvtepu32_ph(__m128h __W,
__mmask8 __U, __m256i __A) {
1080 return (__m128h)__builtin_ia32_selectph_128(
1081 (
__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
1085_mm256_maskz_cvtepu32_ph(
__mmask8 __U, __m256i __A) {
1086 return (__m128h)__builtin_ia32_selectph_128(
1087 (
__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
1091 return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1096_mm_mask_cvttph_epi32(__m128i __W,
__mmask8 __U, __m128h __A) {
1097 return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
1102_mm_maskz_cvttph_epi32(
__mmask8 __U, __m128h __A) {
1103 return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1108_mm256_cvttph_epi32(__m128h __A) {
1109 return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1114_mm256_mask_cvttph_epi32(__m256i __W,
__mmask8 __U, __m128h __A) {
1115 return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
1120_mm256_maskz_cvttph_epi32(
__mmask8 __U, __m128h __A) {
1121 return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1126 return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1131_mm_mask_cvttph_epu32(__m128i __W,
__mmask8 __U, __m128h __A) {
1132 return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
1137_mm_maskz_cvttph_epu32(
__mmask8 __U, __m128h __A) {
1138 return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1143_mm256_cvttph_epu32(__m128h __A) {
1144 return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1149_mm256_mask_cvttph_epu32(__m256i __W,
__mmask8 __U, __m128h __A) {
1150 return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
1155_mm256_maskz_cvttph_epu32(
__mmask8 __U, __m128h __A) {
1156 return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1161 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1162 (__v2di)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
1166_mm_mask_cvtepi64_ph(__m128h __W,
__mmask8 __U, __m128i __A) {
1167 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
1172_mm_maskz_cvtepi64_ph(
__mmask8 __U, __m128i __A) {
1173 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1174 (__v2di)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
1178_mm256_cvtepi64_ph(__m256i __A) {
1179 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1180 (__v4di)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
1184_mm256_mask_cvtepi64_ph(__m128h __W,
__mmask8 __U, __m256i __A) {
1185 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
1190_mm256_maskz_cvtepi64_ph(
__mmask8 __U, __m256i __A) {
1191 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1192 (__v4di)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
1196 return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1201_mm_mask_cvtph_epi64(__m128i __W,
__mmask8 __U, __m128h __A) {
1202 return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
1207_mm_maskz_cvtph_epi64(
__mmask8 __U, __m128h __A) {
1208 return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1213_mm256_cvtph_epi64(__m128h __A) {
1214 return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1219_mm256_mask_cvtph_epi64(__m256i __W,
__mmask8 __U, __m128h __A) {
1220 return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
1225_mm256_maskz_cvtph_epi64(
__mmask8 __U, __m128h __A) {
1226 return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1231 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1232 (__v2du)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
1236_mm_mask_cvtepu64_ph(__m128h __W,
__mmask8 __U, __m128i __A) {
1237 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
1242_mm_maskz_cvtepu64_ph(
__mmask8 __U, __m128i __A) {
1243 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1244 (__v2du)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
1248_mm256_cvtepu64_ph(__m256i __A) {
1249 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1250 (__v4du)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
1254_mm256_mask_cvtepu64_ph(__m128h __W,
__mmask8 __U, __m256i __A) {
1255 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
1260_mm256_maskz_cvtepu64_ph(
__mmask8 __U, __m256i __A) {
1261 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1262 (__v4du)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
1266 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1271_mm_mask_cvtph_epu64(__m128i __W,
__mmask8 __U, __m128h __A) {
1272 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1277_mm_maskz_cvtph_epu64(
__mmask8 __U, __m128h __A) {
1278 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1283_mm256_cvtph_epu64(__m128h __A) {
1284 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1289_mm256_mask_cvtph_epu64(__m256i __W,
__mmask8 __U, __m128h __A) {
1290 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1295_mm256_maskz_cvtph_epu64(
__mmask8 __U, __m128h __A) {
1296 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1301 return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1306_mm_mask_cvttph_epi64(__m128i __W,
__mmask8 __U, __m128h __A) {
1307 return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
1312_mm_maskz_cvttph_epi64(
__mmask8 __U, __m128h __A) {
1313 return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1318_mm256_cvttph_epi64(__m128h __A) {
1319 return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1324_mm256_mask_cvttph_epi64(__m256i __W,
__mmask8 __U, __m128h __A) {
1325 return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
1330_mm256_maskz_cvttph_epi64(
__mmask8 __U, __m128h __A) {
1331 return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1336 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1341_mm_mask_cvttph_epu64(__m128i __W,
__mmask8 __U, __m128h __A) {
1342 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1347_mm_maskz_cvttph_epu64(
__mmask8 __U, __m128h __A) {
1348 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1353_mm256_cvttph_epu64(__m128h __A) {
1354 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1359_mm256_mask_cvttph_epu64(__m256i __W,
__mmask8 __U, __m128h __A) {
1360 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1365_mm256_maskz_cvttph_epu64(
__mmask8 __U, __m128h __A) {
1366 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1371 return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1378 return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
1383_mm_maskz_cvtxph_ps(
__mmask8 __U, __m128h __A) {
1384 return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1389 return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1394_mm256_mask_cvtxph_ps(__m256 __W,
__mmask8 __U, __m128h __A) {
1395 return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
1400_mm256_maskz_cvtxph_ps(
__mmask8 __U, __m128h __A) {
1401 return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1406 return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1407 (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
1413 return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
1418_mm_maskz_cvtxps_ph(
__mmask8 __U, __m128 __A) {
1419 return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1420 (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
1424 return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1425 (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (
__mmask8)-1);
1429_mm256_mask_cvtxps_ph(__m128h __W,
__mmask8 __U, __m256 __A) {
1430 return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
1435_mm256_maskz_cvtxps_ph(
__mmask8 __U, __m256 __A) {
1436 return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1437 (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
1443 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
1451 return (__m128h)__builtin_ia32_selectph_128(
1453 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1458_mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C,
__mmask8 __U) {
1459 return (__m128h)__builtin_ia32_selectph_128(
1461 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1466_mm_maskz_fmadd_ph(
__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1467 return (__m128h)__builtin_ia32_selectph_128(
1469 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1470 (__v8hf)_mm_setzero_ph());
1476 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
1484 return (__m128h)__builtin_ia32_selectph_128(
1485 (
__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1490_mm_maskz_fmsub_ph(
__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
/* Zero-masked wrapper around _mm_fmsub_ph.
 * Evaluates _mm_fmsub_ph(__A, __B, __C) on all lanes, then combines that
 * result with an all-zero vector through __builtin_ia32_selectph_128 under
 * mask __U.  Unlike the neighbouring maskz_* FMA wrappers, which call
 * __builtin_elementwise_fma directly, this one goes through the unmasked
 * intrinsic.
 * NOTE(review): presumably a set bit in __U keeps the computed lane and a
 * clear bit yields zero -- confirm against the select builtin. */
1491 return (__m128h)__builtin_ia32_selectph_128(
1492 (
__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1493 (__v8hf)_mm_setzero_ph());
1497_mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C,
__mmask8 __U) {
1498 return (__m128h)__builtin_ia32_selectph_128(
1500 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1505_mm_maskz_fnmadd_ph(
__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1506 return (__m128h)__builtin_ia32_selectph_128(
1508 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1509 (__v8hf)_mm_setzero_ph());
1513_mm_maskz_fnmsub_ph(
__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1514 return (__m128h)__builtin_ia32_selectph_128(
1516 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1517 (__v8hf)_mm_setzero_ph());
1523 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
1528_mm256_mask_fmadd_ph(__m256h __A,
__mmask16 __U, __m256h __B, __m256h __C) {
1529 return (__m256h)__builtin_ia32_selectph_256(
1531 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1536_mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C,
__mmask16 __U) {
1537 return (__m256h)__builtin_ia32_selectph_256(
1539 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1544_mm256_maskz_fmadd_ph(
__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1545 return (__m256h)__builtin_ia32_selectph_256(
1547 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1548 (__v16hf)_mm256_setzero_ph());
1554 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
1559_mm256_mask_fmsub_ph(__m256h __A,
__mmask16 __U, __m256h __B, __m256h __C) {
1560 return (__m256h)__builtin_ia32_selectph_256(
1562 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1567_mm256_maskz_fmsub_ph(
__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1568 return (__m256h)__builtin_ia32_selectph_256(
1570 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1571 (__v16hf)_mm256_setzero_ph());
1575_mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C,
__mmask16 __U) {
1576 return (__m256h)__builtin_ia32_selectph_256(
1578 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1583_mm256_maskz_fnmadd_ph(
__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1584 return (__m256h)__builtin_ia32_selectph_256(
1586 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1587 (__v16hf)_mm256_setzero_ph());
1591_mm256_maskz_fnmsub_ph(
__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1592 return (__m256h)__builtin_ia32_selectph_256(
1594 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1595 (__v16hf)_mm256_setzero_ph());
1601 return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1606_mm_mask_fmaddsub_ph(__m128h __A,
__mmask8 __U, __m128h __B, __m128h __C) {
1607 return (__m128h)__builtin_ia32_selectph_128(
1609 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1614_mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C,
__mmask8 __U) {
1615 return (__m128h)__builtin_ia32_selectph_128(
1617 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1622_mm_maskz_fmaddsub_ph(
__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1623 return (__m128h)__builtin_ia32_selectph_128(
1625 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1626 (__v8hf)_mm_setzero_ph());
1632 return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1637_mm_mask_fmsubadd_ph(__m128h __A,
__mmask8 __U, __m128h __B, __m128h __C) {
1638 return (__m128h)__builtin_ia32_selectph_128(
1640 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1645_mm_maskz_fmsubadd_ph(
__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1646 return (__m128h)__builtin_ia32_selectph_128(
1648 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1649 (__v8hf)_mm_setzero_ph());
1653_mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
1654 return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1659_mm256_mask_fmaddsub_ph(__m256h __A,
__mmask16 __U, __m256h __B, __m256h __C) {
1660 return (__m256h)__builtin_ia32_selectph_256(
1662 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1667_mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C,
__mmask16 __U) {
1668 return (__m256h)__builtin_ia32_selectph_256(
1670 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1675_mm256_maskz_fmaddsub_ph(
__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1676 return (__m256h)__builtin_ia32_selectph_256(
1678 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1679 (__v16hf)_mm256_setzero_ph());
1683_mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
1684 return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1689_mm256_mask_fmsubadd_ph(__m256h __A,
__mmask16 __U, __m256h __B, __m256h __C) {
1690 return (__m256h)__builtin_ia32_selectph_256(
1692 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1697_mm256_maskz_fmsubadd_ph(
__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1698 return (__m256h)__builtin_ia32_selectph_256(
1700 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1701 (__v16hf)_mm256_setzero_ph());
1705_mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C,
__mmask8 __U) {
1706 return (__m128h)__builtin_ia32_selectph_128(
1708 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1713_mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C,
__mmask16 __U) {
1714 return (__m256h)__builtin_ia32_selectph_256(
1716 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1721_mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C,
__mmask8 __U) {
1722 return (__m128h)__builtin_ia32_selectph_128(
1724 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1729_mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C,
__mmask16 __U) {
1730 return (__m256h)__builtin_ia32_selectph_256(
1732 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1739 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
1744_mm_mask_fnmadd_ph(__m128h __A,
__mmask8 __U, __m128h __B, __m128h __C) {
1745 return (__m128h)__builtin_ia32_selectph_128(
1747 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
1754 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
1759_mm256_mask_fnmadd_ph(__m256h __A,
__mmask16 __U, __m256h __B, __m256h __C) {
1760 return (__m256h)__builtin_ia32_selectph_256(
1762 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
1769 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
1774_mm_mask_fnmsub_ph(__m128h __A,
__mmask8 __U, __m128h __B, __m128h __C) {
1775 return (__m128h)__builtin_ia32_selectph_128(
1777 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1782_mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C,
__mmask8 __U) {
1783 return (__m128h)__builtin_ia32_selectph_128(
1785 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1792 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
1797_mm256_mask_fnmsub_ph(__m256h __A,
__mmask16 __U, __m256h __B, __m256h __C) {
1798 return (__m256h)__builtin_ia32_selectph_256(
1800 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1805_mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C,
__mmask16 __U) {
1806 return (__m256h)__builtin_ia32_selectph_256(
1808 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1814 return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1815 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (
__mmask8)-1);
1819_mm_mask_fcmul_pch(__m128h __W,
__mmask8 __U, __m128h __A, __m128h __B) {
1820 return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1825_mm_maskz_fcmul_pch(
__mmask8 __U, __m128h __A, __m128h __B) {
/* Zero-masked wrapper over __builtin_ia32_vfcmulcph128_mask.
 * __A, __B and the all-zero vector from _mm_setzero_ph() are all
 * reinterpreted as __v4sf before being passed, with __U as the mask; the
 * builtin's result is cast back to __m128h.
 * NOTE(review): the __v4sf view presumably reflects one 32-bit lane per
 * complex FP16 (real, imaginary) pair, and the zero vector presumably fills
 * lanes whose mask bit is clear -- confirm both. */
1826 return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1827 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (
__mmask8)__U);
1832 return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1833 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (
__mmask8)-1);
1837_mm256_mask_fcmul_pch(__m256h __W,
__mmask8 __U, __m256h __A, __m256h __B) {
1838 return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1843_mm256_maskz_fcmul_pch(
__mmask8 __U, __m256h __A, __m256h __B) {
/* Zero-masked wrapper over __builtin_ia32_vfcmulcph256_mask.
 * __A, __B and the all-zero vector from _mm256_setzero_ph() are all
 * reinterpreted as __v8sf before being passed, with __U as the mask; the
 * builtin's result is cast back to __m256h.
 * NOTE(review): the __v8sf view presumably reflects one 32-bit lane per
 * complex FP16 (real, imaginary) pair, which would also explain the 8-bit
 * mask on a 256-bit half vector -- confirm. */
1844 return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1845 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (
__mmask8)__U);
1851 return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1856_mm_mask_fcmadd_pch(__m128h __A,
__mmask8 __U, __m128h __B, __m128h __C) {
1857 return (__m128h)__builtin_ia32_selectps_128(
1859 __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B,
1865_mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C,
__mmask8 __U) {
1866 return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1871_mm_maskz_fcmadd_pch(
__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
/* Zero-masked wrapper that forwards straight to the dedicated
 * __builtin_ia32_vfcmaddcph128_maskz builtin: __A, __B and __C are
 * reinterpreted as __v4sf and passed in that order, followed by the mask __U.
 * No separate select or zero vector is involved here; whatever zeroing
 * happens is done by the builtin itself.
 * NOTE(review): exact lane semantics of the builtin are not visible in this
 * file -- consult its definition before relying on them. */
1872 return (__m128h)__builtin_ia32_vfcmaddcph128_maskz(
1873 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (
__mmask8)__U);
1879 return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1884_mm256_mask_fcmadd_pch(__m256h __A,
__mmask8 __U, __m256h __B, __m256h __C) {
1885 return (__m256h)__builtin_ia32_selectps_256(
1887 __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1893_mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C,
__mmask8 __U) {
1894 return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1899_mm256_maskz_fcmadd_pch(
__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
/* Zero-masked wrapper that forwards straight to the dedicated
 * __builtin_ia32_vfcmaddcph256_maskz builtin: __A, __B and __C are
 * reinterpreted as __v8sf and passed in that order, followed by the mask __U.
 * No separate select or zero vector is involved here; whatever zeroing
 * happens is done by the builtin itself.
 * NOTE(review): exact lane semantics of the builtin are not visible in this
 * file -- consult its definition before relying on them. */
1900 return (__m256h)__builtin_ia32_vfcmaddcph256_maskz(
1901 (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (
__mmask8)__U);
1906 return (__m128h)__builtin_ia32_vfmulcph128_mask(
1907 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (
__mmask8)-1);
1914 return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1919_mm_maskz_fmul_pch(
__mmask8 __U, __m128h __A, __m128h __B) {
/* Zero-masked wrapper over __builtin_ia32_vfmulcph128_mask.
 * __A, __B and the all-zero vector from _mm_setzero_ph() are all
 * reinterpreted as __v4sf before being passed, with __U as the mask; the
 * builtin's result is cast back to __m128h.
 * NOTE(review): the __v4sf view presumably reflects one 32-bit lane per
 * complex FP16 (real, imaginary) pair, and the zero vector presumably fills
 * lanes whose mask bit is clear -- confirm both. */
1920 return (__m128h)__builtin_ia32_vfmulcph128_mask(
1921 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (
__mmask8)__U);
1926 return (__m256h)__builtin_ia32_vfmulcph256_mask(
1927 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (
__mmask8)-1);
1931_mm256_mask_fmul_pch(__m256h __W,
__mmask8 __U, __m256h __A, __m256h __B) {
1932 return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1937_mm256_maskz_fmul_pch(
__mmask8 __U, __m256h __A, __m256h __B) {
/* Zero-masked wrapper over __builtin_ia32_vfmulcph256_mask.
 * __A, __B and the all-zero vector from _mm256_setzero_ph() are all
 * reinterpreted as __v8sf before being passed, with __U as the mask; the
 * builtin's result is cast back to __m256h.
 * NOTE(review): the __v8sf view presumably reflects one 32-bit lane per
 * complex FP16 (real, imaginary) pair, which would also explain the 8-bit
 * mask on a 256-bit half vector -- confirm. */
1938 return (__m256h)__builtin_ia32_vfmulcph256_mask(
1939 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (
__mmask8)__U);
1945 return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1950_mm_mask_fmadd_pch(__m128h __A,
__mmask8 __U, __m128h __B, __m128h __C) {
1951 return (__m128h)__builtin_ia32_selectps_128(
1953 __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
1959_mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C,
__mmask8 __U) {
1960 return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1965_mm_maskz_fmadd_pch(
__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1966 return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B,
1973 return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1978_mm256_mask_fmadd_pch(__m256h __A,
__mmask8 __U, __m256h __B, __m256h __C) {
1979 return (__m256h)__builtin_ia32_selectps_256(
1981 __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1987_mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C,
__mmask8 __U) {
1988 return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1993_mm256_maskz_fmadd_pch(
__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1994 return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B,
1999_mm_mask_blend_ph(
__mmask8 __U, __m128h __A, __m128h __W) {
2000 return (__m128h)__builtin_ia32_selectph_128((
__mmask8)__U, (__v8hf)__W,
2005_mm256_mask_blend_ph(
__mmask16 __U, __m256h __A, __m256h __W) {
2006 return (__m256h)__builtin_ia32_selectph_256((
__mmask16)__U, (__v16hf)__W,
2011_mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
2012 return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
2017_mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
2018 return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
2023_mm_permutexvar_ph(__m128i __A, __m128h __B) {
/* Thin wrapper over the 16-bit integer permute builtin
 * __builtin_ia32_permvarhi128: both operands are reinterpreted as __v8hi, so
 * the half-precision lanes of __B are moved as raw 16-bit patterns.
 * Note the argument swap: the builtin receives __B first and __A second.
 * NOTE(review): presumably __A holds the per-lane source indices and __B the
 * data being permuted -- confirm against the builtin's definition. */
2024 return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
2028_mm256_permutexvar_ph(__m256i __A, __m256h __B) {
/* Thin wrapper over the 16-bit integer permute builtin
 * __builtin_ia32_permvarhi256: both operands are reinterpreted as __v16hi, so
 * the half-precision lanes of __B are moved as raw 16-bit patterns.
 * Note the argument swap: the builtin receives __B first and __A second.
 * NOTE(review): presumably __A holds the per-lane source indices and __B the
 * data being permuted -- confirm against the builtin's definition. */
2029 return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
2033_mm256_reduce_add_ph(__m256h __W) {
/* Reduces __W with __builtin_ia32_reduce_fadd_ph256, passing -0.0f16 as the
 * builtin's first argument and the vector as its second.
 * NOTE(review): the first argument is presumably the starting accumulator;
 * -0.0 rather than +0.0 would then keep an all-(-0.0) input summing to -0.0.
 * Evaluation order of the lane additions is not visible here -- confirm
 * before relying on bit-exact results. */
2034 return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W);
2038_mm256_reduce_mul_ph(__m256h __W) {
/* Reduces __W with __builtin_ia32_reduce_fmul_ph256, passing 1.0f16 as the
 * builtin's first argument and the vector as its second.
 * NOTE(review): the first argument is presumably the starting accumulator
 * (1.0 being the multiplicative identity).  Evaluation order of the lane
 * products is not visible here -- confirm before relying on bit-exact
 * results. */
2039 return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W);
2043_mm256_reduce_max_ph(__m256h __V) {
/* Returns whatever __builtin_ia32_reduce_fmax_ph256 produces for __V; the
 * vector is the builtin's only argument (no seed value is supplied).
 * NOTE(review): NaN and signed-zero handling are determined by the builtin
 * and are not visible here -- confirm if callers depend on them. */
2044 return __builtin_ia32_reduce_fmax_ph256(__V);
2048_mm256_reduce_min_ph(__m256h __V) {
/* Returns whatever __builtin_ia32_reduce_fmin_ph256 produces for __V; the
 * vector is the builtin's only argument (no seed value is supplied).
 * NOTE(review): NaN and signed-zero handling are determined by the builtin
 * and are not visible here -- confirm if callers depend on them. */
2049 return __builtin_ia32_reduce_fmin_ph256(__V);
2053_mm_reduce_add_ph(__m128h __W) {
/* Reduces __W with __builtin_ia32_reduce_fadd_ph128, passing -0.0f16 as the
 * builtin's first argument and the vector as its second.
 * NOTE(review): the first argument is presumably the starting accumulator;
 * -0.0 rather than +0.0 would then keep an all-(-0.0) input summing to -0.0.
 * Evaluation order of the lane additions is not visible here -- confirm
 * before relying on bit-exact results. */
2054 return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W);
2058_mm_reduce_mul_ph(__m128h __W) {
/* Reduces __W with __builtin_ia32_reduce_fmul_ph128, passing 1.0f16 as the
 * builtin's first argument and the vector as its second.
 * NOTE(review): the first argument is presumably the starting accumulator
 * (1.0 being the multiplicative identity).  Evaluation order of the lane
 * products is not visible here -- confirm before relying on bit-exact
 * results. */
2059 return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W);
2063_mm_reduce_max_ph(__m128h __V) {
/* Returns whatever __builtin_ia32_reduce_fmax_ph128 produces for __V; the
 * vector is the builtin's only argument (no seed value is supplied).
 * NOTE(review): NaN and signed-zero handling are determined by the builtin
 * and are not visible here -- confirm if callers depend on them. */
2064 return __builtin_ia32_reduce_fmax_ph128(__V);
2068_mm_reduce_min_ph(__m128h __V) {
/* Returns whatever __builtin_ia32_reduce_fmin_ph128 produces for __V; the
 * vector is the builtin's only argument (no seed value is supplied).
 * NOTE(review): NaN and signed-zero handling are determined by the builtin
 * and are not visible here -- confirm if callers depend on them. */
2069 return __builtin_ia32_reduce_fmin_ph128(__V);
/* Alias macros: each *_mul_pch name expands to the *_fmul_pch intrinsic with
 * the same prefix (plain, mask, maskz; 128-bit _mm and 256-bit _mm256),
 * forwarding its arguments unchanged and in the same order. */
2073#define _mm_mul_pch(A, B) _mm_fmul_pch(A, B)
2074#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch(W, U, A, B)
2075#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch(U, A, B)
2076#define _mm256_mul_pch(A, B) _mm256_fmul_pch(A, B)
2077#define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch(W, U, A, B)
2078#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch(U, A, B)
/* Alias macros: each *_cmul_pch name expands to the *_fcmul_pch intrinsic
 * with the same prefix (plain, mask, maskz; 128-bit _mm and 256-bit _mm256),
 * forwarding its arguments unchanged and in the same order. */
2080#define _mm_cmul_pch(A, B) _mm_fcmul_pch(A, B)
2081#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch(W, U, A, B)
2082#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch(U, A, B)
2083#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch(A, B)
2084#define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch(W, U, A, B)
2085#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch(U, A, B)
2087#undef __DEFAULT_FN_ATTRS128
2088#undef __DEFAULT_FN_ATTRS256
2089#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
2090#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
#define __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void)
Create a 256-bit vector of [4 x double] with undefined values.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_ps(float __w)
Constructs a 256-bit floating-point vector of [8 x float], with each of the eight single-precision floating-point vector elements set to the specified single-precision floating-point value.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements set to the specified 32-bit integral value.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_ps(__m256 __a, __m256 __b)
Performs a bitwise XOR of two 256-bit vectors of [8 x float].
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ void int __a
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_ps(__m128 __a, __m128 __b)
Performs a bitwise exclusive OR of two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_ps(float __w)
Constructs a 128-bit floating-point vector of [4 x float], with each of the four single-precision floating-point vector elements set to the specified single-precision floating-point value.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.