clang 22.0.0git
avx512vlfp16intrin.h
Go to the documentation of this file.
1/*===---------- avx512vlfp16intrin.h - AVX512-FP16 intrinsics --------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
/* Reject direct inclusion: if __IMMINTRIN_H is not defined at this point,
 * compilation stops with the diagnostic below. */
#ifndef __IMMINTRIN_H
#error                                                                         \
    "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead."
#endif
13
/* The declarations that follow are only compiled when __SSE2__ is defined. */
#ifdef __SSE2__

/* Include guard for this header. */
#ifndef __AVX512VLFP16INTRIN_H
#define __AVX512VLFP16INTRIN_H
18
/* Define the default attributes for the functions in this file.
 * Both variants force inlining, suppress debug info and request the
 * "avx512fp16,avx512vl" target features; they differ only in the minimum
 * vector width (256 vs. 128 bits) they declare. */
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512fp16,avx512vl"),                            \
                 __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512fp16,avx512vl"),                            \
                 __min_vector_width__(128)))
28
/* Constexpr-capable flavours of the default attributes: when compiling as
 * C++11 or later they append `constexpr`; otherwise (C, or older C++) they
 * expand to the plain attribute macros. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
36
37static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {
38 return __a[0];
39}
40
41static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) {
42 return __a[0];
43}
44
45static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) {
46 return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
47}
48
/* Broadcast __h into all eight FP16 elements of a 128-bit vector.
 * Kept as a single return statement: the function is constexpr when the
 * header is compiled as C++11 or later. */
static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_set1_ph(_Float16 __h) {
  return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h};
}
53
/* Broadcast __h into all sixteen FP16 elements of a 256-bit vector.
 * Kept as a single return statement: the function is constexpr when the
 * header is compiled as C++11 or later. */
static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_set1_ph(_Float16 __h) {
  return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h,
                            __h, __h, __h, __h, __h, __h, __h, __h};
}
59
60static __inline __m128h __DEFAULT_FN_ATTRS128
61_mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
62 _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
63 return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
64}
65
66static __inline __m256h __DEFAULT_FN_ATTRS256
67_mm256_set1_pch(_Float16 _Complex h) {
68 return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h));
69}
70
71static __inline __m128h __DEFAULT_FN_ATTRS128
72_mm_set1_pch(_Float16 _Complex h) {
73 return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h));
74}
75
76static __inline __m256h __DEFAULT_FN_ATTRS256
77_mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
78 _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
79 _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
80 _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) {
81 return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
82 __h10, __h9, __h8, __h7, __h6, __h5,
83 __h4, __h3, __h2, __h1};
84}
85
86static __inline__ __m128h __DEFAULT_FN_ATTRS128
87_mm_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4,
88 _Float16 e5, _Float16 e6, _Float16 e7) {
89 return _mm_set_ph(e7, e6, e5, e4, e3, e2, e1, e0);
90}
91
92static __inline__ __m256h __DEFAULT_FN_ATTRS256
93_mm256_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4,
94 _Float16 e5, _Float16 e6, _Float16 e7, _Float16 e8, _Float16 e9,
95 _Float16 e10, _Float16 e11, _Float16 e12, _Float16 e13,
96 _Float16 e14, _Float16 e15) {
97 return _mm256_set_ph(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
98 e2, e1, e0);
99}
100
101static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A,
102 __m256h __B) {
103 return (__m256h)((__v16hf)__A + (__v16hf)__B);
104}
105
106static __inline__ __m256h __DEFAULT_FN_ATTRS256
107_mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
108 return (__m256h)__builtin_ia32_selectph_256(
109 __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W);
110}
111
112static __inline__ __m256h __DEFAULT_FN_ATTRS256
113_mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) {
114 return (__m256h)__builtin_ia32_selectph_256(
115 __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
116}
117
118static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A,
119 __m128h __B) {
120 return (__m128h)((__v8hf)__A + (__v8hf)__B);
121}
122
123static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W,
124 __mmask8 __U,
125 __m128h __A,
126 __m128h __B) {
127 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
128 (__v8hf)__W);
129}
130
131static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U,
132 __m128h __A,
133 __m128h __B) {
134 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
135 (__v8hf)_mm_setzero_ph());
136}
137
138static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A,
139 __m256h __B) {
140 return (__m256h)((__v16hf)__A - (__v16hf)__B);
141}
142
143static __inline__ __m256h __DEFAULT_FN_ATTRS256
144_mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
145 return (__m256h)__builtin_ia32_selectph_256(
146 __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W);
147}
148
149static __inline__ __m256h __DEFAULT_FN_ATTRS256
150_mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) {
151 return (__m256h)__builtin_ia32_selectph_256(
152 __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
153}
154
155static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A,
156 __m128h __B) {
157 return (__m128h)((__v8hf)__A - (__v8hf)__B);
158}
159
160static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W,
161 __mmask8 __U,
162 __m128h __A,
163 __m128h __B) {
164 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
165 (__v8hf)__W);
166}
167
168static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U,
169 __m128h __A,
170 __m128h __B) {
171 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
172 (__v8hf)_mm_setzero_ph());
173}
174
175static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A,
176 __m256h __B) {
177 return (__m256h)((__v16hf)__A * (__v16hf)__B);
178}
179
180static __inline__ __m256h __DEFAULT_FN_ATTRS256
181_mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
182 return (__m256h)__builtin_ia32_selectph_256(
183 __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W);
184}
185
186static __inline__ __m256h __DEFAULT_FN_ATTRS256
187_mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) {
188 return (__m256h)__builtin_ia32_selectph_256(
189 __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
190}
191
192static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A,
193 __m128h __B) {
194 return (__m128h)((__v8hf)__A * (__v8hf)__B);
195}
196
197static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W,
198 __mmask8 __U,
199 __m128h __A,
200 __m128h __B) {
201 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
202 (__v8hf)__W);
203}
204
205static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U,
206 __m128h __A,
207 __m128h __B) {
208 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
209 (__v8hf)_mm_setzero_ph());
210}
211
212static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A,
213 __m256h __B) {
214 return (__m256h)((__v16hf)__A / (__v16hf)__B);
215}
216
217static __inline__ __m256h __DEFAULT_FN_ATTRS256
218_mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
219 return (__m256h)__builtin_ia32_selectph_256(
220 __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W);
221}
222
223static __inline__ __m256h __DEFAULT_FN_ATTRS256
224_mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) {
225 return (__m256h)__builtin_ia32_selectph_256(
226 __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
227}
228
229static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A,
230 __m128h __B) {
231 return (__m128h)((__v8hf)__A / (__v8hf)__B);
232}
233
234static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W,
235 __mmask8 __U,
236 __m128h __A,
237 __m128h __B) {
238 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
239 (__v8hf)__W);
240}
241
242static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U,
243 __m128h __A,
244 __m128h __B) {
245 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
246 (__v8hf)_mm_setzero_ph());
247}
248
249static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A,
250 __m256h __B) {
251 return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B);
252}
253
254static __inline__ __m256h __DEFAULT_FN_ATTRS256
255_mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
256 return (__m256h)__builtin_ia32_selectph_256(
257 (__mmask16)__U,
258 (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
259 (__v16hf)__W);
260}
261
262static __inline__ __m256h __DEFAULT_FN_ATTRS256
263_mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) {
264 return (__m256h)__builtin_ia32_selectph_256(
265 (__mmask16)__U,
266 (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
267 (__v16hf)_mm256_setzero_ph());
268}
269
270static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A,
271 __m128h __B) {
272 return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B);
273}
274
275static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W,
276 __mmask8 __U,
277 __m128h __A,
278 __m128h __B) {
279 return (__m128h)__builtin_ia32_selectph_128(
280 (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
281 (__v8hf)__W);
282}
283
284static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U,
285 __m128h __A,
286 __m128h __B) {
287 return (__m128h)__builtin_ia32_selectph_128(
288 (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
289 (__v8hf)_mm_setzero_ph());
290}
291
292static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A,
293 __m256h __B) {
294 return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B);
295}
296
297static __inline__ __m256h __DEFAULT_FN_ATTRS256
298_mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
299 return (__m256h)__builtin_ia32_selectph_256(
300 (__mmask16)__U,
301 (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
302 (__v16hf)__W);
303}
304
305static __inline__ __m256h __DEFAULT_FN_ATTRS256
306_mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) {
307 return (__m256h)__builtin_ia32_selectph_256(
308 (__mmask16)__U,
309 (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
310 (__v16hf)_mm256_setzero_ph());
311}
312
313static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A,
314 __m128h __B) {
315 return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B);
316}
317
318static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W,
319 __mmask8 __U,
320 __m128h __A,
321 __m128h __B) {
322 return (__m128h)__builtin_ia32_selectph_128(
323 (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
324 (__v8hf)__W);
325}
326
327static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U,
328 __m128h __A,
329 __m128h __B) {
330 return (__m128h)__builtin_ia32_selectph_128(
331 (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
332 (__v8hf)_mm_setzero_ph());
333}
334
/* Absolute value of each FP16 element of a 256-bit vector.
 * The input is ANDed with 0x7FFF7FFF in every 32-bit lane, which clears
 * bit 15 (the top bit) of both 16-bit halves and leaves all other bits
 * unchanged. Single return statement: constexpr under C++11 or later. */
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_abs_ph(__m256h __A) {
  return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A);
}
339
/* Absolute value of each FP16 element of a 128-bit vector.
 * The input is ANDed with 0x7FFF7FFF in every 32-bit lane, which clears
 * bit 15 (the top bit) of both 16-bit halves and leaves all other bits
 * unchanged. Single return statement: constexpr under C++11 or later. */
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_abs_ph(__m128h __A) {
  return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A);
}
344
345static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A) {
346 return (__m256h)_mm256_xor_ps((__m256)__A, _mm256_set1_ps(-0.0f));
347}
348
349static __inline__ __m256h __DEFAULT_FN_ATTRS256
350_mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) {
351 return (__m256h)__builtin_ia32_selectps_256(
352 (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W);
353}
354
355static __inline__ __m256h __DEFAULT_FN_ATTRS256
356_mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) {
357 return (__m256h)__builtin_ia32_selectps_256(
358 (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)_mm256_setzero_ps());
359}
360
361static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A) {
362 return (__m128h)_mm_xor_ps((__m128)__A, _mm_set1_ps(-0.0f));
363}
364
365static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_conj_pch(__m128h __W,
366 __mmask8 __U,
367 __m128h __A) {
368 return (__m128h)__builtin_ia32_selectps_128(
369 (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W);
370}
371
372static __inline__ __m128h __DEFAULT_FN_ATTRS128
373_mm_maskz_conj_pch(__mmask8 __U, __m128h __A) {
374 return (__m128h)__builtin_ia32_selectps_128(
375 (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)_mm_setzero_ps());
376}
377
/* Compare 256-bit FP16 vectors a and b with predicate p and yield a
 * __mmask16. The builtin is called with an all-ones input mask. */
#define _mm256_cmp_ph_mask(a, b, p)                                            \
  ((__mmask16)__builtin_ia32_cmpph256_mask(                                    \
      (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)-1))
381
/* Compare 256-bit FP16 vectors a and b with predicate p and yield a
 * __mmask16; m is forwarded to the builtin as its input mask. */
#define _mm256_mask_cmp_ph_mask(m, a, b, p)                                    \
  ((__mmask16)__builtin_ia32_cmpph256_mask(                                    \
      (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)(m)))
385
/* Compare 128-bit FP16 vectors a and b with predicate p and yield a
 * __mmask8. The builtin is called with an all-ones input mask. */
#define _mm_cmp_ph_mask(a, b, p)                                               \
  ((__mmask8)__builtin_ia32_cmpph128_mask(                                     \
      (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)-1))
389
/* Compare 128-bit FP16 vectors a and b with predicate p and yield a
 * __mmask8; m is forwarded to the builtin as its input mask. */
#define _mm_mask_cmp_ph_mask(m, a, b, p)                                       \
  ((__mmask8)__builtin_ia32_cmpph128_mask(                                     \
      (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m)))
393
394static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) {
395 return (__m256h)__builtin_ia32_rcpph256_mask(
396 (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
397}
398
399static __inline__ __m256h __DEFAULT_FN_ATTRS256
400_mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
401 return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W,
402 (__mmask16)__U);
403}
404
405static __inline__ __m256h __DEFAULT_FN_ATTRS256
406_mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) {
407 return (__m256h)__builtin_ia32_rcpph256_mask(
408 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
409}
410
411static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) {
412 return (__m128h)__builtin_ia32_rcpph128_mask(
413 (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
414}
415
416static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W,
417 __mmask8 __U,
418 __m128h __A) {
419 return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W,
420 (__mmask8)__U);
421}
422
423static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U,
424 __m128h __A) {
425 return (__m128h)__builtin_ia32_rcpph128_mask(
426 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
427}
428
429static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) {
430 return (__m256h)__builtin_ia32_rsqrtph256_mask(
431 (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
432}
433
434static __inline__ __m256h __DEFAULT_FN_ATTRS256
435_mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
436 return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W,
437 (__mmask16)__U);
438}
439
440static __inline__ __m256h __DEFAULT_FN_ATTRS256
441_mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) {
442 return (__m256h)__builtin_ia32_rsqrtph256_mask(
443 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
444}
445
446static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) {
447 return (__m128h)__builtin_ia32_rsqrtph128_mask(
448 (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
449}
450
451static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W,
452 __mmask8 __U,
453 __m128h __A) {
454 return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W,
455 (__mmask8)__U);
456}
457
458static __inline__ __m128h __DEFAULT_FN_ATTRS128
459_mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) {
460 return (__m128h)__builtin_ia32_rsqrtph128_mask(
461 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
462}
463
464static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) {
465 return (__m128h)__builtin_ia32_getexpph128_mask(
466 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
467}
468
469static __inline__ __m128h __DEFAULT_FN_ATTRS128
470_mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) {
471 return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W,
472 (__mmask8)__U);
473}
474
475static __inline__ __m128h __DEFAULT_FN_ATTRS128
476_mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) {
477 return (__m128h)__builtin_ia32_getexpph128_mask(
478 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
479}
480
481static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) {
482 return (__m256h)__builtin_ia32_getexpph256_mask(
483 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
484}
485
486static __inline__ __m256h __DEFAULT_FN_ATTRS256
487_mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
488 return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W,
489 (__mmask16)__U);
490}
491
492static __inline__ __m256h __DEFAULT_FN_ATTRS256
493_mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) {
494 return (__m256h)__builtin_ia32_getexpph256_mask(
495 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
496}
497
/* getmant on a 128-bit FP16 vector. B and C are packed into one immediate
 * as ((C) << 2) | (B); the builtin gets a zero pass-through and an all-ones
 * mask. */
#define _mm_getmant_ph(A, B, C)                                                \
  ((__m128h)__builtin_ia32_getmantph128_mask(                                  \
      (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
      (__mmask8)-1))
502
/* Masked getmant on a 128-bit FP16 vector. B and C are packed into one
 * immediate as ((C) << 2) | (B); W is the pass-through operand and U the
 * mask. */
#define _mm_mask_getmant_ph(W, U, A, B, C)                                     \
  ((__m128h)__builtin_ia32_getmantph128_mask(                                  \
      (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)(__m128h)(W),     \
      (__mmask8)(U)))
507
/* Zero-masked getmant on a 128-bit FP16 vector. B and C are packed into one
 * immediate as ((C) << 2) | (B); the pass-through is all zeros and U is the
 * mask. */
#define _mm_maskz_getmant_ph(U, A, B, C)                                       \
  ((__m128h)__builtin_ia32_getmantph128_mask(                                  \
      (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
      (__mmask8)(U)))
512
/* getmant on a 256-bit FP16 vector. B and C are packed into one immediate
 * as ((C) << 2) | (B); the builtin gets a zero pass-through and an all-ones
 * mask. */
#define _mm256_getmant_ph(A, B, C)                                             \
  ((__m256h)__builtin_ia32_getmantph256_mask(                                  \
      (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)),                          \
      (__v16hf)_mm256_setzero_ph(), (__mmask16)-1))
517
/* Masked getmant on a 256-bit FP16 vector. B and C are packed into one
 * immediate as ((C) << 2) | (B); W is the pass-through operand and U the
 * mask. */
#define _mm256_mask_getmant_ph(W, U, A, B, C)                                  \
  ((__m256h)__builtin_ia32_getmantph256_mask(                                  \
      (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W),   \
      (__mmask16)(U)))
522
/* Zero-masked getmant on a 256-bit FP16 vector. B and C are packed into one
 * immediate as ((C) << 2) | (B); the pass-through is all zeros and U is the
 * mask. */
#define _mm256_maskz_getmant_ph(U, A, B, C)                                    \
  ((__m256h)__builtin_ia32_getmantph256_mask(                                  \
      (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)),                          \
      (__v16hf)_mm256_setzero_ph(), (__mmask16)(U)))
527
528static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A,
529 __m128h __B) {
530 return (__m128h)__builtin_ia32_scalefph128_mask(
531 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
532}
533
534static __inline__ __m128h __DEFAULT_FN_ATTRS128
535_mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
536 return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B,
537 (__v8hf)__W, (__mmask8)__U);
538}
539
540static __inline__ __m128h __DEFAULT_FN_ATTRS128
541_mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) {
542 return (__m128h)__builtin_ia32_scalefph128_mask(
543 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
544}
545
546static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A,
547 __m256h __B) {
548 return (__m256h)__builtin_ia32_scalefph256_mask(
549 (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
550}
551
552static __inline__ __m256h __DEFAULT_FN_ATTRS256
553_mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
554 return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B,
555 (__v16hf)__W, (__mmask16)__U);
556}
557
558static __inline__ __m256h __DEFAULT_FN_ATTRS256
559_mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) {
560 return (__m256h)__builtin_ia32_scalefph256_mask(
561 (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
562}
563
/* roundscale on a 128-bit FP16 vector with immediate imm; zero pass-through
 * and all-ones mask. */
#define _mm_roundscale_ph(A, imm)                                              \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask(                                \
      (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(),              \
      (__mmask8)-1))
568
/* Masked roundscale on a 128-bit FP16 vector with immediate imm; W is the
 * pass-through operand and U the mask. */
#define _mm_mask_roundscale_ph(W, U, A, imm)                                   \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask(                                \
      (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
572
/* Zero-masked roundscale on a 128-bit FP16 vector with immediate imm; the
 * pass-through is all zeros and U is the mask. */
#define _mm_maskz_roundscale_ph(U, A, imm)                                     \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask(                                \
      (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(),              \
      (__mmask8)(U)))
577
/* roundscale on a 256-bit FP16 vector with immediate imm; zero pass-through
 * and all-ones mask. */
#define _mm256_roundscale_ph(A, imm)                                           \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask(                                \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(),         \
      (__mmask16)-1))
582
/* Masked roundscale on a 256-bit FP16 vector with immediate imm; W is the
 * pass-through operand and U the mask. */
#define _mm256_mask_roundscale_ph(W, U, A, imm)                                \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask(                                \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W),                \
      (__mmask16)(U)))
587
/* Zero-masked roundscale on a 256-bit FP16 vector with immediate imm; the
 * pass-through is all zeros and U is the mask. */
#define _mm256_maskz_roundscale_ph(U, A, imm)                                  \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask(                                \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(),         \
      (__mmask16)(U)))
592
/* reduce on a 128-bit FP16 vector with immediate imm; zero pass-through and
 * all-ones mask. */
#define _mm_reduce_ph(A, imm)                                                  \
  ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm),  \
                                            (__v8hf)_mm_setzero_ph(),          \
                                            (__mmask8)-1))
597
/* Masked reduce on a 128-bit FP16 vector with immediate imm; W is the
 * pass-through operand and U the mask. */
#define _mm_mask_reduce_ph(W, U, A, imm)                                       \
  ((__m128h)__builtin_ia32_reduceph128_mask(                                   \
      (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
601
/* Zero-masked reduce on a 128-bit FP16 vector with immediate imm; the
 * pass-through is all zeros and U is the mask. */
#define _mm_maskz_reduce_ph(U, A, imm)                                         \
  ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm),  \
                                            (__v8hf)_mm_setzero_ph(),          \
                                            (__mmask8)(U)))
606
/* reduce on a 256-bit FP16 vector with immediate imm; zero pass-through and
 * all-ones mask. */
#define _mm256_reduce_ph(A, imm)                                               \
  ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
                                            (__v16hf)_mm256_setzero_ph(),      \
                                            (__mmask16)-1))
611
/* Masked reduce on a 256-bit FP16 vector with immediate imm; W is the
 * pass-through operand and U the mask. */
#define _mm256_mask_reduce_ph(W, U, A, imm)                                    \
  ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
                                            (__v16hf)(__m256h)(W),             \
                                            (__mmask16)(U)))
616
/* Zero-masked reduce on a 256-bit FP16 vector with immediate imm; the
 * pass-through is all zeros and U is the mask. */
#define _mm256_maskz_reduce_ph(U, A, imm)                                      \
  ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
                                            (__v16hf)_mm256_setzero_ph(),      \
                                            (__mmask16)(U)))
621
622static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) {
623 return __builtin_ia32_sqrtph((__v8hf)__a);
624}
625
626static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W,
627 __mmask8 __U,
628 __m128h __A) {
629 return (__m128h)__builtin_ia32_selectph_128(
630 (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W);
631}
632
633static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U,
634 __m128h __A) {
635 return (__m128h)__builtin_ia32_selectph_128(
636 (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph());
637}
638
639static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) {
640 return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a);
641}
642
643static __inline__ __m256h __DEFAULT_FN_ATTRS256
644_mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
645 return (__m256h)__builtin_ia32_selectph_256(
646 (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W);
647}
648
649static __inline__ __m256h __DEFAULT_FN_ATTRS256
650_mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) {
651 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
652 (__v16hf)_mm256_sqrt_ph(__A),
653 (__v16hf)_mm256_setzero_ph());
654}
655
/* fpclass test on a 128-bit FP16 vector with category immediate imm;
 * yields a __mmask8, with U forwarded to the builtin as its input mask. */
#define _mm_mask_fpclass_ph_mask(U, A, imm)                                    \
  ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A),            \
                                              (int)(imm), (__mmask8)(U)))
659
/* fpclass test on a 128-bit FP16 vector with category immediate imm;
 * yields a __mmask8. The builtin is called with an all-ones input mask. */
#define _mm_fpclass_ph_mask(A, imm)                                            \
  ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A),            \
                                              (int)(imm), (__mmask8)-1))
663
/* fpclass test on a 256-bit FP16 vector with category immediate imm;
 * yields a __mmask16, with U forwarded to the builtin as its input mask. */
#define _mm256_mask_fpclass_ph_mask(U, A, imm)                                 \
  ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A),          \
                                               (int)(imm), (__mmask16)(U)))
667
/* fpclass test on a 256-bit FP16 vector with category immediate imm;
 * yields a __mmask16. The builtin is called with an all-ones input mask. */
#define _mm256_fpclass_ph_mask(A, imm)                                         \
  ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A),          \
                                               (int)(imm), (__mmask16)-1))
671
672static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) {
673 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
674 (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
675}
676
677static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W,
678 __mmask8 __U,
679 __m128d __A) {
680 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
681 (__mmask8)__U);
682}
683
684static __inline__ __m128h __DEFAULT_FN_ATTRS128
685_mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
686 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
687 (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
688}
689
690static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) {
691 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
692 (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
693}
694
695static __inline__ __m128h __DEFAULT_FN_ATTRS256
696_mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
697 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
698 (__mmask8)__U);
699}
700
701static __inline__ __m128h __DEFAULT_FN_ATTRS256
702_mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
703 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
704 (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
705}
706
707static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) {
708 return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
709 (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1);
710}
711
712static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W,
713 __mmask8 __U,
714 __m128h __A) {
715 return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
716 (__mmask8)__U);
717}
718
719static __inline__ __m128d __DEFAULT_FN_ATTRS128
720_mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
721 return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
722 (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U);
723}
724
725static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) {
726 return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
727 (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1);
728}
729
730static __inline__ __m256d __DEFAULT_FN_ATTRS256
731_mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
732 return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
733 (__mmask8)__U);
734}
735
736static __inline__ __m256d __DEFAULT_FN_ATTRS256
737_mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
738 return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
739 (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U);
740}
741
742static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) {
743 return (__m128i)__builtin_ia32_vcvtph2w128_mask(
744 (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
745}
746
747static __inline__ __m128i __DEFAULT_FN_ATTRS128
748_mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
749 return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
750 (__mmask8)__U);
751}
752
753static __inline__ __m128i __DEFAULT_FN_ATTRS128
754_mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
755 return (__m128i)__builtin_ia32_vcvtph2w128_mask(
756 (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
757}
758
759static __inline__ __m256i __DEFAULT_FN_ATTRS256
760_mm256_cvtph_epi16(__m256h __A) {
761 return (__m256i)__builtin_ia32_vcvtph2w256_mask(
762 (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
763}
764
765static __inline__ __m256i __DEFAULT_FN_ATTRS256
766_mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
767 return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
768 (__mmask16)__U);
769}
770
771static __inline__ __m256i __DEFAULT_FN_ATTRS256
772_mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) {
773 return (__m256i)__builtin_ia32_vcvtph2w256_mask(
774 (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
775}
776
777static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) {
778 return (__m128i)__builtin_ia32_vcvttph2w128_mask(
779 (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
780}
781
782static __inline__ __m128i __DEFAULT_FN_ATTRS128
783_mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
784 return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
785 (__mmask8)__U);
786}
787
788static __inline__ __m128i __DEFAULT_FN_ATTRS128
789_mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
790 return (__m128i)__builtin_ia32_vcvttph2w128_mask(
791 (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
792}
793
794static __inline__ __m256i __DEFAULT_FN_ATTRS256
795_mm256_cvttph_epi16(__m256h __A) {
796 return (__m256i)__builtin_ia32_vcvttph2w256_mask(
797 (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
798}
799
800static __inline__ __m256i __DEFAULT_FN_ATTRS256
801_mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
802 return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
803 (__mmask16)__U);
804}
805
806static __inline__ __m256i __DEFAULT_FN_ATTRS256
807_mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
808 return (__m256i)__builtin_ia32_vcvttph2w256_mask(
809 (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
810}
811
/* Convert eight signed 16-bit integers to FP16 element by element using
 * __builtin_convertvector. Single return statement: constexpr under C++11
 * or later. */
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_cvtepi16_ph(__m128i __A) {
  return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
}
816
/* Masked signed 16-bit -> FP16 conversion (128-bit): the select builtin
 * chooses per mask bit of __U between the converted value and the
 * pass-through vector __W. Single return statement: constexpr under C++11
 * or later. */
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
}
822
/* Zero-masked signed 16-bit -> FP16 conversion (128-bit): the select
 * builtin chooses per mask bit of __U between the converted value and an
 * all-zero vector. Single return statement: constexpr under C++11 or
 * later. */
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
}
828
/* Convert sixteen signed 16-bit integers to FP16 element by element using
 * __builtin_convertvector. Single return statement: constexpr under C++11
 * or later. */
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepi16_ph(__m256i __A) {
  return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
}
833
/* Masked signed 16-bit -> FP16 conversion (256-bit): the select builtin
 * chooses per mask bit of __U between the converted value and the
 * pass-through vector __W. Single return statement: constexpr under C++11
 * or later. */
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
}
839
/* Zero-masked signed 16-bit -> FP16 conversion (256-bit): the select
 * builtin chooses per mask bit of __U between the converted value and an
 * all-zero vector. Single return statement: constexpr under C++11 or
 * later. */
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
                                              (__v16hf)_mm256_cvtepi16_ph(__A),
                                              (__v16hf)_mm256_setzero_ph());
}
846
847static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) {
848 return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
849 (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
850}
851
852static __inline__ __m128i __DEFAULT_FN_ATTRS128
853_mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
854 return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
855 (__mmask8)__U);
856}
857
858static __inline__ __m128i __DEFAULT_FN_ATTRS128
859_mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
860 return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
861 (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
862}
863
864static __inline__ __m256i __DEFAULT_FN_ATTRS256
865_mm256_cvtph_epu16(__m256h __A) {
866 return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
867 (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
868}
869
870static __inline__ __m256i __DEFAULT_FN_ATTRS256
871_mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
872 return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
873 (__mmask16)__U);
874}
875
876static __inline__ __m256i __DEFAULT_FN_ATTRS256
877_mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) {
878 return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
879 (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
880}
881
882static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) {
883 return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
884 (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
885}
886
887static __inline__ __m128i __DEFAULT_FN_ATTRS128
888_mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
889 return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
890 (__mmask8)__U);
891}
892
893static __inline__ __m128i __DEFAULT_FN_ATTRS128
894_mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
895 return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
896 (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
897}
898
899static __inline__ __m256i __DEFAULT_FN_ATTRS256
900_mm256_cvttph_epu16(__m256h __A) {
901 return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
902 (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
903}
904
905static __inline__ __m256i __DEFAULT_FN_ATTRS256
906_mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
907 return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
908 (__mmask16)__U);
909}
910
911static __inline__ __m256i __DEFAULT_FN_ATTRS256
912_mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
913 return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
914 (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
915}
916
917static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
918_mm_cvtepu16_ph(__m128i __A) {
919 return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
920}
921
922static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
923_mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
924 return (__m128h)__builtin_ia32_selectph_128(
925 (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
926}
927
928static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
929_mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
930 return (__m128h)__builtin_ia32_selectph_128(
931 (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
932}
933
934static __inline__ __m256h __DEFAULT_FN_ATTRS256
935_mm256_cvtepu16_ph(__m256i __A) {
936 return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
937}
938
939static __inline__ __m256h __DEFAULT_FN_ATTRS256
940_mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
941 return (__m256h)__builtin_ia32_selectph_256(
942 (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
943}
944
945static __inline__ __m256h __DEFAULT_FN_ATTRS256
946_mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
947 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
948 (__v16hf)_mm256_cvtepu16_ph(__A),
949 (__v16hf)_mm256_setzero_ph());
950}
951
952static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) {
953 return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
954 (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
955}
956
957static __inline__ __m128i __DEFAULT_FN_ATTRS128
958_mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
959 return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
960 (__mmask8)__U);
961}
962
963static __inline__ __m128i __DEFAULT_FN_ATTRS128
964_mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
965 return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
966 (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
967}
968
969static __inline__ __m256i __DEFAULT_FN_ATTRS256
970_mm256_cvtph_epi32(__m128h __A) {
971 return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
972 (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
973}
974
975static __inline__ __m256i __DEFAULT_FN_ATTRS256
976_mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
977 return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
978 (__mmask8)__U);
979}
980
981static __inline__ __m256i __DEFAULT_FN_ATTRS256
982_mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
983 return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
984 (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
985}
986
987static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) {
988 return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
989 (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
990}
991
992static __inline__ __m128i __DEFAULT_FN_ATTRS128
993_mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
994 return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
995 (__mmask8)__U);
996}
997
998static __inline__ __m128i __DEFAULT_FN_ATTRS128
999_mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
1000 return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
1001 (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
1002}
1003
1004static __inline__ __m256i __DEFAULT_FN_ATTRS256
1005_mm256_cvtph_epu32(__m128h __A) {
1006 return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
1007 (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
1008}
1009
1010static __inline__ __m256i __DEFAULT_FN_ATTRS256
1011_mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
1012 return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
1013 (__mmask8)__U);
1014}
1015
1016static __inline__ __m256i __DEFAULT_FN_ATTRS256
1017_mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
1018 return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
1019 (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
1020}
1021
1022static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) {
1023 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1024 (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1025}
1026
1027static __inline__ __m128h __DEFAULT_FN_ATTRS128
1028_mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1029 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
1030 (__mmask8)__U);
1031}
1032
1033static __inline__ __m128h __DEFAULT_FN_ATTRS128
1034_mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
1035 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1036 (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1037}
1038
1039static __inline__ __m128h __DEFAULT_FN_ATTRS256
1040_mm256_cvtepi32_ph(__m256i __A) {
1041 return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
1042}
1043
1044static __inline__ __m128h __DEFAULT_FN_ATTRS256
1045_mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1046 return (__m128h)__builtin_ia32_selectph_128(
1047 (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
1048}
1049
1050static __inline__ __m128h __DEFAULT_FN_ATTRS256
1051_mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
1052 return (__m128h)__builtin_ia32_selectph_128(
1053 (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
1054}
1055
1056static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) {
1057 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1058 (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1059}
1060
1061static __inline__ __m128h __DEFAULT_FN_ATTRS128
1062_mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1063 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
1064 (__mmask8)__U);
1065}
1066
1067static __inline__ __m128h __DEFAULT_FN_ATTRS128
1068_mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
1069 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1070 (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1071}
1072
1073static __inline__ __m128h __DEFAULT_FN_ATTRS256
1074_mm256_cvtepu32_ph(__m256i __A) {
1075 return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
1076}
1077
1078static __inline__ __m128h __DEFAULT_FN_ATTRS256
1079_mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1080 return (__m128h)__builtin_ia32_selectph_128(
1081 (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
1082}
1083
1084static __inline__ __m128h __DEFAULT_FN_ATTRS256
1085_mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
1086 return (__m128h)__builtin_ia32_selectph_128(
1087 (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
1088}
1089
1090static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) {
1091 return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1092 (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
1093}
1094
1095static __inline__ __m128i __DEFAULT_FN_ATTRS128
1096_mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
1097 return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
1098 (__mmask8)__U);
1099}
1100
1101static __inline__ __m128i __DEFAULT_FN_ATTRS128
1102_mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
1103 return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1104 (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
1105}
1106
1107static __inline__ __m256i __DEFAULT_FN_ATTRS256
1108_mm256_cvttph_epi32(__m128h __A) {
1109 return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1110 (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
1111}
1112
1113static __inline__ __m256i __DEFAULT_FN_ATTRS256
1114_mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
1115 return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
1116 (__mmask8)__U);
1117}
1118
1119static __inline__ __m256i __DEFAULT_FN_ATTRS256
1120_mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
1121 return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1122 (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
1123}
1124
1125static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) {
1126 return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1127 (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
1128}
1129
1130static __inline__ __m128i __DEFAULT_FN_ATTRS128
1131_mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
1132 return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
1133 (__mmask8)__U);
1134}
1135
1136static __inline__ __m128i __DEFAULT_FN_ATTRS128
1137_mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
1138 return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1139 (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
1140}
1141
1142static __inline__ __m256i __DEFAULT_FN_ATTRS256
1143_mm256_cvttph_epu32(__m128h __A) {
1144 return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1145 (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
1146}
1147
1148static __inline__ __m256i __DEFAULT_FN_ATTRS256
1149_mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
1150 return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
1151 (__mmask8)__U);
1152}
1153
1154static __inline__ __m256i __DEFAULT_FN_ATTRS256
1155_mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
1156 return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1157 (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
1158}
1159
1160static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) {
1161 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1162 (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1163}
1164
1165static __inline__ __m128h __DEFAULT_FN_ATTRS128
1166_mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1167 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
1168 (__mmask8)__U);
1169}
1170
1171static __inline__ __m128h __DEFAULT_FN_ATTRS128
1172_mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
1173 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1174 (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1175}
1176
1177static __inline__ __m128h __DEFAULT_FN_ATTRS256
1178_mm256_cvtepi64_ph(__m256i __A) {
1179 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1180 (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1181}
1182
1183static __inline__ __m128h __DEFAULT_FN_ATTRS256
1184_mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1185 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
1186 (__mmask8)__U);
1187}
1188
1189static __inline__ __m128h __DEFAULT_FN_ATTRS256
1190_mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) {
1191 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1192 (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1193}
1194
1195static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) {
1196 return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1197 (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1198}
1199
1200static __inline__ __m128i __DEFAULT_FN_ATTRS128
1201_mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1202 return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
1203 (__mmask8)__U);
1204}
1205
1206static __inline__ __m128i __DEFAULT_FN_ATTRS128
1207_mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1208 return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1209 (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1210}
1211
1212static __inline__ __m256i __DEFAULT_FN_ATTRS256
1213_mm256_cvtph_epi64(__m128h __A) {
1214 return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1215 (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1216}
1217
1218static __inline__ __m256i __DEFAULT_FN_ATTRS256
1219_mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1220 return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
1221 (__mmask8)__U);
1222}
1223
1224static __inline__ __m256i __DEFAULT_FN_ATTRS256
1225_mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1226 return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1227 (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1228}
1229
1230static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) {
1231 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1232 (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1233}
1234
1235static __inline__ __m128h __DEFAULT_FN_ATTRS128
1236_mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1237 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
1238 (__mmask8)__U);
1239}
1240
1241static __inline__ __m128h __DEFAULT_FN_ATTRS128
1242_mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
1243 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1244 (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1245}
1246
1247static __inline__ __m128h __DEFAULT_FN_ATTRS256
1248_mm256_cvtepu64_ph(__m256i __A) {
1249 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1250 (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1251}
1252
1253static __inline__ __m128h __DEFAULT_FN_ATTRS256
1254_mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1255 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
1256 (__mmask8)__U);
1257}
1258
1259static __inline__ __m128h __DEFAULT_FN_ATTRS256
1260_mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) {
1261 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1262 (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1263}
1264
1265static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) {
1266 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1267 (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1268}
1269
1270static __inline__ __m128i __DEFAULT_FN_ATTRS128
1271_mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1272 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1273 (__mmask8)__U);
1274}
1275
1276static __inline__ __m128i __DEFAULT_FN_ATTRS128
1277_mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1278 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1279 (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1280}
1281
1282static __inline__ __m256i __DEFAULT_FN_ATTRS256
1283_mm256_cvtph_epu64(__m128h __A) {
1284 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1285 (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1286}
1287
1288static __inline__ __m256i __DEFAULT_FN_ATTRS256
1289_mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1290 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1291 (__mmask8)__U);
1292}
1293
1294static __inline__ __m256i __DEFAULT_FN_ATTRS256
1295_mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1296 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1297 (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1298}
1299
1300static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) {
1301 return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1302 (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1303}
1304
1305static __inline__ __m128i __DEFAULT_FN_ATTRS128
1306_mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1307 return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
1308 (__mmask8)__U);
1309}
1310
1311static __inline__ __m128i __DEFAULT_FN_ATTRS128
1312_mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
1313 return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1314 (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1315}
1316
1317static __inline__ __m256i __DEFAULT_FN_ATTRS256
1318_mm256_cvttph_epi64(__m128h __A) {
1319 return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1320 (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1321}
1322
1323static __inline__ __m256i __DEFAULT_FN_ATTRS256
1324_mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1325 return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
1326 (__mmask8)__U);
1327}
1328
1329static __inline__ __m256i __DEFAULT_FN_ATTRS256
1330_mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
1331 return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1332 (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1333}
1334
1335static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) {
1336 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1337 (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1338}
1339
1340static __inline__ __m128i __DEFAULT_FN_ATTRS128
1341_mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1342 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1343 (__mmask8)__U);
1344}
1345
1346static __inline__ __m128i __DEFAULT_FN_ATTRS128
1347_mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
1348 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1349 (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1350}
1351
1352static __inline__ __m256i __DEFAULT_FN_ATTRS256
1353_mm256_cvttph_epu64(__m128h __A) {
1354 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1355 (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1356}
1357
1358static __inline__ __m256i __DEFAULT_FN_ATTRS256
1359_mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1360 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1361 (__mmask8)__U);
1362}
1363
1364static __inline__ __m256i __DEFAULT_FN_ATTRS256
1365_mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
1366 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1367 (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1368}
1369
1370static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) {
1371 return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1372 (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1);
1373}
1374
1375static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W,
1376 __mmask8 __U,
1377 __m128h __A) {
1378 return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
1379 (__mmask8)__U);
1380}
1381
1382static __inline__ __m128 __DEFAULT_FN_ATTRS128
1383_mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1384 return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1385 (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
1386}
1387
1388static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) {
1389 return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1390 (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1);
1391}
1392
1393static __inline__ __m256 __DEFAULT_FN_ATTRS256
1394_mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
1395 return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
1396 (__mmask8)__U);
1397}
1398
1399static __inline__ __m256 __DEFAULT_FN_ATTRS256
1400_mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1401 return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1402 (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U);
1403}
1404
1405static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) {
1406 return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1407 (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1408}
1409
1410static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W,
1411 __mmask8 __U,
1412 __m128 __A) {
1413 return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
1414 (__mmask8)__U);
1415}
1416
1417static __inline__ __m128h __DEFAULT_FN_ATTRS128
1418_mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
1419 return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1420 (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1421}
1422
1423static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) {
1424 return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1425 (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1426}
1427
1428static __inline__ __m128h __DEFAULT_FN_ATTRS256
1429_mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
1430 return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
1431 (__mmask8)__U);
1432}
1433
1434static __inline__ __m128h __DEFAULT_FN_ATTRS256
1435_mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
1436 return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1437 (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1438}
1439
1440static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A,
1441 __m128h __B,
1442 __m128h __C) {
1443 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
1444 (__v8hf)__C);
1445}
1446
1447static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A,
1448 __mmask8 __U,
1449 __m128h __B,
1450 __m128h __C) {
1451 return (__m128h)__builtin_ia32_selectph_128(
1452 (__mmask8)__U,
1453 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1454 (__v8hf)__A);
1455}
1456
1457static __inline__ __m128h __DEFAULT_FN_ATTRS128
1458_mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1459 return (__m128h)__builtin_ia32_selectph_128(
1460 (__mmask8)__U,
1461 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1462 (__v8hf)__C);
1463}
1464
1465static __inline__ __m128h __DEFAULT_FN_ATTRS128
1466_mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1467 return (__m128h)__builtin_ia32_selectph_128(
1468 (__mmask8)__U,
1469 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1470 (__v8hf)_mm_setzero_ph());
1471}
1472
1473static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A,
1474 __m128h __B,
1475 __m128h __C) {
1476 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
1477 -(__v8hf)__C);
1478}
1479
1480static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A,
1481 __mmask8 __U,
1482 __m128h __B,
1483 __m128h __C) {
1484 return (__m128h)__builtin_ia32_selectph_128(
1485 (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1486 (__v8hf)__A);
1487}
1488
1489static __inline__ __m128h __DEFAULT_FN_ATTRS128
1490_mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1491 return (__m128h)__builtin_ia32_selectph_128(
1492 (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1493 (__v8hf)_mm_setzero_ph());
1494}
1495
1496static __inline__ __m128h __DEFAULT_FN_ATTRS128
1497_mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1498 return (__m128h)__builtin_ia32_selectph_128(
1499 (__mmask8)__U,
1500 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1501 (__v8hf)__C);
1502}
1503
1504static __inline__ __m128h __DEFAULT_FN_ATTRS128
1505_mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1506 return (__m128h)__builtin_ia32_selectph_128(
1507 (__mmask8)__U,
1508 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1509 (__v8hf)_mm_setzero_ph());
1510}
1511
1512static __inline__ __m128h __DEFAULT_FN_ATTRS128
1513_mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1514 return (__m128h)__builtin_ia32_selectph_128(
1515 (__mmask8)__U,
1516 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1517 (__v8hf)_mm_setzero_ph());
1518}
1519
1520static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A,
1521 __m256h __B,
1522 __m256h __C) {
1523 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
1524 (__v16hf)__C);
1525}
1526
1527static __inline__ __m256h __DEFAULT_FN_ATTRS256
1528_mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1529 return (__m256h)__builtin_ia32_selectph_256(
1530 (__mmask16)__U,
1531 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1532 (__v16hf)__A);
1533}
1534
1535static __inline__ __m256h __DEFAULT_FN_ATTRS256
1536_mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1537 return (__m256h)__builtin_ia32_selectph_256(
1538 (__mmask16)__U,
1539 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1540 (__v16hf)__C);
1541}
1542
1543static __inline__ __m256h __DEFAULT_FN_ATTRS256
1544_mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1545 return (__m256h)__builtin_ia32_selectph_256(
1546 (__mmask16)__U,
1547 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1548 (__v16hf)_mm256_setzero_ph());
1549}
1550
1551static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A,
1552 __m256h __B,
1553 __m256h __C) {
1554 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
1555 -(__v16hf)__C);
1556}
1557
1558static __inline__ __m256h __DEFAULT_FN_ATTRS256
1559_mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1560 return (__m256h)__builtin_ia32_selectph_256(
1561 (__mmask16)__U,
1562 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1563 (__v16hf)__A);
1564}
1565
1566static __inline__ __m256h __DEFAULT_FN_ATTRS256
1567_mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1568 return (__m256h)__builtin_ia32_selectph_256(
1569 (__mmask16)__U,
1570 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1571 (__v16hf)_mm256_setzero_ph());
1572}
1573
1574static __inline__ __m256h __DEFAULT_FN_ATTRS256
1575_mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1576 return (__m256h)__builtin_ia32_selectph_256(
1577 (__mmask16)__U,
1578 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1579 (__v16hf)__C);
1580}
1581
1582static __inline__ __m256h __DEFAULT_FN_ATTRS256
1583_mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1584 return (__m256h)__builtin_ia32_selectph_256(
1585 (__mmask16)__U,
1586 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1587 (__v16hf)_mm256_setzero_ph());
1588}
1589
1590static __inline__ __m256h __DEFAULT_FN_ATTRS256
1591_mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1592 return (__m256h)__builtin_ia32_selectph_256(
1593 (__mmask16)__U,
1594 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1595 (__v16hf)_mm256_setzero_ph());
1596}
1597
1598static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A,
1599 __m128h __B,
1600 __m128h __C) {
1601 return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1602 (__v8hf)__C);
1603}
1604
1605static __inline__ __m128h __DEFAULT_FN_ATTRS128
1606_mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1607 return (__m128h)__builtin_ia32_selectph_128(
1608 (__mmask8)__U,
1609 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1610 (__v8hf)__A);
1611}
1612
1613static __inline__ __m128h __DEFAULT_FN_ATTRS128
1614_mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1615 return (__m128h)__builtin_ia32_selectph_128(
1616 (__mmask8)__U,
1617 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1618 (__v8hf)__C);
1619}
1620
1621static __inline__ __m128h __DEFAULT_FN_ATTRS128
1622_mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1623 return (__m128h)__builtin_ia32_selectph_128(
1624 (__mmask8)__U,
1625 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1626 (__v8hf)_mm_setzero_ph());
1627}
1628
1629static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A,
1630 __m128h __B,
1631 __m128h __C) {
1632 return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1633 -(__v8hf)__C);
1634}
1635
1636static __inline__ __m128h __DEFAULT_FN_ATTRS128
1637_mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1638 return (__m128h)__builtin_ia32_selectph_128(
1639 (__mmask8)__U,
1640 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1641 (__v8hf)__A);
1642}
1643
1644static __inline__ __m128h __DEFAULT_FN_ATTRS128
1645_mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1646 return (__m128h)__builtin_ia32_selectph_128(
1647 (__mmask8)__U,
1648 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1649 (__v8hf)_mm_setzero_ph());
1650}
1651
1652static __inline__ __m256h __DEFAULT_FN_ATTRS256
1653_mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
1654 return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1655 (__v16hf)__C);
1656}
1657
1658static __inline__ __m256h __DEFAULT_FN_ATTRS256
1659_mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1660 return (__m256h)__builtin_ia32_selectph_256(
1661 (__mmask16)__U,
1662 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1663 (__v16hf)__A);
1664}
1665
1666static __inline__ __m256h __DEFAULT_FN_ATTRS256
1667_mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1668 return (__m256h)__builtin_ia32_selectph_256(
1669 (__mmask16)__U,
1670 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1671 (__v16hf)__C);
1672}
1673
1674static __inline__ __m256h __DEFAULT_FN_ATTRS256
1675_mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1676 return (__m256h)__builtin_ia32_selectph_256(
1677 (__mmask16)__U,
1678 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1679 (__v16hf)_mm256_setzero_ph());
1680}
1681
1682static __inline__ __m256h __DEFAULT_FN_ATTRS256
1683_mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
1684 return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1685 -(__v16hf)__C);
1686}
1687
1688static __inline__ __m256h __DEFAULT_FN_ATTRS256
1689_mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1690 return (__m256h)__builtin_ia32_selectph_256(
1691 (__mmask16)__U,
1692 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1693 (__v16hf)__A);
1694}
1695
1696static __inline__ __m256h __DEFAULT_FN_ATTRS256
1697_mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1698 return (__m256h)__builtin_ia32_selectph_256(
1699 (__mmask16)__U,
1700 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1701 (__v16hf)_mm256_setzero_ph());
1702}
1703
1704static __inline__ __m128h __DEFAULT_FN_ATTRS128
1705_mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1706 return (__m128h)__builtin_ia32_selectph_128(
1707 (__mmask8)__U,
1708 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1709 (__v8hf)__C);
1710}
1711
1712static __inline__ __m256h __DEFAULT_FN_ATTRS256
1713_mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1714 return (__m256h)__builtin_ia32_selectph_256(
1715 (__mmask16)__U,
1716 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1717 (__v16hf)__C);
1718}
1719
1720static __inline__ __m128h __DEFAULT_FN_ATTRS128
1721_mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1722 return (__m128h)__builtin_ia32_selectph_128(
1723 (__mmask8)__U,
1724 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1725 (__v8hf)__C);
1726}
1727
1728static __inline__ __m256h __DEFAULT_FN_ATTRS256
1729_mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1730 return (__m256h)__builtin_ia32_selectph_256(
1731 (__mmask16)__U,
1732 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1733 (__v16hf)__C);
1734}
1735
1736static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A,
1737 __m128h __B,
1738 __m128h __C) {
1739 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
1740 (__v8hf)__C);
1741}
1742
1743static __inline__ __m128h __DEFAULT_FN_ATTRS128
1744_mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1745 return (__m128h)__builtin_ia32_selectph_128(
1746 (__mmask8)__U,
1747 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
1748 (__v8hf)__A);
1749}
1750
1751static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A,
1752 __m256h __B,
1753 __m256h __C) {
1754 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
1755 (__v16hf)__C);
1756}
1757
1758static __inline__ __m256h __DEFAULT_FN_ATTRS256
1759_mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1760 return (__m256h)__builtin_ia32_selectph_256(
1761 (__mmask16)__U,
1762 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
1763 (__v16hf)__A);
1764}
1765
1766static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A,
1767 __m128h __B,
1768 __m128h __C) {
1769 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
1770 -(__v8hf)__C);
1771}
1772
1773static __inline__ __m128h __DEFAULT_FN_ATTRS128
1774_mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1775 return (__m128h)__builtin_ia32_selectph_128(
1776 (__mmask8)__U,
1777 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1778 (__v8hf)__A);
1779}
1780
1781static __inline__ __m128h __DEFAULT_FN_ATTRS128
1782_mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1783 return (__m128h)__builtin_ia32_selectph_128(
1784 (__mmask8)__U,
1785 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1786 (__v8hf)__C);
1787}
1788
1789static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A,
1790 __m256h __B,
1791 __m256h __C) {
1792 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
1793 -(__v16hf)__C);
1794}
1795
1796static __inline__ __m256h __DEFAULT_FN_ATTRS256
1797_mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1798 return (__m256h)__builtin_ia32_selectph_256(
1799 (__mmask16)__U,
1800 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1801 (__v16hf)__A);
1802}
1803
1804static __inline__ __m256h __DEFAULT_FN_ATTRS256
1805_mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1806 return (__m256h)__builtin_ia32_selectph_256(
1807 (__mmask16)__U,
1808 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1809 (__v16hf)__C);
1810}
1811
1812static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A,
1813 __m128h __B) {
1814 return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1815 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1816}
1817
1818static __inline__ __m128h __DEFAULT_FN_ATTRS128
1819_mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1820 return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1821 (__v4sf)__W, (__mmask8)__U);
1822}
1823
1824static __inline__ __m128h __DEFAULT_FN_ATTRS128
1825_mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1826 return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1827 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1828}
1829
1830static __inline__ __m256h __DEFAULT_FN_ATTRS128 _mm256_fcmul_pch(__m256h __A,
1831 __m256h __B) {
1832 return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1833 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1834}
1835
1836static __inline__ __m256h __DEFAULT_FN_ATTRS256
1837_mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1838 return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1839 (__v8sf)__W, (__mmask8)__U);
1840}
1841
1842static __inline__ __m256h __DEFAULT_FN_ATTRS256
1843_mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1844 return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1845 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1846}
1847
1848static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A,
1849 __m128h __B,
1850 __m128h __C) {
1851 return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1852 (__v4sf)__C, (__mmask8)-1);
1853}
1854
1855static __inline__ __m128h __DEFAULT_FN_ATTRS128
1856_mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1857 return (__m128h)__builtin_ia32_selectps_128(
1858 __U,
1859 __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B,
1860 (__v4sf)__C, (__mmask8)__U),
1861 (__v4sf)__A);
1862}
1863
1864static __inline__ __m128h __DEFAULT_FN_ATTRS128
1865_mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1866 return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1867 (__v4sf)__C, (__mmask8)__U);
1868}
1869
1870static __inline__ __m128h __DEFAULT_FN_ATTRS128
1871_mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1872 return (__m128h)__builtin_ia32_vfcmaddcph128_maskz(
1873 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U);
1874}
1875
1876static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A,
1877 __m256h __B,
1878 __m256h __C) {
1879 return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1880 (__v8sf)__C, (__mmask8)-1);
1881}
1882
1883static __inline__ __m256h __DEFAULT_FN_ATTRS256
1884_mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1885 return (__m256h)__builtin_ia32_selectps_256(
1886 __U,
1887 __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1888 (__mmask8)__U),
1889 (__v8sf)__A);
1890}
1891
1892static __inline__ __m256h __DEFAULT_FN_ATTRS256
1893_mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1894 return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1895 (__v8sf)__C, (__mmask8)__U);
1896}
1897
1898static __inline__ __m256h __DEFAULT_FN_ATTRS256
1899_mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1900 return (__m256h)__builtin_ia32_vfcmaddcph256_maskz(
1901 (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U);
1902}
1903
1904static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A,
1905 __m128h __B) {
1906 return (__m128h)__builtin_ia32_vfmulcph128_mask(
1907 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1908}
1909
1910static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_pch(__m128h __W,
1911 __mmask8 __U,
1912 __m128h __A,
1913 __m128h __B) {
1914 return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1915 (__v4sf)__W, (__mmask8)__U);
1916}
1917
1918static __inline__ __m128h __DEFAULT_FN_ATTRS128
1919_mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1920 return (__m128h)__builtin_ia32_vfmulcph128_mask(
1921 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1922}
1923
1924static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A,
1925 __m256h __B) {
1926 return (__m256h)__builtin_ia32_vfmulcph256_mask(
1927 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1928}
1929
1930static __inline__ __m256h __DEFAULT_FN_ATTRS256
1931_mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1932 return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1933 (__v8sf)__W, (__mmask8)__U);
1934}
1935
1936static __inline__ __m256h __DEFAULT_FN_ATTRS256
1937_mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1938 return (__m256h)__builtin_ia32_vfmulcph256_mask(
1939 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1940}
1941
1942static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A,
1943 __m128h __B,
1944 __m128h __C) {
1945 return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1946 (__v4sf)__C, (__mmask8)-1);
1947}
1948
1949static __inline__ __m128h __DEFAULT_FN_ATTRS128
1950_mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1951 return (__m128h)__builtin_ia32_selectps_128(
1952 __U,
1953 __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
1954 (__mmask8)__U),
1955 (__v4sf)__A);
1956}
1957
1958static __inline__ __m128h __DEFAULT_FN_ATTRS128
1959_mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1960 return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1961 (__v4sf)__C, (__mmask8)__U);
1962}
1963
1964static __inline__ __m128h __DEFAULT_FN_ATTRS128
1965_mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1966 return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B,
1967 (__v4sf)__C, (__mmask8)__U);
1968}
1969
1970static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A,
1971 __m256h __B,
1972 __m256h __C) {
1973 return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1974 (__v8sf)__C, (__mmask8)-1);
1975}
1976
1977static __inline__ __m256h __DEFAULT_FN_ATTRS256
1978_mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1979 return (__m256h)__builtin_ia32_selectps_256(
1980 __U,
1981 __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1982 (__mmask8)__U),
1983 (__v8sf)__A);
1984}
1985
1986static __inline__ __m256h __DEFAULT_FN_ATTRS256
1987_mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1988 return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1989 (__v8sf)__C, (__mmask8)__U);
1990}
1991
1992static __inline__ __m256h __DEFAULT_FN_ATTRS256
1993_mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1994 return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B,
1995 (__v8sf)__C, (__mmask8)__U);
1996}
1997
1998static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
1999_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
2000 return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
2001 (__v8hf)__A);
2002}
2003
2004static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
2005_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
2006 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
2007 (__v16hf)__A);
2008}
2009
2010static __inline__ __m128h __DEFAULT_FN_ATTRS128
2011_mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
2012 return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
2013 (__v8hi)__B);
2014}
2015
2016static __inline__ __m256h __DEFAULT_FN_ATTRS256
2017_mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
2018 return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
2019 (__v16hi)__B);
2020}
2021
2022static __inline__ __m128h __DEFAULT_FN_ATTRS128
2023_mm_permutexvar_ph(__m128i __A, __m128h __B) {
2024 return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
2025}
2026
2027static __inline__ __m256h __DEFAULT_FN_ATTRS256
2028_mm256_permutexvar_ph(__m256i __A, __m256h __B) {
2029 return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
2030}
2031
2032static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2033_mm256_reduce_add_ph(__m256h __W) {
2034 return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W);
2035}
2036
2037static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2038_mm256_reduce_mul_ph(__m256h __W) {
2039 return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W);
2040}
2041
2042static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2043_mm256_reduce_max_ph(__m256h __V) {
2044 return __builtin_ia32_reduce_fmax_ph256(__V);
2045}
2046
2047static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2048_mm256_reduce_min_ph(__m256h __V) {
2049 return __builtin_ia32_reduce_fmin_ph256(__V);
2050}
2051
2052static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2053_mm_reduce_add_ph(__m128h __W) {
2054 return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W);
2055}
2056
2057static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2058_mm_reduce_mul_ph(__m128h __W) {
2059 return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W);
2060}
2061
2062static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2063_mm_reduce_max_ph(__m128h __V) {
2064 return __builtin_ia32_reduce_fmax_ph128(__V);
2065}
2066
2067static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2068_mm_reduce_min_ph(__m128h __V) {
2069 return __builtin_ia32_reduce_fmin_ph128(__V);
2070}
2071
/* The macros below are aliases: each *_mul_pch name expands to the matching
 * *_fmul_pch intrinsic with the same arguments in the same order. */
#define _mm_mul_pch(A, B) _mm_fmul_pch(A, B)
#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch(W, U, A, B)
#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch(U, A, B)
#define _mm256_mul_pch(A, B) _mm256_fmul_pch(A, B)
#define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch(W, U, A, B)
#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch(U, A, B)

/* Likewise, each *_cmul_pch name expands to the matching *_fcmul_pch
 * intrinsic. */
#define _mm_cmul_pch(A, B) _mm_fcmul_pch(A, B)
#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch(W, U, A, B)
#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch(U, A, B)
#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch(A, B)
#define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch(W, U, A, B)
#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch(U, A, B)
2086
2087#undef __DEFAULT_FN_ATTRS128
2088#undef __DEFAULT_FN_ATTRS256
2089#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
2090#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
2091
2092#endif
2093#endif
__device__ _Float16
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS256_CONSTEXPR
Definition avx2intrin.h:29
unsigned char __mmask8
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition avxintrin.h:3644
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void)
Create a 256-bit vector of [4 x double] with undefined values.
Definition avxintrin.h:3631
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3657
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_ps(float __w)
Constructs a 256-bit floating-point vector of [8 x float], with each of the eight single-precision fl...
Definition avxintrin.h:4226
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4328
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements se...
Definition avxintrin.h:4245
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_ps(__m256 __a, __m256 __b)
Performs a bitwise XOR of two 256-bit vectors of [8 x float].
Definition avxintrin.h:674
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4316
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4340
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
Definition emmintrin.h:1765
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition emmintrin.h:3709
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
Definition xmmintrin.h:1899
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_ps(__m128 __a, __m128 __b)
Performs a bitwise exclusive OR of two 128-bit vectors of [4 x float].
Definition xmmintrin.h:489
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_ps(float __w)
Constructs a 128-bit floating-point vector of [4 x float], with each of the four single-precision flo...
Definition xmmintrin.h:1936
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2021