clang 22.0.0git
avx512vlfp16intrin.h
Go to the documentation of this file.
1/*===---------- avx512vlfp16intrin.h - AVX512-FP16 intrinsics --------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
/* Refuse direct inclusion: emit a hard error unless __IMMINTRIN_H is already
 * defined at this point. */
9#ifndef __IMMINTRIN_H
10#error \
11 "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifdef __SSE2__
15
16#ifndef __AVX512VLFP16INTRIN_H
17#define __AVX512VLFP16INTRIN_H
18
19/* Define the default attributes for the functions in this file. */
/* Attribute set for the 256-bit intrinsics: always inline, no debug info,
 * target features "avx512fp16,avx512vl", minimum vector width 256. */
20#define __DEFAULT_FN_ATTRS256 \
21 __attribute__((__always_inline__, __nodebug__, \
22 __target__("avx512fp16,avx512vl"), \
23 __min_vector_width__(256)))
/* Attribute set for the 128-bit intrinsics: always inline, no debug info,
 * target features "avx512fp16,avx512vl", minimum vector width 128. */
24#define __DEFAULT_FN_ATTRS128 \
25 __attribute__((__always_inline__, __nodebug__, \
26 __target__("avx512fp16,avx512vl"), \
27 __min_vector_width__(128)))
28
/* The *_CONSTEXPR variants append `constexpr` to the attribute macros only
 * when compiled as C++11 or later; otherwise they expand to the plain
 * attribute macros. */
29#if defined(__cplusplus) && (__cplusplus >= 201103L)
30#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
31#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
32#else
33#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
34#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
35#endif
36
38_mm_cvtsh_h(__m128h __a) {
39 return __a[0];
40}
41
43_mm256_cvtsh_h(__m256h __a) {
44 return __a[0];
45}
46
/* Builds a __m128h with __h in element 0 and zeros in elements 1..7. */
47static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
48_mm_set_sh(_Float16 __h) {
49 return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
50}
51
/* Broadcasts __h into all eight elements of a __m128h. */
52static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
53_mm_set1_ph(_Float16 __h) {
54 return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h};
55}
56
/* Broadcasts __h into all sixteen elements of a __m256h. */
57static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
58_mm256_set1_ph(_Float16 __h) {
59 return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h,
60 __h, __h, __h, __h, __h, __h, __h, __h};
61}
62
/* Packs eight halves into a __m128h; the LAST argument (__h8) becomes
 * element 0 and __h1 becomes element 7. */
63static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
64_mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
65 _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
66 return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
67}
68
/* Bit-casts the _Float16 _Complex value h to a float, replicates it with
 * _mm256_set1_ps, and reinterprets the result as __m256h. */
69static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
70_mm256_set1_pch(_Float16 _Complex h) {
71 return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h));
72}
73
/* Bit-casts the _Float16 _Complex value h to a float, replicates it with
 * _mm_set1_ps, and reinterprets the result as __m128h. */
74static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
75_mm_set1_pch(_Float16 _Complex h) {
76 return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h));
77}
78
/* Packs sixteen halves into a __m256h; the LAST argument (__h16) becomes
 * element 0 and __h1 becomes element 15. */
79static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
80_mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
81 _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
82 _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
83 _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) {
84 return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
85 __h10, __h9, __h8, __h7, __h6, __h5,
86 __h4, __h3, __h2, __h1};
87}
88
/* Reversed-argument form of _mm_set_ph: forwards e7..e0 so that e0 ends up
 * as the last _mm_set_ph argument. */
89static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
90_mm_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4,
91 _Float16 e5, _Float16 e6, _Float16 e7) {
92 return _mm_set_ph(e7, e6, e5, e4, e3, e2, e1, e0);
93}
94
/* Reversed-argument form of _mm256_set_ph: forwards e15..e0 so that e0 ends
 * up as the last _mm256_set_ph argument. */
95static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
96_mm256_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4,
97 _Float16 e5, _Float16 e6, _Float16 e7, _Float16 e8, _Float16 e9,
98 _Float16 e10, _Float16 e11, _Float16 e12, _Float16 e13,
99 _Float16 e14, _Float16 e15) {
100 return _mm256_set_ph(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
101 e2, e1, e0);
102}
103
/* Element-wise __A + __B on the operands viewed as __v16hf. */
104static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A,
105 __m256h __B) {
106 return (__m256h)((__v16hf)__A + (__v16hf)__B);
107}
108
/* Masked add: __builtin_ia32_selectph_256(__U, __A + __B, __W). */
109static __inline__ __m256h __DEFAULT_FN_ATTRS256
110_mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
111 return (__m256h)__builtin_ia32_selectph_256(
112 __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W);
113}
114
/* Zero-masked add: __builtin_ia32_selectph_256(__U, __A + __B, zero). */
115static __inline__ __m256h __DEFAULT_FN_ATTRS256
116_mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) {
117 return (__m256h)__builtin_ia32_selectph_256(
118 __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
119}
120
/* Element-wise __A + __B on the operands viewed as __v8hf. */
121static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A,
122 __m128h __B) {
123 return (__m128h)((__v8hf)__A + (__v8hf)__B);
124}
125
/* Masked add: __builtin_ia32_selectph_128(__U, __A + __B, __W). */
126static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W,
127 __mmask8 __U,
128 __m128h __A,
129 __m128h __B) {
130 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
131 (__v8hf)__W);
132}
133
/* Zero-masked add: __builtin_ia32_selectph_128(__U, __A + __B, zero). */
134static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U,
135 __m128h __A,
136 __m128h __B) {
137 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
138 (__v8hf)_mm_setzero_ph());
139}
140
/* Element-wise __A - __B on the operands viewed as __v16hf. */
141static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A,
142 __m256h __B) {
143 return (__m256h)((__v16hf)__A - (__v16hf)__B);
144}
145
/* Masked subtract: __builtin_ia32_selectph_256(__U, __A - __B, __W). */
146static __inline__ __m256h __DEFAULT_FN_ATTRS256
147_mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
148 return (__m256h)__builtin_ia32_selectph_256(
149 __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W);
150}
151
/* Zero-masked subtract: __builtin_ia32_selectph_256(__U, __A - __B, zero). */
152static __inline__ __m256h __DEFAULT_FN_ATTRS256
153_mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) {
154 return (__m256h)__builtin_ia32_selectph_256(
155 __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
156}
157
/* Element-wise __A - __B on the operands viewed as __v8hf. */
158static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A,
159 __m128h __B) {
160 return (__m128h)((__v8hf)__A - (__v8hf)__B);
161}
162
/* Masked subtract: __builtin_ia32_selectph_128(__U, __A - __B, __W). */
163static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W,
164 __mmask8 __U,
165 __m128h __A,
166 __m128h __B) {
167 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
168 (__v8hf)__W);
169}
170
/* Zero-masked subtract: __builtin_ia32_selectph_128(__U, __A - __B, zero). */
171static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U,
172 __m128h __A,
173 __m128h __B) {
174 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
175 (__v8hf)_mm_setzero_ph());
176}
177
/* Element-wise __A * __B on the operands viewed as __v16hf. */
178static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A,
179 __m256h __B) {
180 return (__m256h)((__v16hf)__A * (__v16hf)__B);
181}
182
/* Masked multiply: __builtin_ia32_selectph_256(__U, __A * __B, __W). */
183static __inline__ __m256h __DEFAULT_FN_ATTRS256
184_mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
185 return (__m256h)__builtin_ia32_selectph_256(
186 __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W);
187}
188
/* Zero-masked multiply: __builtin_ia32_selectph_256(__U, __A * __B, zero). */
189static __inline__ __m256h __DEFAULT_FN_ATTRS256
190_mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) {
191 return (__m256h)__builtin_ia32_selectph_256(
192 __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
193}
194
/* Element-wise __A * __B on the operands viewed as __v8hf. */
195static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A,
196 __m128h __B) {
197 return (__m128h)((__v8hf)__A * (__v8hf)__B);
198}
199
/* Masked multiply: __builtin_ia32_selectph_128(__U, __A * __B, __W). */
200static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W,
201 __mmask8 __U,
202 __m128h __A,
203 __m128h __B) {
204 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
205 (__v8hf)__W);
206}
207
/* Zero-masked multiply: __builtin_ia32_selectph_128(__U, __A * __B, zero). */
208static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U,
209 __m128h __A,
210 __m128h __B) {
211 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
212 (__v8hf)_mm_setzero_ph());
213}
214
/* Element-wise __A / __B on the operands viewed as __v16hf. */
215static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A,
216 __m256h __B) {
217 return (__m256h)((__v16hf)__A / (__v16hf)__B);
218}
219
/* Masked divide: __builtin_ia32_selectph_256(__U, __A / __B, __W). */
220static __inline__ __m256h __DEFAULT_FN_ATTRS256
221_mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
222 return (__m256h)__builtin_ia32_selectph_256(
223 __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W);
224}
225
/* Zero-masked divide: __builtin_ia32_selectph_256(__U, __A / __B, zero). */
226static __inline__ __m256h __DEFAULT_FN_ATTRS256
227_mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) {
228 return (__m256h)__builtin_ia32_selectph_256(
229 __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
230}
231
/* Element-wise __A / __B on the operands viewed as __v8hf. */
232static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A,
233 __m128h __B) {
234 return (__m128h)((__v8hf)__A / (__v8hf)__B);
235}
236
/* Masked divide: __builtin_ia32_selectph_128(__U, __A / __B, __W). */
237static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W,
238 __mmask8 __U,
239 __m128h __A,
240 __m128h __B) {
241 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
242 (__v8hf)__W);
243}
244
/* Zero-masked divide: __builtin_ia32_selectph_128(__U, __A / __B, zero). */
245static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U,
246 __m128h __A,
247 __m128h __B) {
248 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
249 (__v8hf)_mm_setzero_ph());
250}
251
/* Forwards both operands (as __v16hf) to __builtin_ia32_minph256. */
252static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A,
253 __m256h __B) {
254 return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B);
255}
256
/* Masked min: __builtin_ia32_selectph_256(__U, minph256(__A, __B), __W). */
257static __inline__ __m256h __DEFAULT_FN_ATTRS256
258_mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
259 return (__m256h)__builtin_ia32_selectph_256(
260 (__mmask16)__U,
261 (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
262 (__v16hf)__W);
263}
264
/* Zero-masked min: __builtin_ia32_selectph_256(__U, minph256(__A, __B),
 * zero). */
265static __inline__ __m256h __DEFAULT_FN_ATTRS256
266_mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) {
267 return (__m256h)__builtin_ia32_selectph_256(
268 (__mmask16)__U,
269 (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
270 (__v16hf)_mm256_setzero_ph());
271}
272
/* Forwards both operands (as __v8hf) to __builtin_ia32_minph128. */
273static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A,
274 __m128h __B) {
275 return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B);
276}
277
/* Masked min: __builtin_ia32_selectph_128(__U, minph128(__A, __B), __W). */
278static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W,
279 __mmask8 __U,
280 __m128h __A,
281 __m128h __B) {
282 return (__m128h)__builtin_ia32_selectph_128(
283 (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
284 (__v8hf)__W);
285}
286
/* Zero-masked min: __builtin_ia32_selectph_128(__U, minph128(__A, __B),
 * zero). */
287static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U,
288 __m128h __A,
289 __m128h __B) {
290 return (__m128h)__builtin_ia32_selectph_128(
291 (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
292 (__v8hf)_mm_setzero_ph());
293}
294
/* Forwards both operands (as __v16hf) to __builtin_ia32_maxph256. */
295static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A,
296 __m256h __B) {
297 return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B);
298}
299
/* Masked max: __builtin_ia32_selectph_256(__U, maxph256(__A, __B), __W). */
300static __inline__ __m256h __DEFAULT_FN_ATTRS256
301_mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
302 return (__m256h)__builtin_ia32_selectph_256(
303 (__mmask16)__U,
304 (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
305 (__v16hf)__W);
306}
307
/* Zero-masked max: __builtin_ia32_selectph_256(__U, maxph256(__A, __B),
 * zero). */
308static __inline__ __m256h __DEFAULT_FN_ATTRS256
309_mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) {
310 return (__m256h)__builtin_ia32_selectph_256(
311 (__mmask16)__U,
312 (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
313 (__v16hf)_mm256_setzero_ph());
314}
315
/* Forwards both operands (as __v8hf) to __builtin_ia32_maxph128. */
316static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A,
317 __m128h __B) {
318 return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B);
319}
320
/* Masked max: __builtin_ia32_selectph_128(__U, maxph128(__A, __B), __W). */
321static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W,
322 __mmask8 __U,
323 __m128h __A,
324 __m128h __B) {
325 return (__m128h)__builtin_ia32_selectph_128(
326 (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
327 (__v8hf)__W);
328}
329
/* Zero-masked max: __builtin_ia32_selectph_128(__U, maxph128(__A, __B),
 * zero). */
330static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U,
331 __m128h __A,
332 __m128h __B) {
333 return (__m128h)__builtin_ia32_selectph_128(
334 (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
335 (__v8hf)_mm_setzero_ph());
336}
337
/* ANDs __A (viewed as __m256i) with 0x7FFF7FFF in every 32-bit element,
 * i.e. clears bit 15 of each 16-bit lane. */
338static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
339_mm256_abs_ph(__m256h __A) {
340 return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A);
341}
342
/* ANDs __A (viewed as __m128i) with 0x7FFF7FFF in every 32-bit element,
 * i.e. clears bit 15 of each 16-bit lane. */
343static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
344_mm_abs_ph(__m128h __A) {
345 return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A);
346}
347
/* XORs __A (viewed as __m256) with a broadcast -0.0f, flipping the top bit
 * of every 32-bit element. NOTE(review): presumably that bit is the sign of
 * the imaginary half of each complex pair - confirm the lane layout. */
348static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A) {
349 return (__m256h)_mm256_xor_ps((__m256)__A, _mm256_set1_ps(-0.0f));
350}
351
/* Masked conjugate: __builtin_ia32_selectps_256(__U, _mm256_conj_pch(__A),
 * __W), selecting on 32-bit (__v8sf) lanes with an 8-bit mask. */
352static __inline__ __m256h __DEFAULT_FN_ATTRS256
353_mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) {
354 return (__m256h)__builtin_ia32_selectps_256(
355 (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W);
356}
357
/* Zero-masked conjugate: __builtin_ia32_selectps_256(__U,
 * _mm256_conj_pch(__A), zero), selecting on 32-bit (__v8sf) lanes. */
358static __inline__ __m256h __DEFAULT_FN_ATTRS256
359_mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) {
360 return (__m256h)__builtin_ia32_selectps_256(
361 (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)_mm256_setzero_ps());
362}
363
/* XORs __A (viewed as __m128) with a broadcast -0.0f, flipping the top bit
 * of every 32-bit element. NOTE(review): presumably that bit is the sign of
 * the imaginary half of each complex pair - confirm the lane layout. */
364static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A) {
365 return (__m128h)_mm_xor_ps((__m128)__A, _mm_set1_ps(-0.0f));
366}
367
/* Masked conjugate: __builtin_ia32_selectps_128(__U, _mm_conj_pch(__A),
 * __W), selecting on 32-bit (__v4sf) lanes. */
368static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_conj_pch(__m128h __W,
369 __mmask8 __U,
370 __m128h __A) {
371 return (__m128h)__builtin_ia32_selectps_128(
372 (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W);
373}
374
/* Zero-masked conjugate: __builtin_ia32_selectps_128(__U,
 * _mm_conj_pch(__A), zero), selecting on 32-bit (__v4sf) lanes. */
375static __inline__ __m128h __DEFAULT_FN_ATTRS128
376_mm_maskz_conj_pch(__mmask8 __U, __m128h __A) {
377 return (__m128h)__builtin_ia32_selectps_128(
378 (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)_mm_setzero_ps());
379}
380
/* Compares a and b (as __v16hf) with predicate p via
 * __builtin_ia32_cmpph256_mask, passing an all-ones input mask; yields a
 * __mmask16. */
381#define _mm256_cmp_ph_mask(a, b, p) \
382 ((__mmask16)__builtin_ia32_cmpph256_mask( \
383 (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)-1))
384
/* Compares a and b (as __v16hf) with predicate p via
 * __builtin_ia32_cmpph256_mask, passing m as the input mask; yields a
 * __mmask16. */
385#define _mm256_mask_cmp_ph_mask(m, a, b, p) \
386 ((__mmask16)__builtin_ia32_cmpph256_mask( \
387 (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)(m)))
388
/* Compares a and b (as __v8hf) with predicate p via
 * __builtin_ia32_cmpph128_mask, passing an all-ones input mask; yields a
 * __mmask8. */
389#define _mm_cmp_ph_mask(a, b, p) \
390 ((__mmask8)__builtin_ia32_cmpph128_mask( \
391 (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)-1))
392
/* Compares a and b (as __v8hf) with predicate p via
 * __builtin_ia32_cmpph128_mask, passing m as the input mask; yields a
 * __mmask8. */
393#define _mm_mask_cmp_ph_mask(m, a, b, p) \
394 ((__mmask8)__builtin_ia32_cmpph128_mask( \
395 (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m)))
396
/* Unmasked form: calls __builtin_ia32_rcpph256_mask with __A, an undefined
 * second vector operand, and an all-ones mask. */
397static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) {
398 return (__m256h)__builtin_ia32_rcpph256_mask(
399 (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
400}
401
/* Masked form: forwards __A, __W and mask __U to
 * __builtin_ia32_rcpph256_mask. */
402static __inline__ __m256h __DEFAULT_FN_ATTRS256
403_mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
404 return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W,
405 (__mmask16)__U);
406}
407
/* Zero-masked form: forwards __A, an all-zero vector and mask __U to
 * __builtin_ia32_rcpph256_mask. */
408static __inline__ __m256h __DEFAULT_FN_ATTRS256
409_mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) {
410 return (__m256h)__builtin_ia32_rcpph256_mask(
411 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
412}
413
/* Unmasked form: calls __builtin_ia32_rcpph128_mask with __A, an undefined
 * second vector operand, and an all-ones mask. */
414static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) {
415 return (__m128h)__builtin_ia32_rcpph128_mask(
416 (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
417}
418
/* Masked form: forwards __A, __W and mask __U to
 * __builtin_ia32_rcpph128_mask. */
419static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W,
420 __mmask8 __U,
421 __m128h __A) {
422 return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W,
423 (__mmask8)__U);
424}
425
/* Zero-masked form: forwards __A, an all-zero vector and mask __U to
 * __builtin_ia32_rcpph128_mask. */
426static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U,
427 __m128h __A) {
428 return (__m128h)__builtin_ia32_rcpph128_mask(
429 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
430}
431
/* Unmasked form: calls __builtin_ia32_rsqrtph256_mask with __A, an undefined
 * second vector operand, and an all-ones mask. */
432static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) {
433 return (__m256h)__builtin_ia32_rsqrtph256_mask(
434 (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
435}
436
/* Masked form: forwards __A, __W and mask __U to
 * __builtin_ia32_rsqrtph256_mask. */
437static __inline__ __m256h __DEFAULT_FN_ATTRS256
438_mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
439 return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W,
440 (__mmask16)__U);
441}
442
/* Zero-masked form: forwards __A, an all-zero vector and mask __U to
 * __builtin_ia32_rsqrtph256_mask. */
443static __inline__ __m256h __DEFAULT_FN_ATTRS256
444_mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) {
445 return (__m256h)__builtin_ia32_rsqrtph256_mask(
446 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
447}
448
/* Unmasked form: calls __builtin_ia32_rsqrtph128_mask with __A, an undefined
 * second vector operand, and an all-ones mask. */
449static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) {
450 return (__m128h)__builtin_ia32_rsqrtph128_mask(
451 (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
452}
453
/* Masked form: forwards __A, __W and mask __U to
 * __builtin_ia32_rsqrtph128_mask. */
454static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W,
455 __mmask8 __U,
456 __m128h __A) {
457 return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W,
458 (__mmask8)__U);
459}
460
/* Zero-masked form: forwards __A, an all-zero vector and mask __U to
 * __builtin_ia32_rsqrtph128_mask. */
461static __inline__ __m128h __DEFAULT_FN_ATTRS128
462_mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) {
463 return (__m128h)__builtin_ia32_rsqrtph128_mask(
464 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
465}
466
/* Unmasked form: calls __builtin_ia32_getexpph128_mask with __A, an all-zero
 * second vector operand, and an all-ones mask. */
467static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) {
468 return (__m128h)__builtin_ia32_getexpph128_mask(
469 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
470}
471
/* Masked form: forwards __A, __W and mask __U to
 * __builtin_ia32_getexpph128_mask. */
472static __inline__ __m128h __DEFAULT_FN_ATTRS128
473_mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) {
474 return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W,
475 (__mmask8)__U);
476}
477
/* Zero-masked form: forwards __A, an all-zero vector and mask __U to
 * __builtin_ia32_getexpph128_mask. */
478static __inline__ __m128h __DEFAULT_FN_ATTRS128
479_mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) {
480 return (__m128h)__builtin_ia32_getexpph128_mask(
481 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
482}
483
/* Unmasked form: calls __builtin_ia32_getexpph256_mask with __A, an all-zero
 * second vector operand, and an all-ones mask. */
484static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) {
485 return (__m256h)__builtin_ia32_getexpph256_mask(
486 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
487}
488
/* Masked form: forwards __A, __W and mask __U to
 * __builtin_ia32_getexpph256_mask. */
489static __inline__ __m256h __DEFAULT_FN_ATTRS256
490_mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
491 return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W,
492 (__mmask16)__U);
493}
494
/* Zero-masked form: forwards __A, an all-zero vector and mask __U to
 * __builtin_ia32_getexpph256_mask. */
495static __inline__ __m256h __DEFAULT_FN_ATTRS256
496_mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) {
497 return (__m256h)__builtin_ia32_getexpph256_mask(
498 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
499}
500
/* Unmasked form: packs B and C into one immediate as ((C) << 2) | (B) and
 * calls __builtin_ia32_getmantph128_mask with an all-zero vector operand and
 * an all-ones mask. */
501#define _mm_getmant_ph(A, B, C) \
502 ((__m128h)__builtin_ia32_getmantph128_mask( \
503 (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
504 (__mmask8)-1))
505
/* Masked form: packs B and C into one immediate as ((C) << 2) | (B) and
 * forwards A, that immediate, W and mask U to
 * __builtin_ia32_getmantph128_mask. */
506#define _mm_mask_getmant_ph(W, U, A, B, C) \
507 ((__m128h)__builtin_ia32_getmantph128_mask( \
508 (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)(__m128h)(W), \
509 (__mmask8)(U)))
510
/* Zero-masked form: packs B and C into one immediate as ((C) << 2) | (B) and
 * forwards A, that immediate, an all-zero vector and mask U to
 * __builtin_ia32_getmantph128_mask. */
511#define _mm_maskz_getmant_ph(U, A, B, C) \
512 ((__m128h)__builtin_ia32_getmantph128_mask( \
513 (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
514 (__mmask8)(U)))
515
/* Unmasked form: packs B and C into one immediate as ((C) << 2) | (B) and
 * calls __builtin_ia32_getmantph256_mask with an all-zero vector operand and
 * an all-ones mask. */
516#define _mm256_getmant_ph(A, B, C) \
517 ((__m256h)__builtin_ia32_getmantph256_mask( \
518 (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \
519 (__v16hf)_mm256_setzero_ph(), (__mmask16)-1))
520
/* Masked form: packs B and C into one immediate as ((C) << 2) | (B) and
 * forwards A, that immediate, W and mask U to
 * __builtin_ia32_getmantph256_mask. */
521#define _mm256_mask_getmant_ph(W, U, A, B, C) \
522 ((__m256h)__builtin_ia32_getmantph256_mask( \
523 (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \
524 (__mmask16)(U)))
525
/* Zero-masked form: packs B and C into one immediate as ((C) << 2) | (B) and
 * forwards A, that immediate, an all-zero vector and mask U to
 * __builtin_ia32_getmantph256_mask. */
526#define _mm256_maskz_getmant_ph(U, A, B, C) \
527 ((__m256h)__builtin_ia32_getmantph256_mask( \
528 (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \
529 (__v16hf)_mm256_setzero_ph(), (__mmask16)(U)))
530
/* Unmasked form: calls __builtin_ia32_scalefph128_mask with __A, __B, an
 * all-zero third vector operand, and an all-ones mask. */
531static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A,
532 __m128h __B) {
533 return (__m128h)__builtin_ia32_scalefph128_mask(
534 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
535}
536
/* Masked form: forwards __A, __B, __W and mask __U to
 * __builtin_ia32_scalefph128_mask. */
537static __inline__ __m128h __DEFAULT_FN_ATTRS128
538_mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
539 return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B,
540 (__v8hf)__W, (__mmask8)__U);
541}
542
/* Zero-masked form: forwards __A, __B, an all-zero vector and mask __U to
 * __builtin_ia32_scalefph128_mask. */
543static __inline__ __m128h __DEFAULT_FN_ATTRS128
544_mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) {
545 return (__m128h)__builtin_ia32_scalefph128_mask(
546 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
547}
548
/* Unmasked form: calls __builtin_ia32_scalefph256_mask with __A, __B, an
 * all-zero third vector operand, and an all-ones mask. */
549static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A,
550 __m256h __B) {
551 return (__m256h)__builtin_ia32_scalefph256_mask(
552 (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
553}
554
/* Masked form: forwards __A, __B, __W and mask __U to
 * __builtin_ia32_scalefph256_mask. */
555static __inline__ __m256h __DEFAULT_FN_ATTRS256
556_mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
557 return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B,
558 (__v16hf)__W, (__mmask16)__U);
559}
560
/* Zero-masked form: forwards __A, __B, an all-zero vector and mask __U to
 * __builtin_ia32_scalefph256_mask. */
561static __inline__ __m256h __DEFAULT_FN_ATTRS256
562_mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) {
563 return (__m256h)__builtin_ia32_scalefph256_mask(
564 (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
565}
566
/* Unmasked form: calls __builtin_ia32_rndscaleph_128_mask with A, the
 * immediate imm, an all-zero vector operand and an all-ones mask. */
567#define _mm_roundscale_ph(A, imm) \
568 ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
569 (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \
570 (__mmask8)-1))
571
/* Masked form: forwards A, imm, W and mask U to
 * __builtin_ia32_rndscaleph_128_mask. */
572#define _mm_mask_roundscale_ph(W, U, A, imm) \
573 ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
574 (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
575
/* Zero-masked form: forwards A, imm, an all-zero vector and mask U to
 * __builtin_ia32_rndscaleph_128_mask. */
576#define _mm_maskz_roundscale_ph(U, A, imm) \
577 ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
578 (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \
579 (__mmask8)(U)))
580
/* Unmasked form: calls __builtin_ia32_rndscaleph_256_mask with A, the
 * immediate imm, an all-zero vector operand and an all-ones mask. */
581#define _mm256_roundscale_ph(A, imm) \
582 ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
583 (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \
584 (__mmask16)-1))
585
/* Masked form: forwards A, imm, W and mask U to
 * __builtin_ia32_rndscaleph_256_mask. */
586#define _mm256_mask_roundscale_ph(W, U, A, imm) \
587 ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
588 (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \
589 (__mmask16)(U)))
590
/* Zero-masked form: forwards A, imm, an all-zero vector and mask U to
 * __builtin_ia32_rndscaleph_256_mask. */
591#define _mm256_maskz_roundscale_ph(U, A, imm) \
592 ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
593 (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \
594 (__mmask16)(U)))
595
/* Unmasked form: calls __builtin_ia32_reduceph128_mask with A, the immediate
 * imm, an all-zero vector operand and an all-ones mask. */
596#define _mm_reduce_ph(A, imm) \
597 ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \
598 (__v8hf)_mm_setzero_ph(), \
599 (__mmask8)-1))
600
/* Masked form: forwards A, imm, W and mask U to
 * __builtin_ia32_reduceph128_mask. */
601#define _mm_mask_reduce_ph(W, U, A, imm) \
602 ((__m128h)__builtin_ia32_reduceph128_mask( \
603 (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
604
/* Zero-masked form: forwards A, imm, an all-zero vector and mask U to
 * __builtin_ia32_reduceph128_mask. */
605#define _mm_maskz_reduce_ph(U, A, imm) \
606 ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \
607 (__v8hf)_mm_setzero_ph(), \
608 (__mmask8)(U)))
609
/* Unmasked form: calls __builtin_ia32_reduceph256_mask with A, the immediate
 * imm, an all-zero vector operand and an all-ones mask. */
610#define _mm256_reduce_ph(A, imm) \
611 ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
612 (__v16hf)_mm256_setzero_ph(), \
613 (__mmask16)-1))
614
/* Masked form: forwards A, imm, W and mask U to
 * __builtin_ia32_reduceph256_mask. */
615#define _mm256_mask_reduce_ph(W, U, A, imm) \
616 ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
617 (__v16hf)(__m256h)(W), \
618 (__mmask16)(U)))
619
/* Zero-masked form: forwards A, imm, an all-zero vector and mask U to
 * __builtin_ia32_reduceph256_mask. */
620#define _mm256_maskz_reduce_ph(U, A, imm) \
621 ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
622 (__v16hf)_mm256_setzero_ph(), \
623 (__mmask16)(U)))
624
/* Forwards __a (as __v8hf) to __builtin_ia32_sqrtph and returns its result
 * as __m128h (no explicit cast on the return value). */
625static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) {
626 return __builtin_ia32_sqrtph((__v8hf)__a);
627}
628
/* Masked sqrt: __builtin_ia32_selectph_128(__U, _mm_sqrt_ph(__A), __W). */
629static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W,
630 __mmask8 __U,
631 __m128h __A) {
632 return (__m128h)__builtin_ia32_selectph_128(
633 (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W);
634}
635
/* Zero-masked sqrt: __builtin_ia32_selectph_128(__U, _mm_sqrt_ph(__A),
 * zero). */
636static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U,
637 __m128h __A) {
638 return (__m128h)__builtin_ia32_selectph_128(
639 (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph());
640}
641
/* Forwards __a (as __v16hf) to __builtin_ia32_sqrtph256. */
642static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) {
643 return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a);
644}
645
/* Masked sqrt: __builtin_ia32_selectph_256(__U, _mm256_sqrt_ph(__A), __W). */
646static __inline__ __m256h __DEFAULT_FN_ATTRS256
647_mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
648 return (__m256h)__builtin_ia32_selectph_256(
649 (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W);
650}
651
/* Zero-masked sqrt: __builtin_ia32_selectph_256(__U, _mm256_sqrt_ph(__A),
 * zero). */
652static __inline__ __m256h __DEFAULT_FN_ATTRS256
653_mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) {
654 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
655 (__v16hf)_mm256_sqrt_ph(__A),
656 (__v16hf)_mm256_setzero_ph());
657}
658
/* Forwards A (as __v8hf), the immediate imm and input mask U to
 * __builtin_ia32_fpclassph128_mask; yields a __mmask8. */
659#define _mm_mask_fpclass_ph_mask(U, A, imm) \
660 ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \
661 (int)(imm), (__mmask8)(U)))
662
/* Forwards A (as __v8hf) and the immediate imm to
 * __builtin_ia32_fpclassph128_mask with an all-ones input mask; yields a
 * __mmask8. */
663#define _mm_fpclass_ph_mask(A, imm) \
664 ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \
665 (int)(imm), (__mmask8)-1))
666
/* Forwards A (as __v16hf), the immediate imm and input mask U to
 * __builtin_ia32_fpclassph256_mask; yields a __mmask16. */
667#define _mm256_mask_fpclass_ph_mask(U, A, imm) \
668 ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \
669 (int)(imm), (__mmask16)(U)))
670
/* Forwards A (as __v16hf) and the immediate imm to
 * __builtin_ia32_fpclassph256_mask with an all-ones input mask; yields a
 * __mmask16. */
671#define _mm256_fpclass_ph_mask(A, imm) \
672 ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \
673 (int)(imm), (__mmask16)-1))
674
/* Unmasked form: calls __builtin_ia32_vcvtpd2ph128_mask with __A (as
 * __v2df), an undefined __v8hf operand and an all-ones mask. */
675static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) {
676 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
677 (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
678}
679
/* Masked form: forwards __A (as __v2df), __W and mask __U to
 * __builtin_ia32_vcvtpd2ph128_mask. */
680static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W,
681 __mmask8 __U,
682 __m128d __A) {
683 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
684 (__mmask8)__U);
685}
686
/* Zero-masked form: forwards __A (as __v2df), an all-zero vector and mask
 * __U to __builtin_ia32_vcvtpd2ph128_mask. */
687static __inline__ __m128h __DEFAULT_FN_ATTRS128
688_mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
689 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
690 (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
691}
692
/* Unmasked form: calls __builtin_ia32_vcvtpd2ph256_mask with __A (as
 * __v4df), an undefined __v8hf operand and an all-ones mask. Takes a 256-bit
 * input but returns a 128-bit __m128h. */
693static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) {
694 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
695 (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
696}
697
/* Masked form: forwards __A (as __v4df), __W and mask __U to
 * __builtin_ia32_vcvtpd2ph256_mask; the result and __W are 128-bit. */
698static __inline__ __m128h __DEFAULT_FN_ATTRS256
699_mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
700 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
701 (__mmask8)__U);
702}
703
/* Zero-masked form: forwards __A (as __v4df), an all-zero __v8hf vector and
 * mask __U to __builtin_ia32_vcvtpd2ph256_mask. */
704static __inline__ __m128h __DEFAULT_FN_ATTRS256
705_mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
706 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
707 (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
708}
709
/* Unmasked form: calls __builtin_ia32_vcvtph2pd128_mask with __A (as
 * __v8hf), an undefined __v2df operand and an all-ones mask. */
710static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) {
711 return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
712 (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1);
713}
714
/* Masked form: forwards __A (as __v8hf), __W (as __v2df) and mask __U to
 * __builtin_ia32_vcvtph2pd128_mask. */
715static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W,
716 __mmask8 __U,
717 __m128h __A) {
718 return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
719 (__mmask8)__U);
720}
721
/* Zero-masked form: forwards __A (as __v8hf), an all-zero __v2df vector and
 * mask __U to __builtin_ia32_vcvtph2pd128_mask. */
722static __inline__ __m128d __DEFAULT_FN_ATTRS128
723_mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
724 return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
725 (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U);
726}
727
/* Unmasked form: calls __builtin_ia32_vcvtph2pd256_mask with the 128-bit
 * __A (as __v8hf), an undefined __v4df operand and an all-ones mask. */
728static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) {
729 return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
730 (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1);
731}
732
/* Masked form: forwards __A (as __v8hf), __W (as __v4df) and mask __U to
 * __builtin_ia32_vcvtph2pd256_mask. */
733static __inline__ __m256d __DEFAULT_FN_ATTRS256
734_mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
735 return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
736 (__mmask8)__U);
737}
738
/* Zero-masked form: forwards __A (as __v8hf), an all-zero __v4df vector and
 * mask __U to __builtin_ia32_vcvtph2pd256_mask. */
739static __inline__ __m256d __DEFAULT_FN_ATTRS256
740_mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
741 return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
742 (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U);
743}
744
/* Unmasked form: calls __builtin_ia32_vcvtph2w128_mask with __A (as __v8hf),
 * an undefined __v8hi operand and an all-ones mask. */
745static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) {
746 return (__m128i)__builtin_ia32_vcvtph2w128_mask(
747 (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
748}
749
/* Masked form: forwards __A (as __v8hf), __W (as __v8hi) and mask __U to
 * __builtin_ia32_vcvtph2w128_mask. */
750static __inline__ __m128i __DEFAULT_FN_ATTRS128
751_mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
752 return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
753 (__mmask8)__U);
754}
755
/* Zero-masked form: forwards __A (as __v8hf), an all-zero __v8hi vector and
 * mask __U to __builtin_ia32_vcvtph2w128_mask. */
756static __inline__ __m128i __DEFAULT_FN_ATTRS128
757_mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
758 return (__m128i)__builtin_ia32_vcvtph2w128_mask(
759 (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
760}
761
/* Unmasked form: calls __builtin_ia32_vcvtph2w256_mask with __A (as
 * __v16hf), an undefined __v16hi operand and an all-ones mask. */
762static __inline__ __m256i __DEFAULT_FN_ATTRS256
763_mm256_cvtph_epi16(__m256h __A) {
764 return (__m256i)__builtin_ia32_vcvtph2w256_mask(
765 (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
766}
767
/* Masked form: forwards __A (as __v16hf), __W (as __v16hi) and mask __U to
 * __builtin_ia32_vcvtph2w256_mask. */
768static __inline__ __m256i __DEFAULT_FN_ATTRS256
769_mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
770 return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
771 (__mmask16)__U);
772}
773
/* Zero-masked form: forwards __A (as __v16hf), an all-zero __v16hi vector
 * and mask __U to __builtin_ia32_vcvtph2w256_mask. */
774static __inline__ __m256i __DEFAULT_FN_ATTRS256
775_mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) {
776 return (__m256i)__builtin_ia32_vcvtph2w256_mask(
777 (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
778}
779
/* Unmasked form: calls __builtin_ia32_vcvttph2w128_mask with __A (as
 * __v8hf), an undefined __v8hi operand and an all-ones mask. */
780static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) {
781 return (__m128i)__builtin_ia32_vcvttph2w128_mask(
782 (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
783}
784
/* Masked form: forwards __A (as __v8hf), __W (as __v8hi) and mask __U to
 * __builtin_ia32_vcvttph2w128_mask. */
785static __inline__ __m128i __DEFAULT_FN_ATTRS128
786_mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
787 return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
788 (__mmask8)__U);
789}
790
/* Zero-masked form: forwards __A (as __v8hf), an all-zero __v8hi vector and
 * mask __U to __builtin_ia32_vcvttph2w128_mask. */
791static __inline__ __m128i __DEFAULT_FN_ATTRS128
792_mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
793 return (__m128i)__builtin_ia32_vcvttph2w128_mask(
794 (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
795}
796
/* Unmasked form: calls __builtin_ia32_vcvttph2w256_mask with __A (as
 * __v16hf), an undefined __v16hi operand and an all-ones mask. */
797static __inline__ __m256i __DEFAULT_FN_ATTRS256
798_mm256_cvttph_epi16(__m256h __A) {
799 return (__m256i)__builtin_ia32_vcvttph2w256_mask(
800 (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
801}
802
/* Masked form: forwards __A (as __v16hf), __W (as __v16hi) and mask __U to
 * __builtin_ia32_vcvttph2w256_mask. */
803static __inline__ __m256i __DEFAULT_FN_ATTRS256
804_mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
805 return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
806 (__mmask16)__U);
807}
808
/* Zero-masked form: forwards __A (as __v16hf), an all-zero __v16hi vector
 * and mask __U to __builtin_ia32_vcvttph2w256_mask. */
809static __inline__ __m256i __DEFAULT_FN_ATTRS256
810_mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
811 return (__m256i)__builtin_ia32_vcvttph2w256_mask(
812 (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
813}
814
/* Converts __A, viewed as eight 16-bit integers (__v8hi), element-wise to
 * __v8hf with __builtin_convertvector. */
815static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
816_mm_cvtepi16_ph(__m128i __A) {
817 return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
818}
819
/* Masked convert: __builtin_ia32_selectph_128(__U, _mm_cvtepi16_ph(__A),
 * __W). */
820static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
821_mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
822 return (__m128h)__builtin_ia32_selectph_128(
823 (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
824}
825
/* Zero-masked convert: __builtin_ia32_selectph_128(__U,
 * _mm_cvtepi16_ph(__A), zero). */
826static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
827_mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
828 return (__m128h)__builtin_ia32_selectph_128(
829 (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
830}
831
/* Converts __A, viewed as sixteen 16-bit integers (__v16hi), element-wise to
 * __v16hf with __builtin_convertvector. */
832static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
833_mm256_cvtepi16_ph(__m256i __A) {
834 return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
835}
836
/* Masked convert: __builtin_ia32_selectph_256(__U,
 * _mm256_cvtepi16_ph(__A), __W). */
837static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
838_mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
839 return (__m256h)__builtin_ia32_selectph_256(
840 (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
841}
842
/* Zero-masked convert: __builtin_ia32_selectph_256(__U,
 * _mm256_cvtepi16_ph(__A), zero). */
843static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
844_mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
845 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
846 (__v16hf)_mm256_cvtepi16_ph(__A),
847 (__v16hf)_mm256_setzero_ph());
848}
849
/* Unmasked form: calls __builtin_ia32_vcvtph2uw128_mask with __A (as
 * __v8hf), an undefined __v8hu operand and an all-ones mask. */
850static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) {
851 return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
852 (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
853}
854
/* Masked form: forwards __A (as __v8hf), __W (as __v8hu) and mask __U to
 * __builtin_ia32_vcvtph2uw128_mask. */
855static __inline__ __m128i __DEFAULT_FN_ATTRS128
856_mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
857 return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
858 (__mmask8)__U);
859}
860
861static __inline__ __m128i __DEFAULT_FN_ATTRS128
862_mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
863 return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
864 (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
865}
866
867static __inline__ __m256i __DEFAULT_FN_ATTRS256
868_mm256_cvtph_epu16(__m256h __A) {
869 return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
870 (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
871}
872
873static __inline__ __m256i __DEFAULT_FN_ATTRS256
874_mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
875 return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
876 (__mmask16)__U);
877}
878
879static __inline__ __m256i __DEFAULT_FN_ATTRS256
880_mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) {
881 return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
882 (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
883}
884
885static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) {
886 return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
887 (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
888}
889
890static __inline__ __m128i __DEFAULT_FN_ATTRS128
891_mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
892 return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
893 (__mmask8)__U);
894}
895
896static __inline__ __m128i __DEFAULT_FN_ATTRS128
897_mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
898 return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
899 (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
900}
901
902static __inline__ __m256i __DEFAULT_FN_ATTRS256
903_mm256_cvttph_epu16(__m256h __A) {
904 return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
905 (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
906}
907
908static __inline__ __m256i __DEFAULT_FN_ATTRS256
909_mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
910 return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
911 (__mmask16)__U);
912}
913
914static __inline__ __m256i __DEFAULT_FN_ATTRS256
915_mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
916 return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
917 (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
918}
919
920static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
921_mm_cvtepu16_ph(__m128i __A) {
922 return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
923}
924
925static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
926_mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
927 return (__m128h)__builtin_ia32_selectph_128(
928 (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
929}
930
931static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
932_mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
933 return (__m128h)__builtin_ia32_selectph_128(
934 (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
935}
936
937static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
938_mm256_cvtepu16_ph(__m256i __A) {
939 return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
940}
941
942static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
943_mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
944 return (__m256h)__builtin_ia32_selectph_256(
945 (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
946}
947
948static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
949_mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
950 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
951 (__v16hf)_mm256_cvtepu16_ph(__A),
952 (__v16hf)_mm256_setzero_ph());
953}
954
955static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) {
956 return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
957 (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
958}
959
960static __inline__ __m128i __DEFAULT_FN_ATTRS128
961_mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
962 return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
963 (__mmask8)__U);
964}
965
966static __inline__ __m128i __DEFAULT_FN_ATTRS128
967_mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
968 return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
969 (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
970}
971
972static __inline__ __m256i __DEFAULT_FN_ATTRS256
973_mm256_cvtph_epi32(__m128h __A) {
974 return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
975 (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
976}
977
978static __inline__ __m256i __DEFAULT_FN_ATTRS256
979_mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
980 return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
981 (__mmask8)__U);
982}
983
984static __inline__ __m256i __DEFAULT_FN_ATTRS256
985_mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
986 return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
987 (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
988}
989
990static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) {
991 return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
992 (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
993}
994
995static __inline__ __m128i __DEFAULT_FN_ATTRS128
996_mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
997 return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
998 (__mmask8)__U);
999}
1000
1001static __inline__ __m128i __DEFAULT_FN_ATTRS128
1002_mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
1003 return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
1004 (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
1005}
1006
1007static __inline__ __m256i __DEFAULT_FN_ATTRS256
1008_mm256_cvtph_epu32(__m128h __A) {
1009 return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
1010 (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
1011}
1012
1013static __inline__ __m256i __DEFAULT_FN_ATTRS256
1014_mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
1015 return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
1016 (__mmask8)__U);
1017}
1018
1019static __inline__ __m256i __DEFAULT_FN_ATTRS256
1020_mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
1021 return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
1022 (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
1023}
1024
1025static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) {
1026 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1027 (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1028}
1029
1030static __inline__ __m128h __DEFAULT_FN_ATTRS128
1031_mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1032 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
1033 (__mmask8)__U);
1034}
1035
1036static __inline__ __m128h __DEFAULT_FN_ATTRS128
1037_mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
1038 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1039 (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1040}
1041
1042static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1043_mm256_cvtepi32_ph(__m256i __A) {
1044 return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
1045}
1046
1047static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1048_mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1049 return (__m128h)__builtin_ia32_selectph_128(
1050 (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
1051}
1052
1053static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1054_mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
1055 return (__m128h)__builtin_ia32_selectph_128(
1056 (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
1057}
1058
1059static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) {
1060 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1061 (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1062}
1063
1064static __inline__ __m128h __DEFAULT_FN_ATTRS128
1065_mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1066 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
1067 (__mmask8)__U);
1068}
1069
1070static __inline__ __m128h __DEFAULT_FN_ATTRS128
1071_mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
1072 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1073 (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1074}
1075
1076static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1077_mm256_cvtepu32_ph(__m256i __A) {
1078 return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
1079}
1080
1081static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1082_mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1083 return (__m128h)__builtin_ia32_selectph_128(
1084 (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
1085}
1086
1087static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1088_mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
1089 return (__m128h)__builtin_ia32_selectph_128(
1090 (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
1091}
1092
1093static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) {
1094 return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1095 (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
1096}
1097
1098static __inline__ __m128i __DEFAULT_FN_ATTRS128
1099_mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
1100 return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
1101 (__mmask8)__U);
1102}
1103
1104static __inline__ __m128i __DEFAULT_FN_ATTRS128
1105_mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
1106 return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1107 (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
1108}
1109
1110static __inline__ __m256i __DEFAULT_FN_ATTRS256
1111_mm256_cvttph_epi32(__m128h __A) {
1112 return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1113 (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
1114}
1115
1116static __inline__ __m256i __DEFAULT_FN_ATTRS256
1117_mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
1118 return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
1119 (__mmask8)__U);
1120}
1121
1122static __inline__ __m256i __DEFAULT_FN_ATTRS256
1123_mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
1124 return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1125 (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
1126}
1127
1128static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) {
1129 return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1130 (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
1131}
1132
1133static __inline__ __m128i __DEFAULT_FN_ATTRS128
1134_mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
1135 return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
1136 (__mmask8)__U);
1137}
1138
1139static __inline__ __m128i __DEFAULT_FN_ATTRS128
1140_mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
1141 return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1142 (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
1143}
1144
1145static __inline__ __m256i __DEFAULT_FN_ATTRS256
1146_mm256_cvttph_epu32(__m128h __A) {
1147 return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1148 (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
1149}
1150
1151static __inline__ __m256i __DEFAULT_FN_ATTRS256
1152_mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
1153 return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
1154 (__mmask8)__U);
1155}
1156
1157static __inline__ __m256i __DEFAULT_FN_ATTRS256
1158_mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
1159 return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1160 (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
1161}
1162
1163static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) {
1164 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1165 (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1166}
1167
1168static __inline__ __m128h __DEFAULT_FN_ATTRS128
1169_mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1170 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
1171 (__mmask8)__U);
1172}
1173
1174static __inline__ __m128h __DEFAULT_FN_ATTRS128
1175_mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
1176 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1177 (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1178}
1179
1180static __inline__ __m128h __DEFAULT_FN_ATTRS256
1181_mm256_cvtepi64_ph(__m256i __A) {
1182 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1183 (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1184}
1185
1186static __inline__ __m128h __DEFAULT_FN_ATTRS256
1187_mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1188 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
1189 (__mmask8)__U);
1190}
1191
1192static __inline__ __m128h __DEFAULT_FN_ATTRS256
1193_mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) {
1194 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1195 (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1196}
1197
1198static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) {
1199 return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1200 (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1201}
1202
1203static __inline__ __m128i __DEFAULT_FN_ATTRS128
1204_mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1205 return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
1206 (__mmask8)__U);
1207}
1208
1209static __inline__ __m128i __DEFAULT_FN_ATTRS128
1210_mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1211 return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1212 (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1213}
1214
1215static __inline__ __m256i __DEFAULT_FN_ATTRS256
1216_mm256_cvtph_epi64(__m128h __A) {
1217 return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1218 (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1219}
1220
1221static __inline__ __m256i __DEFAULT_FN_ATTRS256
1222_mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1223 return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
1224 (__mmask8)__U);
1225}
1226
1227static __inline__ __m256i __DEFAULT_FN_ATTRS256
1228_mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1229 return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1230 (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1231}
1232
1233static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) {
1234 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1235 (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1236}
1237
1238static __inline__ __m128h __DEFAULT_FN_ATTRS128
1239_mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1240 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
1241 (__mmask8)__U);
1242}
1243
1244static __inline__ __m128h __DEFAULT_FN_ATTRS128
1245_mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
1246 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1247 (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1248}
1249
1250static __inline__ __m128h __DEFAULT_FN_ATTRS256
1251_mm256_cvtepu64_ph(__m256i __A) {
1252 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1253 (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1254}
1255
1256static __inline__ __m128h __DEFAULT_FN_ATTRS256
1257_mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1258 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
1259 (__mmask8)__U);
1260}
1261
1262static __inline__ __m128h __DEFAULT_FN_ATTRS256
1263_mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) {
1264 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1265 (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1266}
1267
1268static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) {
1269 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1270 (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1271}
1272
1273static __inline__ __m128i __DEFAULT_FN_ATTRS128
1274_mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1275 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1276 (__mmask8)__U);
1277}
1278
1279static __inline__ __m128i __DEFAULT_FN_ATTRS128
1280_mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1281 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1282 (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1283}
1284
1285static __inline__ __m256i __DEFAULT_FN_ATTRS256
1286_mm256_cvtph_epu64(__m128h __A) {
1287 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1288 (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1289}
1290
1291static __inline__ __m256i __DEFAULT_FN_ATTRS256
1292_mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1293 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1294 (__mmask8)__U);
1295}
1296
1297static __inline__ __m256i __DEFAULT_FN_ATTRS256
1298_mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1299 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1300 (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1301}
1302
1303static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) {
1304 return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1305 (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1306}
1307
1308static __inline__ __m128i __DEFAULT_FN_ATTRS128
1309_mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1310 return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
1311 (__mmask8)__U);
1312}
1313
1314static __inline__ __m128i __DEFAULT_FN_ATTRS128
1315_mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
1316 return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1317 (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1318}
1319
1320static __inline__ __m256i __DEFAULT_FN_ATTRS256
1321_mm256_cvttph_epi64(__m128h __A) {
1322 return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1323 (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1324}
1325
1326static __inline__ __m256i __DEFAULT_FN_ATTRS256
1327_mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1328 return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
1329 (__mmask8)__U);
1330}
1331
1332static __inline__ __m256i __DEFAULT_FN_ATTRS256
1333_mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
1334 return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1335 (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1336}
1337
1338static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) {
1339 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1340 (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1341}
1342
1343static __inline__ __m128i __DEFAULT_FN_ATTRS128
1344_mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1345 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1346 (__mmask8)__U);
1347}
1348
1349static __inline__ __m128i __DEFAULT_FN_ATTRS128
1350_mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
1351 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1352 (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1353}
1354
1355static __inline__ __m256i __DEFAULT_FN_ATTRS256
1356_mm256_cvttph_epu64(__m128h __A) {
1357 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1358 (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1359}
1360
1361static __inline__ __m256i __DEFAULT_FN_ATTRS256
1362_mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1363 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1364 (__mmask8)__U);
1365}
1366
1367static __inline__ __m256i __DEFAULT_FN_ATTRS256
1368_mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
1369 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1370 (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1371}
1372
1373static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) {
1374 return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1375 (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1);
1376}
1377
1378static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W,
1379 __mmask8 __U,
1380 __m128h __A) {
1381 return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
1382 (__mmask8)__U);
1383}
1384
1385static __inline__ __m128 __DEFAULT_FN_ATTRS128
1386_mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1387 return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1388 (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
1389}
1390
1391static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) {
1392 return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1393 (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1);
1394}
1395
1396static __inline__ __m256 __DEFAULT_FN_ATTRS256
1397_mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
1398 return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
1399 (__mmask8)__U);
1400}
1401
1402static __inline__ __m256 __DEFAULT_FN_ATTRS256
1403_mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1404 return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1405 (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U);
1406}
1407
1408static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) {
1409 return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1410 (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1411}
1412
1413static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W,
1414 __mmask8 __U,
1415 __m128 __A) {
1416 return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
1417 (__mmask8)__U);
1418}
1419
1420static __inline__ __m128h __DEFAULT_FN_ATTRS128
1421_mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
1422 return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1423 (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1424}
1425
1426static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) {
1427 return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1428 (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1429}
1430
1431static __inline__ __m128h __DEFAULT_FN_ATTRS256
1432_mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
1433 return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
1434 (__mmask8)__U);
1435}
1436
1437static __inline__ __m128h __DEFAULT_FN_ATTRS256
1438_mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
1439 return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1440 (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1441}
1442
1443static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A,
1444 __m128h __B,
1445 __m128h __C) {
1446 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
1447 (__v8hf)__C);
1448}
1449
1450static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A,
1451 __mmask8 __U,
1452 __m128h __B,
1453 __m128h __C) {
1454 return (__m128h)__builtin_ia32_selectph_128(
1455 (__mmask8)__U,
1456 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1457 (__v8hf)__A);
1458}
1459
1460static __inline__ __m128h __DEFAULT_FN_ATTRS128
1461_mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1462 return (__m128h)__builtin_ia32_selectph_128(
1463 (__mmask8)__U,
1464 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1465 (__v8hf)__C);
1466}
1467
1468static __inline__ __m128h __DEFAULT_FN_ATTRS128
1469_mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1470 return (__m128h)__builtin_ia32_selectph_128(
1471 (__mmask8)__U,
1472 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1473 (__v8hf)_mm_setzero_ph());
1474}
1475
1476static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A,
1477 __m128h __B,
1478 __m128h __C) {
1479 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
1480 -(__v8hf)__C);
1481}
1482
1483static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A,
1484 __mmask8 __U,
1485 __m128h __B,
1486 __m128h __C) {
1487 return (__m128h)__builtin_ia32_selectph_128(
1488 (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1489 (__v8hf)__A);
1490}
1491
1492static __inline__ __m128h __DEFAULT_FN_ATTRS128
1493_mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1494 return (__m128h)__builtin_ia32_selectph_128(
1495 (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1496 (__v8hf)_mm_setzero_ph());
1497}
1498
1499static __inline__ __m128h __DEFAULT_FN_ATTRS128
1500_mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1501 return (__m128h)__builtin_ia32_selectph_128(
1502 (__mmask8)__U,
1503 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1504 (__v8hf)__C);
1505}
1506
1507static __inline__ __m128h __DEFAULT_FN_ATTRS128
1508_mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1509 return (__m128h)__builtin_ia32_selectph_128(
1510 (__mmask8)__U,
1511 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1512 (__v8hf)_mm_setzero_ph());
1513}
1514
1515static __inline__ __m128h __DEFAULT_FN_ATTRS128
1516_mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1517 return (__m128h)__builtin_ia32_selectph_128(
1518 (__mmask8)__U,
1519 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1520 (__v8hf)_mm_setzero_ph());
1521}
1522
1523static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A,
1524 __m256h __B,
1525 __m256h __C) {
1526 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
1527 (__v16hf)__C);
1528}
1529
1530static __inline__ __m256h __DEFAULT_FN_ATTRS256
1531_mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1532 return (__m256h)__builtin_ia32_selectph_256(
1533 (__mmask16)__U,
1534 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1535 (__v16hf)__A);
1536}
1537
1538static __inline__ __m256h __DEFAULT_FN_ATTRS256
1539_mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1540 return (__m256h)__builtin_ia32_selectph_256(
1541 (__mmask16)__U,
1542 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1543 (__v16hf)__C);
1544}
1545
1546static __inline__ __m256h __DEFAULT_FN_ATTRS256
1547_mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1548 return (__m256h)__builtin_ia32_selectph_256(
1549 (__mmask16)__U,
1550 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1551 (__v16hf)_mm256_setzero_ph());
1552}
1553
1554static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A,
1555 __m256h __B,
1556 __m256h __C) {
1557 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
1558 -(__v16hf)__C);
1559}
1560
1561static __inline__ __m256h __DEFAULT_FN_ATTRS256
1562_mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1563 return (__m256h)__builtin_ia32_selectph_256(
1564 (__mmask16)__U,
1565 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1566 (__v16hf)__A);
1567}
1568
1569static __inline__ __m256h __DEFAULT_FN_ATTRS256
1570_mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1571 return (__m256h)__builtin_ia32_selectph_256(
1572 (__mmask16)__U,
1573 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1574 (__v16hf)_mm256_setzero_ph());
1575}
1576
1577static __inline__ __m256h __DEFAULT_FN_ATTRS256
1578_mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1579 return (__m256h)__builtin_ia32_selectph_256(
1580 (__mmask16)__U,
1581 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1582 (__v16hf)__C);
1583}
1584
1585static __inline__ __m256h __DEFAULT_FN_ATTRS256
1586_mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1587 return (__m256h)__builtin_ia32_selectph_256(
1588 (__mmask16)__U,
1589 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1590 (__v16hf)_mm256_setzero_ph());
1591}
1592
1593static __inline__ __m256h __DEFAULT_FN_ATTRS256
1594_mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1595 return (__m256h)__builtin_ia32_selectph_256(
1596 (__mmask16)__U,
1597 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1598 (__v16hf)_mm256_setzero_ph());
1599}
1600
1601static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A,
1602 __m128h __B,
1603 __m128h __C) {
1604 return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1605 (__v8hf)__C);
1606}
1607
1608static __inline__ __m128h __DEFAULT_FN_ATTRS128
1609_mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1610 return (__m128h)__builtin_ia32_selectph_128(
1611 (__mmask8)__U,
1612 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1613 (__v8hf)__A);
1614}
1615
1616static __inline__ __m128h __DEFAULT_FN_ATTRS128
1617_mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1618 return (__m128h)__builtin_ia32_selectph_128(
1619 (__mmask8)__U,
1620 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1621 (__v8hf)__C);
1622}
1623
1624static __inline__ __m128h __DEFAULT_FN_ATTRS128
1625_mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1626 return (__m128h)__builtin_ia32_selectph_128(
1627 (__mmask8)__U,
1628 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1629 (__v8hf)_mm_setzero_ph());
1630}
1631
1632static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A,
1633 __m128h __B,
1634 __m128h __C) {
1635 return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1636 -(__v8hf)__C);
1637}
1638
1639static __inline__ __m128h __DEFAULT_FN_ATTRS128
1640_mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1641 return (__m128h)__builtin_ia32_selectph_128(
1642 (__mmask8)__U,
1643 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1644 (__v8hf)__A);
1645}
1646
1647static __inline__ __m128h __DEFAULT_FN_ATTRS128
1648_mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1649 return (__m128h)__builtin_ia32_selectph_128(
1650 (__mmask8)__U,
1651 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1652 (__v8hf)_mm_setzero_ph());
1653}
1654
1655static __inline__ __m256h __DEFAULT_FN_ATTRS256
1656_mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
1657 return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1658 (__v16hf)__C);
1659}
1660
1661static __inline__ __m256h __DEFAULT_FN_ATTRS256
1662_mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1663 return (__m256h)__builtin_ia32_selectph_256(
1664 (__mmask16)__U,
1665 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1666 (__v16hf)__A);
1667}
1668
1669static __inline__ __m256h __DEFAULT_FN_ATTRS256
1670_mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1671 return (__m256h)__builtin_ia32_selectph_256(
1672 (__mmask16)__U,
1673 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1674 (__v16hf)__C);
1675}
1676
1677static __inline__ __m256h __DEFAULT_FN_ATTRS256
1678_mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1679 return (__m256h)__builtin_ia32_selectph_256(
1680 (__mmask16)__U,
1681 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1682 (__v16hf)_mm256_setzero_ph());
1683}
1684
1685static __inline__ __m256h __DEFAULT_FN_ATTRS256
1686_mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
1687 return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1688 -(__v16hf)__C);
1689}
1690
1691static __inline__ __m256h __DEFAULT_FN_ATTRS256
1692_mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1693 return (__m256h)__builtin_ia32_selectph_256(
1694 (__mmask16)__U,
1695 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1696 (__v16hf)__A);
1697}
1698
1699static __inline__ __m256h __DEFAULT_FN_ATTRS256
1700_mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1701 return (__m256h)__builtin_ia32_selectph_256(
1702 (__mmask16)__U,
1703 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1704 (__v16hf)_mm256_setzero_ph());
1705}
1706
1707static __inline__ __m128h __DEFAULT_FN_ATTRS128
1708_mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1709 return (__m128h)__builtin_ia32_selectph_128(
1710 (__mmask8)__U,
1711 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1712 (__v8hf)__C);
1713}
1714
1715static __inline__ __m256h __DEFAULT_FN_ATTRS256
1716_mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1717 return (__m256h)__builtin_ia32_selectph_256(
1718 (__mmask16)__U,
1719 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1720 (__v16hf)__C);
1721}
1722
1723static __inline__ __m128h __DEFAULT_FN_ATTRS128
1724_mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1725 return (__m128h)__builtin_ia32_selectph_128(
1726 (__mmask8)__U,
1727 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1728 (__v8hf)__C);
1729}
1730
1731static __inline__ __m256h __DEFAULT_FN_ATTRS256
1732_mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1733 return (__m256h)__builtin_ia32_selectph_256(
1734 (__mmask16)__U,
1735 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1736 (__v16hf)__C);
1737}
1738
1739static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A,
1740 __m128h __B,
1741 __m128h __C) {
1742 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
1743 (__v8hf)__C);
1744}
1745
1746static __inline__ __m128h __DEFAULT_FN_ATTRS128
1747_mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1748 return (__m128h)__builtin_ia32_selectph_128(
1749 (__mmask8)__U,
1750 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
1751 (__v8hf)__A);
1752}
1753
1754static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A,
1755 __m256h __B,
1756 __m256h __C) {
1757 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
1758 (__v16hf)__C);
1759}
1760
1761static __inline__ __m256h __DEFAULT_FN_ATTRS256
1762_mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1763 return (__m256h)__builtin_ia32_selectph_256(
1764 (__mmask16)__U,
1765 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
1766 (__v16hf)__A);
1767}
1768
1769static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A,
1770 __m128h __B,
1771 __m128h __C) {
1772 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
1773 -(__v8hf)__C);
1774}
1775
1776static __inline__ __m128h __DEFAULT_FN_ATTRS128
1777_mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1778 return (__m128h)__builtin_ia32_selectph_128(
1779 (__mmask8)__U,
1780 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1781 (__v8hf)__A);
1782}
1783
1784static __inline__ __m128h __DEFAULT_FN_ATTRS128
1785_mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1786 return (__m128h)__builtin_ia32_selectph_128(
1787 (__mmask8)__U,
1788 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1789 (__v8hf)__C);
1790}
1791
1792static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A,
1793 __m256h __B,
1794 __m256h __C) {
1795 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
1796 -(__v16hf)__C);
1797}
1798
1799static __inline__ __m256h __DEFAULT_FN_ATTRS256
1800_mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1801 return (__m256h)__builtin_ia32_selectph_256(
1802 (__mmask16)__U,
1803 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1804 (__v16hf)__A);
1805}
1806
1807static __inline__ __m256h __DEFAULT_FN_ATTRS256
1808_mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1809 return (__m256h)__builtin_ia32_selectph_256(
1810 (__mmask16)__U,
1811 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1812 (__v16hf)__C);
1813}
1814
1815static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A,
1816 __m128h __B) {
1817 return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1818 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1819}
1820
1821static __inline__ __m128h __DEFAULT_FN_ATTRS128
1822_mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1823 return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1824 (__v4sf)__W, (__mmask8)__U);
1825}
1826
1827static __inline__ __m128h __DEFAULT_FN_ATTRS128
1828_mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1829 return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1830 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1831}
1832
1833static __inline__ __m256h __DEFAULT_FN_ATTRS128 _mm256_fcmul_pch(__m256h __A,
1834 __m256h __B) {
1835 return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1836 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1837}
1838
1839static __inline__ __m256h __DEFAULT_FN_ATTRS256
1840_mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1841 return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1842 (__v8sf)__W, (__mmask8)__U);
1843}
1844
1845static __inline__ __m256h __DEFAULT_FN_ATTRS256
1846_mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1847 return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1848 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1849}
1850
1851static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A,
1852 __m128h __B,
1853 __m128h __C) {
1854 return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1855 (__v4sf)__C, (__mmask8)-1);
1856}
1857
1858static __inline__ __m128h __DEFAULT_FN_ATTRS128
1859_mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1860 return (__m128h)__builtin_ia32_selectps_128(
1861 __U,
1862 __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B,
1863 (__v4sf)__C, (__mmask8)__U),
1864 (__v4sf)__A);
1865}
1866
1867static __inline__ __m128h __DEFAULT_FN_ATTRS128
1868_mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1869 return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1870 (__v4sf)__C, (__mmask8)__U);
1871}
1872
1873static __inline__ __m128h __DEFAULT_FN_ATTRS128
1874_mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1875 return (__m128h)__builtin_ia32_vfcmaddcph128_maskz(
1876 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U);
1877}
1878
1879static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A,
1880 __m256h __B,
1881 __m256h __C) {
1882 return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1883 (__v8sf)__C, (__mmask8)-1);
1884}
1885
1886static __inline__ __m256h __DEFAULT_FN_ATTRS256
1887_mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1888 return (__m256h)__builtin_ia32_selectps_256(
1889 __U,
1890 __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1891 (__mmask8)__U),
1892 (__v8sf)__A);
1893}
1894
1895static __inline__ __m256h __DEFAULT_FN_ATTRS256
1896_mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1897 return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1898 (__v8sf)__C, (__mmask8)__U);
1899}
1900
1901static __inline__ __m256h __DEFAULT_FN_ATTRS256
1902_mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1903 return (__m256h)__builtin_ia32_vfcmaddcph256_maskz(
1904 (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U);
1905}
1906
1907static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A,
1908 __m128h __B) {
1909 return (__m128h)__builtin_ia32_vfmulcph128_mask(
1910 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1911}
1912
1913static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_pch(__m128h __W,
1914 __mmask8 __U,
1915 __m128h __A,
1916 __m128h __B) {
1917 return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1918 (__v4sf)__W, (__mmask8)__U);
1919}
1920
1921static __inline__ __m128h __DEFAULT_FN_ATTRS128
1922_mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1923 return (__m128h)__builtin_ia32_vfmulcph128_mask(
1924 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1925}
1926
1927static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A,
1928 __m256h __B) {
1929 return (__m256h)__builtin_ia32_vfmulcph256_mask(
1930 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1931}
1932
1933static __inline__ __m256h __DEFAULT_FN_ATTRS256
1934_mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1935 return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1936 (__v8sf)__W, (__mmask8)__U);
1937}
1938
1939static __inline__ __m256h __DEFAULT_FN_ATTRS256
1940_mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1941 return (__m256h)__builtin_ia32_vfmulcph256_mask(
1942 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1943}
1944
1945static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A,
1946 __m128h __B,
1947 __m128h __C) {
1948 return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1949 (__v4sf)__C, (__mmask8)-1);
1950}
1951
1952static __inline__ __m128h __DEFAULT_FN_ATTRS128
1953_mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1954 return (__m128h)__builtin_ia32_selectps_128(
1955 __U,
1956 __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
1957 (__mmask8)__U),
1958 (__v4sf)__A);
1959}
1960
1961static __inline__ __m128h __DEFAULT_FN_ATTRS128
1962_mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1963 return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1964 (__v4sf)__C, (__mmask8)__U);
1965}
1966
1967static __inline__ __m128h __DEFAULT_FN_ATTRS128
1968_mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1969 return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B,
1970 (__v4sf)__C, (__mmask8)__U);
1971}
1972
1973static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A,
1974 __m256h __B,
1975 __m256h __C) {
1976 return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1977 (__v8sf)__C, (__mmask8)-1);
1978}
1979
1980static __inline__ __m256h __DEFAULT_FN_ATTRS256
1981_mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1982 return (__m256h)__builtin_ia32_selectps_256(
1983 __U,
1984 __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1985 (__mmask8)__U),
1986 (__v8sf)__A);
1987}
1988
1989static __inline__ __m256h __DEFAULT_FN_ATTRS256
1990_mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1991 return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1992 (__v8sf)__C, (__mmask8)__U);
1993}
1994
1995static __inline__ __m256h __DEFAULT_FN_ATTRS256
1996_mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1997 return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B,
1998 (__v8sf)__C, (__mmask8)__U);
1999}
2000
2001static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
2002_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
2003 return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
2004 (__v8hf)__A);
2005}
2006
2007static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
2008_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
2009 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
2010 (__v16hf)__A);
2011}
2012
2013static __inline__ __m128h __DEFAULT_FN_ATTRS128
2014_mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
2015 return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
2016 (__v8hi)__B);
2017}
2018
2019static __inline__ __m256h __DEFAULT_FN_ATTRS256
2020_mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
2021 return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
2022 (__v16hi)__B);
2023}
2024
2025static __inline__ __m128h __DEFAULT_FN_ATTRS128
2026_mm_permutexvar_ph(__m128i __A, __m128h __B) {
2027 return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
2028}
2029
2030static __inline__ __m256h __DEFAULT_FN_ATTRS256
2031_mm256_permutexvar_ph(__m256i __A, __m256h __B) {
2032 return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
2033}
2034
2035static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2036_mm256_reduce_add_ph(__m256h __W) {
2037 return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W);
2038}
2039
2040static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2041_mm256_reduce_mul_ph(__m256h __W) {
2042 return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W);
2043}
2044
2045static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2046_mm256_reduce_max_ph(__m256h __V) {
2047 return __builtin_ia32_reduce_fmax_ph256(__V);
2048}
2049
2050static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2051_mm256_reduce_min_ph(__m256h __V) {
2052 return __builtin_ia32_reduce_fmin_ph256(__V);
2053}
2054
2055static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2056_mm_reduce_add_ph(__m128h __W) {
2057 return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W);
2058}
2059
2060static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2061_mm_reduce_mul_ph(__m128h __W) {
2062 return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W);
2063}
2064
2065static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2066_mm_reduce_max_ph(__m128h __V) {
2067 return __builtin_ia32_reduce_fmax_ph128(__V);
2068}
2069
2070static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2071_mm_reduce_min_ph(__m128h __V) {
2072 return __builtin_ia32_reduce_fmin_ph128(__V);
2073}
2074
// The macros below are aliases: each mul_pch spelling forwards its
// arguments, unchanged and in order, to the matching fmul_pch intrinsic.
#define _mm_mul_pch(A, B) _mm_fmul_pch((A), (B))
#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch((W), (U), (A), (B))
#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch((U), (A), (B))
#define _mm256_mul_pch(A, B) _mm256_fmul_pch((A), (B))
#define _mm256_mask_mul_pch(W, U, A, B)                                        \
  _mm256_mask_fmul_pch((W), (U), (A), (B))
#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch((U), (A), (B))

// Likewise, each cmul_pch spelling forwards to the matching fcmul_pch
// intrinsic.
#define _mm_cmul_pch(A, B) _mm_fcmul_pch((A), (B))
#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch((W), (U), (A), (B))
#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch((U), (A), (B))
#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch((A), (B))
#define _mm256_mask_cmul_pch(W, U, A, B)                                       \
  _mm256_mask_fcmul_pch((W), (U), (A), (B))
#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch((U), (A), (B))
2089
2090#undef __DEFAULT_FN_ATTRS128
2091#undef __DEFAULT_FN_ATTRS256
2092#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
2093#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
2094
2095#endif
2096#endif
__device__ _Float16
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS256_CONSTEXPR
Definition avx2intrin.h:29
unsigned char __mmask8
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition avxintrin.h:3641
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void)
Create a 256-bit vector of [4 x double] with undefined values.
Definition avxintrin.h:3628
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3654
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_ps(float __w)
Constructs a 256-bit floating-point vector of [8 x float], with each of the eight single-precision fl...
Definition avxintrin.h:4223
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4325
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements se...
Definition avxintrin.h:4242
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_ps(__m256 __a, __m256 __b)
Performs a bitwise XOR of two 256-bit vectors of [8 x float].
Definition avxintrin.h:674
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4313
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4337
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
Definition emmintrin.h:1765
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition emmintrin.h:3709
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
Definition xmmintrin.h:1899
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_ps(__m128 __a, __m128 __b)
Performs a bitwise exclusive OR of two 128-bit vectors of [4 x float].
Definition xmmintrin.h:489
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_ps(float __w)
Constructs a 128-bit floating-point vector of [4 x float], with each of the four single-precision flo...
Definition xmmintrin.h:1936
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2021