clang 23.0.0git
avx512vlfp16intrin.h
Go to the documentation of this file.
1/*===---------- avx512vlfp16intrin.h - AVX512-FP16 intrinsics --------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error \
11 "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifdef __SSE2__
15
16#ifndef __AVX512VLFP16INTRIN_H
17#define __AVX512VLFP16INTRIN_H
18
/* Attribute set applied to the 256-bit intrinsics in this file: forced
 * inlining, __nodebug__, the avx512fp16 + avx512vl target features and a
 * minimum vector width of 256. */
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__))                              \
  __attribute__((__target__("avx512fp16,avx512vl"),                            \
                 __min_vector_width__(256)))
/* Attribute set applied to the 128-bit intrinsics in this file: forced
 * inlining, __nodebug__, the avx512fp16 + avx512vl target features and a
 * minimum vector width of 128. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__))                              \
  __attribute__((__target__("avx512fp16,avx512vl"),                            \
                 __min_vector_width__(128)))
28
/* The *_CONSTEXPR variants append `constexpr` only when compiling as C++11 or
 * newer; in every other mode they are plain aliases of the base macros. */
#if !defined(__cplusplus) || (__cplusplus < 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#endif
36
38_mm_cvtsh_h(__m128h __a) {
39 return __a[0];
40}
41
43_mm256_cvtsh_h(__m256h __a) {
44 return __a[0];
45}
46
47static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
48_mm_set_sh(_Float16 __h) {
49 return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
50}
51
52static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
53_mm_set1_ph(_Float16 __h) {
54 return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h};
55}
56
57static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
58_mm256_set1_ph(_Float16 __h) {
59 return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h,
60 __h, __h, __h, __h, __h, __h, __h, __h};
61}
62
63static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
64_mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
65 _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
66 return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
67}
68
69static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
70_mm256_set1_pch(_Float16 _Complex h) {
71 return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h));
72}
73
74static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
75_mm_set1_pch(_Float16 _Complex h) {
76 return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h));
77}
78
79static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
80_mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
81 _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
82 _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
83 _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) {
84 return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
85 __h10, __h9, __h8, __h7, __h6, __h5,
86 __h4, __h3, __h2, __h1};
87}
88
89static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
90_mm_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4,
91 _Float16 e5, _Float16 e6, _Float16 e7) {
92 return _mm_set_ph(e7, e6, e5, e4, e3, e2, e1, e0);
93}
94
95static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
96_mm256_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4,
97 _Float16 e5, _Float16 e6, _Float16 e7, _Float16 e8, _Float16 e9,
98 _Float16 e10, _Float16 e11, _Float16 e12, _Float16 e13,
99 _Float16 e14, _Float16 e15) {
100 return _mm256_set_ph(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
101 e2, e1, e0);
102}
103
104static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A,
105 __m256h __B) {
106 return (__m256h)((__v16hf)__A + (__v16hf)__B);
107}
108
109static __inline__ __m256h __DEFAULT_FN_ATTRS256
110_mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
111 return (__m256h)__builtin_ia32_selectph_256(
112 __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W);
113}
114
115static __inline__ __m256h __DEFAULT_FN_ATTRS256
116_mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) {
117 return (__m256h)__builtin_ia32_selectph_256(
118 __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
119}
120
121static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A,
122 __m128h __B) {
123 return (__m128h)((__v8hf)__A + (__v8hf)__B);
124}
125
126static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W,
127 __mmask8 __U,
128 __m128h __A,
129 __m128h __B) {
130 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
131 (__v8hf)__W);
132}
133
134static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U,
135 __m128h __A,
136 __m128h __B) {
137 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
138 (__v8hf)_mm_setzero_ph());
139}
140
141static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A,
142 __m256h __B) {
143 return (__m256h)((__v16hf)__A - (__v16hf)__B);
144}
145
146static __inline__ __m256h __DEFAULT_FN_ATTRS256
147_mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
148 return (__m256h)__builtin_ia32_selectph_256(
149 __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W);
150}
151
152static __inline__ __m256h __DEFAULT_FN_ATTRS256
153_mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) {
154 return (__m256h)__builtin_ia32_selectph_256(
155 __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
156}
157
158static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A,
159 __m128h __B) {
160 return (__m128h)((__v8hf)__A - (__v8hf)__B);
161}
162
163static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W,
164 __mmask8 __U,
165 __m128h __A,
166 __m128h __B) {
167 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
168 (__v8hf)__W);
169}
170
171static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U,
172 __m128h __A,
173 __m128h __B) {
174 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
175 (__v8hf)_mm_setzero_ph());
176}
177
178static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A,
179 __m256h __B) {
180 return (__m256h)((__v16hf)__A * (__v16hf)__B);
181}
182
183static __inline__ __m256h __DEFAULT_FN_ATTRS256
184_mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
185 return (__m256h)__builtin_ia32_selectph_256(
186 __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W);
187}
188
189static __inline__ __m256h __DEFAULT_FN_ATTRS256
190_mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) {
191 return (__m256h)__builtin_ia32_selectph_256(
192 __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
193}
194
195static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A,
196 __m128h __B) {
197 return (__m128h)((__v8hf)__A * (__v8hf)__B);
198}
199
200static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W,
201 __mmask8 __U,
202 __m128h __A,
203 __m128h __B) {
204 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
205 (__v8hf)__W);
206}
207
208static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U,
209 __m128h __A,
210 __m128h __B) {
211 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
212 (__v8hf)_mm_setzero_ph());
213}
214
215static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A,
216 __m256h __B) {
217 return (__m256h)((__v16hf)__A / (__v16hf)__B);
218}
219
220static __inline__ __m256h __DEFAULT_FN_ATTRS256
221_mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
222 return (__m256h)__builtin_ia32_selectph_256(
223 __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W);
224}
225
226static __inline__ __m256h __DEFAULT_FN_ATTRS256
227_mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) {
228 return (__m256h)__builtin_ia32_selectph_256(
229 __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
230}
231
232static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A,
233 __m128h __B) {
234 return (__m128h)((__v8hf)__A / (__v8hf)__B);
235}
236
237static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W,
238 __mmask8 __U,
239 __m128h __A,
240 __m128h __B) {
241 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
242 (__v8hf)__W);
243}
244
245static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U,
246 __m128h __A,
247 __m128h __B) {
248 return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
249 (__v8hf)_mm_setzero_ph());
250}
251
252static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
253_mm256_min_ph(__m256h __A, __m256h __B) {
254 return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B);
255}
256
257static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
258_mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
259 return (__m256h)__builtin_ia32_selectph_256(
260 (__mmask16)__U,
261 (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
262 (__v16hf)__W);
263}
264
265static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
266_mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) {
267 return (__m256h)__builtin_ia32_selectph_256(
268 (__mmask16)__U,
269 (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
270 (__v16hf)_mm256_setzero_ph());
271}
272
273static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
274_mm_min_ph(__m128h __A, __m128h __B) {
275 return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B);
276}
277
278static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
279_mm_mask_min_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
280 return (__m128h)__builtin_ia32_selectph_128(
281 (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
282 (__v8hf)__W);
283}
284
285static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
286_mm_maskz_min_ph(__mmask8 __U, __m128h __A, __m128h __B) {
287 return (__m128h)__builtin_ia32_selectph_128(
288 (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
289 (__v8hf)_mm_setzero_ph());
290}
291
292static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
293_mm256_max_ph(__m256h __A, __m256h __B) {
294 return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B);
295}
296
297static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
298_mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
299 return (__m256h)__builtin_ia32_selectph_256(
300 (__mmask16)__U,
301 (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
302 (__v16hf)__W);
303}
304
305static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
306_mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) {
307 return (__m256h)__builtin_ia32_selectph_256(
308 (__mmask16)__U,
309 (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
310 (__v16hf)_mm256_setzero_ph());
311}
312
313static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
314_mm_max_ph(__m128h __A, __m128h __B) {
315 return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B);
316}
317
318static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
319_mm_mask_max_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
320 return (__m128h)__builtin_ia32_selectph_128(
321 (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
322 (__v8hf)__W);
323}
324
325static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
326_mm_maskz_max_ph(__mmask8 __U, __m128h __A, __m128h __B) {
327 return (__m128h)__builtin_ia32_selectph_128(
328 (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
329 (__v8hf)_mm_setzero_ph());
330}
331
332static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
333_mm256_abs_ph(__m256h __A) {
334 return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A);
335}
336
337static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
338_mm_abs_ph(__m128h __A) {
339 return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A);
340}
341
342static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A) {
343 return (__m256h)_mm256_xor_ps((__m256)__A, _mm256_set1_ps(-0.0f));
344}
345
346static __inline__ __m256h __DEFAULT_FN_ATTRS256
347_mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) {
348 return (__m256h)__builtin_ia32_selectps_256(
349 (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W);
350}
351
352static __inline__ __m256h __DEFAULT_FN_ATTRS256
353_mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) {
354 return (__m256h)__builtin_ia32_selectps_256(
355 (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)_mm256_setzero_ps());
356}
357
358static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A) {
359 return (__m128h)_mm_xor_ps((__m128)__A, _mm_set1_ps(-0.0f));
360}
361
362static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_conj_pch(__m128h __W,
363 __mmask8 __U,
364 __m128h __A) {
365 return (__m128h)__builtin_ia32_selectps_128(
366 (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W);
367}
368
369static __inline__ __m128h __DEFAULT_FN_ATTRS128
370_mm_maskz_conj_pch(__mmask8 __U, __m128h __A) {
371 return (__m128h)__builtin_ia32_selectps_128(
372 (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)_mm_setzero_ps());
373}
374
/* Compares a and b with comparison immediate p. Expressed as the masked form
 * with an all-ones input mask, which expands to the same builtin call. */
#define _mm256_cmp_ph_mask(a, b, p)                                            \
  _mm256_mask_cmp_ph_mask((__mmask16)-1, (a), (b), (p))
378
/* Compares a and b with comparison immediate p, passing m as the input mask
 * to __builtin_ia32_cmpph256_mask. */
#define _mm256_mask_cmp_ph_mask(m, a, b, p)                                    \
  ((__mmask16)__builtin_ia32_cmpph256_mask((__v16hf)(__m256h)(a),              \
                                           (__v16hf)(__m256h)(b),              \
                                           (int)(p),                           \
                                           (__mmask16)(m)))
382
/* Compares a and b with comparison immediate p. Expressed as the masked form
 * with an all-ones input mask, which expands to the same builtin call. */
#define _mm_cmp_ph_mask(a, b, p)                                               \
  _mm_mask_cmp_ph_mask((__mmask8)-1, (a), (b), (p))
386
/* Compares a and b with comparison immediate p, passing m as the input mask
 * to __builtin_ia32_cmpph128_mask. */
#define _mm_mask_cmp_ph_mask(m, a, b, p)                                       \
  ((__mmask8)__builtin_ia32_cmpph128_mask((__v8hf)(__m128h)(a),                \
                                          (__v8hf)(__m128h)(b),                \
                                          (int)(p),                            \
                                          (__mmask8)(m)))
390
391static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) {
392 return (__m256h)__builtin_ia32_rcpph256_mask(
393 (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
394}
395
396static __inline__ __m256h __DEFAULT_FN_ATTRS256
397_mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
398 return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W,
399 (__mmask16)__U);
400}
401
402static __inline__ __m256h __DEFAULT_FN_ATTRS256
403_mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) {
404 return (__m256h)__builtin_ia32_rcpph256_mask(
405 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
406}
407
408static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) {
409 return (__m128h)__builtin_ia32_rcpph128_mask(
410 (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
411}
412
413static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W,
414 __mmask8 __U,
415 __m128h __A) {
416 return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W,
417 (__mmask8)__U);
418}
419
420static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U,
421 __m128h __A) {
422 return (__m128h)__builtin_ia32_rcpph128_mask(
423 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
424}
425
426static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) {
427 return (__m256h)__builtin_ia32_rsqrtph256_mask(
428 (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
429}
430
431static __inline__ __m256h __DEFAULT_FN_ATTRS256
432_mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
433 return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W,
434 (__mmask16)__U);
435}
436
437static __inline__ __m256h __DEFAULT_FN_ATTRS256
438_mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) {
439 return (__m256h)__builtin_ia32_rsqrtph256_mask(
440 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
441}
442
443static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) {
444 return (__m128h)__builtin_ia32_rsqrtph128_mask(
445 (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
446}
447
448static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W,
449 __mmask8 __U,
450 __m128h __A) {
451 return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W,
452 (__mmask8)__U);
453}
454
455static __inline__ __m128h __DEFAULT_FN_ATTRS128
456_mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) {
457 return (__m128h)__builtin_ia32_rsqrtph128_mask(
458 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
459}
460
461static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) {
462 return (__m128h)__builtin_ia32_getexpph128_mask(
463 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
464}
465
466static __inline__ __m128h __DEFAULT_FN_ATTRS128
467_mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) {
468 return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W,
469 (__mmask8)__U);
470}
471
472static __inline__ __m128h __DEFAULT_FN_ATTRS128
473_mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) {
474 return (__m128h)__builtin_ia32_getexpph128_mask(
475 (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
476}
477
478static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) {
479 return (__m256h)__builtin_ia32_getexpph256_mask(
480 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
481}
482
483static __inline__ __m256h __DEFAULT_FN_ATTRS256
484_mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
485 return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W,
486 (__mmask16)__U);
487}
488
489static __inline__ __m256h __DEFAULT_FN_ATTRS256
490_mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) {
491 return (__m256h)__builtin_ia32_getexpph256_mask(
492 (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
493}
494
/* Unmasked getmant. Expressed as the zero-masked form with an all-ones mask,
 * which expands to the same builtin call (immediate ((C) << 2) | (B), zero
 * pass-through). */
#define _mm_getmant_ph(A, B, C)                                                \
  _mm_maskz_getmant_ph((__mmask8)-1, (A), (B), (C))
499
/* Masked getmant: B and C are packed into one immediate as ((C) << 2) | (B);
 * W is the pass-through operand and U the mask. */
#define _mm_mask_getmant_ph(W, U, A, B, C)                                     \
  ((__m128h)__builtin_ia32_getmantph128_mask((__v8hf)(__m128h)(A),             \
                                             (int)(((C) << 2) | (B)),          \
                                             (__v8hf)(__m128h)(W),             \
                                             (__mmask8)(U)))
504
/* Zero-masked getmant: B and C are packed into one immediate as
 * ((C) << 2) | (B); a zero vector is the pass-through operand. */
#define _mm_maskz_getmant_ph(U, A, B, C)                                       \
  ((__m128h)__builtin_ia32_getmantph128_mask((__v8hf)(__m128h)(A),             \
                                             (int)(((C) << 2) | (B)),          \
                                             (__v8hf)_mm_setzero_ph(),         \
                                             (__mmask8)(U)))
509
/* Unmasked getmant. Expressed as the zero-masked form with an all-ones mask,
 * which expands to the same builtin call (immediate ((C) << 2) | (B), zero
 * pass-through). */
#define _mm256_getmant_ph(A, B, C)                                             \
  _mm256_maskz_getmant_ph((__mmask16)-1, (A), (B), (C))
514
/* Masked getmant: B and C are packed into one immediate as ((C) << 2) | (B);
 * W is the pass-through operand and U the mask. */
#define _mm256_mask_getmant_ph(W, U, A, B, C)                                  \
  ((__m256h)__builtin_ia32_getmantph256_mask((__v16hf)(__m256h)(A),            \
                                             (int)(((C) << 2) | (B)),          \
                                             (__v16hf)(__m256h)(W),            \
                                             (__mmask16)(U)))
519
/* Zero-masked getmant: B and C are packed into one immediate as
 * ((C) << 2) | (B); a zero vector is the pass-through operand. */
#define _mm256_maskz_getmant_ph(U, A, B, C)                                    \
  ((__m256h)__builtin_ia32_getmantph256_mask((__v16hf)(__m256h)(A),            \
                                             (int)(((C) << 2) | (B)),          \
                                             (__v16hf)_mm256_setzero_ph(),     \
                                             (__mmask16)(U)))
524
525static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A,
526 __m128h __B) {
527 return (__m128h)__builtin_ia32_scalefph128_mask(
528 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
529}
530
531static __inline__ __m128h __DEFAULT_FN_ATTRS128
532_mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
533 return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B,
534 (__v8hf)__W, (__mmask8)__U);
535}
536
537static __inline__ __m128h __DEFAULT_FN_ATTRS128
538_mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) {
539 return (__m128h)__builtin_ia32_scalefph128_mask(
540 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
541}
542
543static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A,
544 __m256h __B) {
545 return (__m256h)__builtin_ia32_scalefph256_mask(
546 (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
547}
548
549static __inline__ __m256h __DEFAULT_FN_ATTRS256
550_mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
551 return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B,
552 (__v16hf)__W, (__mmask16)__U);
553}
554
555static __inline__ __m256h __DEFAULT_FN_ATTRS256
556_mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) {
557 return (__m256h)__builtin_ia32_scalefph256_mask(
558 (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
559}
560
/* Unmasked roundscale with immediate imm. Expressed as the zero-masked form
 * with an all-ones mask, which expands to the same builtin call. */
#define _mm_roundscale_ph(A, imm)                                              \
  _mm_maskz_roundscale_ph((__mmask8)-1, (A), (imm))
565
/* Masked roundscale with immediate imm; W is the pass-through operand and U
 * the mask given to __builtin_ia32_rndscaleph_128_mask. */
#define _mm_mask_roundscale_ph(W, U, A, imm)                                   \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask((__v8hf)(__m128h)(A),           \
                                               (int)(imm),                     \
                                               (__v8hf)(__m128h)(W),           \
                                               (__mmask8)(U)))
569
/* Zero-masked roundscale with immediate imm; a zero vector is the
 * pass-through operand and U the mask. */
#define _mm_maskz_roundscale_ph(U, A, imm)                                     \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask((__v8hf)(__m128h)(A),           \
                                               (int)(imm),                     \
                                               (__v8hf)_mm_setzero_ph(),       \
                                               (__mmask8)(U)))
574
/* Unmasked roundscale with immediate imm. Expressed as the zero-masked form
 * with an all-ones mask, which expands to the same builtin call. */
#define _mm256_roundscale_ph(A, imm)                                           \
  _mm256_maskz_roundscale_ph((__mmask16)-1, (A), (imm))
579
/* Masked roundscale with immediate imm; W is the pass-through operand and U
 * the mask given to __builtin_ia32_rndscaleph_256_mask. */
#define _mm256_mask_roundscale_ph(W, U, A, imm)                                \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask((__v16hf)(__m256h)(A),          \
                                               (int)(imm),                     \
                                               (__v16hf)(__m256h)(W),          \
                                               (__mmask16)(U)))
584
/* Zero-masked roundscale with immediate imm; a zero vector is the
 * pass-through operand and U the mask. */
#define _mm256_maskz_roundscale_ph(U, A, imm)                                  \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask((__v16hf)(__m256h)(A),          \
                                               (int)(imm),                     \
                                               (__v16hf)_mm256_setzero_ph(),   \
                                               (__mmask16)(U)))
589
/* Unmasked reduce with immediate imm. Expressed as the zero-masked form with
 * an all-ones mask, which expands to the same builtin call. */
#define _mm_reduce_ph(A, imm)                                                  \
  _mm_maskz_reduce_ph((__mmask8)-1, (A), (imm))
594
/* Masked reduce with immediate imm; W is the pass-through operand and U the
 * mask given to __builtin_ia32_reduceph128_mask. */
#define _mm_mask_reduce_ph(W, U, A, imm)                                       \
  ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A),              \
                                            (int)(imm),                        \
                                            (__v8hf)(__m128h)(W),              \
                                            (__mmask8)(U)))
598
/* Zero-masked reduce with immediate imm; a zero vector is the pass-through
 * operand and U the mask. */
#define _mm_maskz_reduce_ph(U, A, imm)                                         \
  ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A),              \
                                            (int)(imm),                        \
                                            (__v8hf)_mm_setzero_ph(),          \
                                            (__mmask8)(U)))
603
/* Unmasked reduce with immediate imm. Expressed as the zero-masked form with
 * an all-ones mask, which expands to the same builtin call. */
#define _mm256_reduce_ph(A, imm)                                               \
  _mm256_maskz_reduce_ph((__mmask16)-1, (A), (imm))
608
/* Masked reduce with immediate imm; W is the pass-through operand and U the
 * mask given to __builtin_ia32_reduceph256_mask. */
#define _mm256_mask_reduce_ph(W, U, A, imm)                                    \
  ((__m256h)__builtin_ia32_reduceph256_mask(                                   \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W),                \
      (__mmask16)(U)))
613
/* Zero-masked reduce with immediate imm; a zero vector is the pass-through
 * operand and U the mask. */
#define _mm256_maskz_reduce_ph(U, A, imm)                                      \
  ((__m256h)__builtin_ia32_reduceph256_mask(                                   \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(),         \
      (__mmask16)(U)))
618
619static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) {
620 return __builtin_elementwise_sqrt(__a);
621}
622
623static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W,
624 __mmask8 __U,
625 __m128h __A) {
626 return (__m128h)__builtin_ia32_selectph_128(
627 (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W);
628}
629
630static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U,
631 __m128h __A) {
632 return (__m128h)__builtin_ia32_selectph_128(
633 (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph());
634}
635
636static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) {
637 return __builtin_elementwise_sqrt(__a);
638}
639
640static __inline__ __m256h __DEFAULT_FN_ATTRS256
641_mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
642 return (__m256h)__builtin_ia32_selectph_256(
643 (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W);
644}
645
646static __inline__ __m256h __DEFAULT_FN_ATTRS256
647_mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) {
648 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
649 (__v16hf)_mm256_sqrt_ph(__A),
650 (__v16hf)_mm256_setzero_ph());
651}
652
/* Classifies the elements of A with immediate imm, passing U as the input
 * mask to __builtin_ia32_fpclassph128_mask. */
#define _mm_mask_fpclass_ph_mask(U, A, imm)                                    \
  ((__mmask8)__builtin_ia32_fpclassph128_mask(                                 \
      (__v8hf)(__m128h)(A), (int)(imm), (__mmask8)(U)))
656
/* Classifies the elements of A with immediate imm. Expressed as the masked
 * form with an all-ones input mask, which expands to the same builtin call. */
#define _mm_fpclass_ph_mask(A, imm)                                            \
  _mm_mask_fpclass_ph_mask((__mmask8)-1, (A), (imm))
660
/* Classifies the elements of A with immediate imm, passing U as the input
 * mask to __builtin_ia32_fpclassph256_mask. */
#define _mm256_mask_fpclass_ph_mask(U, A, imm)                                 \
  ((__mmask16)__builtin_ia32_fpclassph256_mask(                                \
      (__v16hf)(__m256h)(A), (int)(imm), (__mmask16)(U)))
664
/* Classifies the elements of A with immediate imm. Expressed as the masked
 * form with an all-ones input mask, which expands to the same builtin call. */
#define _mm256_fpclass_ph_mask(A, imm)                                         \
  _mm256_mask_fpclass_ph_mask((__mmask16)-1, (A), (imm))
668
669static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) {
670 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
671 (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
672}
673
674static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W,
675 __mmask8 __U,
676 __m128d __A) {
677 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
678 (__mmask8)__U);
679}
680
681static __inline__ __m128h __DEFAULT_FN_ATTRS128
682_mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
683 return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
684 (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
685}
686
687static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) {
688 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
689 (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
690}
691
692static __inline__ __m128h __DEFAULT_FN_ATTRS256
693_mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
694 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
695 (__mmask8)__U);
696}
697
698static __inline__ __m128h __DEFAULT_FN_ATTRS256
699_mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
700 return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
701 (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
702}
703
704static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) {
705 return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
706 (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1);
707}
708
709static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W,
710 __mmask8 __U,
711 __m128h __A) {
712 return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
713 (__mmask8)__U);
714}
715
716static __inline__ __m128d __DEFAULT_FN_ATTRS128
717_mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
718 return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
719 (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U);
720}
721
722static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) {
723 return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
724 (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1);
725}
726
727static __inline__ __m256d __DEFAULT_FN_ATTRS256
728_mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
729 return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
730 (__mmask8)__U);
731}
732
733static __inline__ __m256d __DEFAULT_FN_ATTRS256
734_mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
735 return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
736 (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U);
737}
738
739static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) {
740 return (__m128i)__builtin_ia32_vcvtph2w128_mask(
741 (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
742}
743
744static __inline__ __m128i __DEFAULT_FN_ATTRS128
745_mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
746 return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
747 (__mmask8)__U);
748}
749
750static __inline__ __m128i __DEFAULT_FN_ATTRS128
751_mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
752 return (__m128i)__builtin_ia32_vcvtph2w128_mask(
753 (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
754}
755
756static __inline__ __m256i __DEFAULT_FN_ATTRS256
757_mm256_cvtph_epi16(__m256h __A) {
758 return (__m256i)__builtin_ia32_vcvtph2w256_mask(
759 (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
760}
761
762static __inline__ __m256i __DEFAULT_FN_ATTRS256
763_mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
764 return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
765 (__mmask16)__U);
766}
767
768static __inline__ __m256i __DEFAULT_FN_ATTRS256
769_mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) {
770 return (__m256i)__builtin_ia32_vcvtph2w256_mask(
771 (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
772}
773
774static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) {
775 return (__m128i)__builtin_ia32_vcvttph2w128_mask(
776 (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
777}
778
779static __inline__ __m128i __DEFAULT_FN_ATTRS128
780_mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
781 return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
782 (__mmask8)__U);
783}
784
785static __inline__ __m128i __DEFAULT_FN_ATTRS128
786_mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
787 return (__m128i)__builtin_ia32_vcvttph2w128_mask(
788 (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
789}
790
791static __inline__ __m256i __DEFAULT_FN_ATTRS256
792_mm256_cvttph_epi16(__m256h __A) {
793 return (__m256i)__builtin_ia32_vcvttph2w256_mask(
794 (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
795}
796
797static __inline__ __m256i __DEFAULT_FN_ATTRS256
798_mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
799 return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
800 (__mmask16)__U);
801}
802
803static __inline__ __m256i __DEFAULT_FN_ATTRS256
804_mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
805 return (__m256i)__builtin_ia32_vcvttph2w256_mask(
806 (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
807}
808
809static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
810_mm_cvtepi16_ph(__m128i __A) {
811 return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
812}
813
814static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
815_mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
816 return (__m128h)__builtin_ia32_selectph_128(
817 (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
818}
819
820static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
821_mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
822 return (__m128h)__builtin_ia32_selectph_128(
823 (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
824}
825
826static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
827_mm256_cvtepi16_ph(__m256i __A) {
828 return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
829}
830
831static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
832_mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
833 return (__m256h)__builtin_ia32_selectph_256(
834 (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
835}
836
837static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
838_mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
839 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
840 (__v16hf)_mm256_cvtepi16_ph(__A),
841 (__v16hf)_mm256_setzero_ph());
842}
843
844static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) {
845 return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
846 (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
847}
848
849static __inline__ __m128i __DEFAULT_FN_ATTRS128
850_mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
851 return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
852 (__mmask8)__U);
853}
854
855static __inline__ __m128i __DEFAULT_FN_ATTRS128
856_mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
857 return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
858 (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
859}
860
861static __inline__ __m256i __DEFAULT_FN_ATTRS256
862_mm256_cvtph_epu16(__m256h __A) {
863 return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
864 (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
865}
866
867static __inline__ __m256i __DEFAULT_FN_ATTRS256
868_mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
869 return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
870 (__mmask16)__U);
871}
872
873static __inline__ __m256i __DEFAULT_FN_ATTRS256
874_mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) {
875 return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
876 (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
877}
878
879static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) {
880 return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
881 (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
882}
883
884static __inline__ __m128i __DEFAULT_FN_ATTRS128
885_mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
886 return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
887 (__mmask8)__U);
888}
889
890static __inline__ __m128i __DEFAULT_FN_ATTRS128
891_mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
892 return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
893 (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
894}
895
896static __inline__ __m256i __DEFAULT_FN_ATTRS256
897_mm256_cvttph_epu16(__m256h __A) {
898 return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
899 (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
900}
901
902static __inline__ __m256i __DEFAULT_FN_ATTRS256
903_mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
904 return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
905 (__mmask16)__U);
906}
907
908static __inline__ __m256i __DEFAULT_FN_ATTRS256
909_mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
910 return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
911 (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
912}
913
/* Reinterprets __A as __v8hu and converts it element-wise to __v8hf with
 * __builtin_convertvector. Kept as a single return statement because the
 * attribute macro may add constexpr. */
914static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
915_mm_cvtepu16_ph(__m128i __A) {
916 return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
917}
918
919static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
920_mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
921 return (__m128h)__builtin_ia32_selectph_128(
922 (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
923}
924
925static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
926_mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
927 return (__m128h)__builtin_ia32_selectph_128(
928 (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
929}
930
/* Reinterprets __A as __v16hu and converts it element-wise to __v16hf with
 * __builtin_convertvector. Kept as a single return statement because the
 * attribute macro may add constexpr. */
931static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
932_mm256_cvtepu16_ph(__m256i __A) {
933 return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
934}
935
936static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
937_mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
938 return (__m256h)__builtin_ia32_selectph_256(
939 (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
940}
941
942static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
943_mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
944 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
945 (__v16hf)_mm256_cvtepu16_ph(__A),
946 (__v16hf)_mm256_setzero_ph());
947}
948
949static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) {
950 return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
951 (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
952}
953
954static __inline__ __m128i __DEFAULT_FN_ATTRS128
955_mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
956 return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
957 (__mmask8)__U);
958}
959
960static __inline__ __m128i __DEFAULT_FN_ATTRS128
961_mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
962 return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
963 (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
964}
965
966static __inline__ __m256i __DEFAULT_FN_ATTRS256
967_mm256_cvtph_epi32(__m128h __A) {
968 return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
969 (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
970}
971
972static __inline__ __m256i __DEFAULT_FN_ATTRS256
973_mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
974 return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
975 (__mmask8)__U);
976}
977
978static __inline__ __m256i __DEFAULT_FN_ATTRS256
979_mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
980 return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
981 (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
982}
983
984static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) {
985 return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
986 (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
987}
988
989static __inline__ __m128i __DEFAULT_FN_ATTRS128
990_mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
991 return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
992 (__mmask8)__U);
993}
994
995static __inline__ __m128i __DEFAULT_FN_ATTRS128
996_mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
997 return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
998 (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
999}
1000
1001static __inline__ __m256i __DEFAULT_FN_ATTRS256
1002_mm256_cvtph_epu32(__m128h __A) {
1003 return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
1004 (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
1005}
1006
1007static __inline__ __m256i __DEFAULT_FN_ATTRS256
1008_mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
1009 return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
1010 (__mmask8)__U);
1011}
1012
1013static __inline__ __m256i __DEFAULT_FN_ATTRS256
1014_mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
1015 return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
1016 (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
1017}
1018
1019static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) {
1020 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1021 (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1022}
1023
1024static __inline__ __m128h __DEFAULT_FN_ATTRS128
1025_mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1026 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
1027 (__mmask8)__U);
1028}
1029
1030static __inline__ __m128h __DEFAULT_FN_ATTRS128
1031_mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
1032 return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1033 (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1034}
1035
/* Reinterprets __A as __v8si and converts it element-wise to __v8hf with
 * __builtin_convertvector; the result is returned as __m128h. Kept as a
 * single return statement because the attribute macro may add constexpr. */
1036static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1037_mm256_cvtepi32_ph(__m256i __A) {
1038 return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
1039}
1040
1041static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1042_mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1043 return (__m128h)__builtin_ia32_selectph_128(
1044 (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
1045}
1046
1047static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1048_mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
1049 return (__m128h)__builtin_ia32_selectph_128(
1050 (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
1051}
1052
1053static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) {
1054 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1055 (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1056}
1057
1058static __inline__ __m128h __DEFAULT_FN_ATTRS128
1059_mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1060 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
1061 (__mmask8)__U);
1062}
1063
1064static __inline__ __m128h __DEFAULT_FN_ATTRS128
1065_mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
1066 return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1067 (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1068}
1069
/* Reinterprets __A as __v8su and converts it element-wise to __v8hf with
 * __builtin_convertvector; the result is returned as __m128h. Kept as a
 * single return statement because the attribute macro may add constexpr. */
1070static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1071_mm256_cvtepu32_ph(__m256i __A) {
1072 return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
1073}
1074
1075static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1076_mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1077 return (__m128h)__builtin_ia32_selectph_128(
1078 (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
1079}
1080
1081static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
1082_mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
1083 return (__m128h)__builtin_ia32_selectph_128(
1084 (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
1085}
1086
1087static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) {
1088 return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1089 (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
1090}
1091
1092static __inline__ __m128i __DEFAULT_FN_ATTRS128
1093_mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
1094 return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
1095 (__mmask8)__U);
1096}
1097
1098static __inline__ __m128i __DEFAULT_FN_ATTRS128
1099_mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
1100 return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1101 (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
1102}
1103
1104static __inline__ __m256i __DEFAULT_FN_ATTRS256
1105_mm256_cvttph_epi32(__m128h __A) {
1106 return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1107 (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
1108}
1109
1110static __inline__ __m256i __DEFAULT_FN_ATTRS256
1111_mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
1112 return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
1113 (__mmask8)__U);
1114}
1115
1116static __inline__ __m256i __DEFAULT_FN_ATTRS256
1117_mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
1118 return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1119 (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
1120}
1121
1122static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) {
1123 return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1124 (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
1125}
1126
1127static __inline__ __m128i __DEFAULT_FN_ATTRS128
1128_mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
1129 return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
1130 (__mmask8)__U);
1131}
1132
1133static __inline__ __m128i __DEFAULT_FN_ATTRS128
1134_mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
1135 return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1136 (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
1137}
1138
1139static __inline__ __m256i __DEFAULT_FN_ATTRS256
1140_mm256_cvttph_epu32(__m128h __A) {
1141 return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1142 (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
1143}
1144
1145static __inline__ __m256i __DEFAULT_FN_ATTRS256
1146_mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
1147 return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
1148 (__mmask8)__U);
1149}
1150
1151static __inline__ __m256i __DEFAULT_FN_ATTRS256
1152_mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
1153 return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1154 (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
1155}
1156
1157static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) {
1158 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1159 (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1160}
1161
1162static __inline__ __m128h __DEFAULT_FN_ATTRS128
1163_mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1164 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
1165 (__mmask8)__U);
1166}
1167
1168static __inline__ __m128h __DEFAULT_FN_ATTRS128
1169_mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
1170 return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1171 (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1172}
1173
1174static __inline__ __m128h __DEFAULT_FN_ATTRS256
1175_mm256_cvtepi64_ph(__m256i __A) {
1176 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1177 (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1178}
1179
1180static __inline__ __m128h __DEFAULT_FN_ATTRS256
1181_mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1182 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
1183 (__mmask8)__U);
1184}
1185
1186static __inline__ __m128h __DEFAULT_FN_ATTRS256
1187_mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) {
1188 return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1189 (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1190}
1191
1192static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) {
1193 return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1194 (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1195}
1196
1197static __inline__ __m128i __DEFAULT_FN_ATTRS128
1198_mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1199 return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
1200 (__mmask8)__U);
1201}
1202
1203static __inline__ __m128i __DEFAULT_FN_ATTRS128
1204_mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1205 return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1206 (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1207}
1208
1209static __inline__ __m256i __DEFAULT_FN_ATTRS256
1210_mm256_cvtph_epi64(__m128h __A) {
1211 return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1212 (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1213}
1214
1215static __inline__ __m256i __DEFAULT_FN_ATTRS256
1216_mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1217 return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
1218 (__mmask8)__U);
1219}
1220
1221static __inline__ __m256i __DEFAULT_FN_ATTRS256
1222_mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1223 return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1224 (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1225}
1226
1227static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) {
1228 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1229 (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1230}
1231
1232static __inline__ __m128h __DEFAULT_FN_ATTRS128
1233_mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1234 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
1235 (__mmask8)__U);
1236}
1237
1238static __inline__ __m128h __DEFAULT_FN_ATTRS128
1239_mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
1240 return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1241 (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1242}
1243
1244static __inline__ __m128h __DEFAULT_FN_ATTRS256
1245_mm256_cvtepu64_ph(__m256i __A) {
1246 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1247 (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1248}
1249
1250static __inline__ __m128h __DEFAULT_FN_ATTRS256
1251_mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1252 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
1253 (__mmask8)__U);
1254}
1255
1256static __inline__ __m128h __DEFAULT_FN_ATTRS256
1257_mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) {
1258 return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1259 (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1260}
1261
1262static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) {
1263 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1264 (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1265}
1266
1267static __inline__ __m128i __DEFAULT_FN_ATTRS128
1268_mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1269 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1270 (__mmask8)__U);
1271}
1272
1273static __inline__ __m128i __DEFAULT_FN_ATTRS128
1274_mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1275 return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1276 (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1277}
1278
1279static __inline__ __m256i __DEFAULT_FN_ATTRS256
1280_mm256_cvtph_epu64(__m128h __A) {
1281 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1282 (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1283}
1284
1285static __inline__ __m256i __DEFAULT_FN_ATTRS256
1286_mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1287 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1288 (__mmask8)__U);
1289}
1290
1291static __inline__ __m256i __DEFAULT_FN_ATTRS256
1292_mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1293 return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1294 (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1295}
1296
1297static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) {
1298 return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1299 (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1300}
1301
1302static __inline__ __m128i __DEFAULT_FN_ATTRS128
1303_mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1304 return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
1305 (__mmask8)__U);
1306}
1307
1308static __inline__ __m128i __DEFAULT_FN_ATTRS128
1309_mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
1310 return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1311 (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1312}
1313
1314static __inline__ __m256i __DEFAULT_FN_ATTRS256
1315_mm256_cvttph_epi64(__m128h __A) {
1316 return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1317 (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1318}
1319
1320static __inline__ __m256i __DEFAULT_FN_ATTRS256
1321_mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1322 return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
1323 (__mmask8)__U);
1324}
1325
1326static __inline__ __m256i __DEFAULT_FN_ATTRS256
1327_mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
1328 return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1329 (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1330}
1331
1332static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) {
1333 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1334 (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1335}
1336
1337static __inline__ __m128i __DEFAULT_FN_ATTRS128
1338_mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1339 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1340 (__mmask8)__U);
1341}
1342
1343static __inline__ __m128i __DEFAULT_FN_ATTRS128
1344_mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
1345 return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1346 (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1347}
1348
1349static __inline__ __m256i __DEFAULT_FN_ATTRS256
1350_mm256_cvttph_epu64(__m128h __A) {
1351 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1352 (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1353}
1354
1355static __inline__ __m256i __DEFAULT_FN_ATTRS256
1356_mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1357 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1358 (__mmask8)__U);
1359}
1360
1361static __inline__ __m256i __DEFAULT_FN_ATTRS256
1362_mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
1363 return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1364 (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1365}
1366
1367static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) {
1368 return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1369 (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1);
1370}
1371
1372static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W,
1373 __mmask8 __U,
1374 __m128h __A) {
1375 return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
1376 (__mmask8)__U);
1377}
1378
1379static __inline__ __m128 __DEFAULT_FN_ATTRS128
1380_mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1381 return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1382 (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
1383}
1384
1385static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) {
1386 return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1387 (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1);
1388}
1389
1390static __inline__ __m256 __DEFAULT_FN_ATTRS256
1391_mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
1392 return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
1393 (__mmask8)__U);
1394}
1395
1396static __inline__ __m256 __DEFAULT_FN_ATTRS256
1397_mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1398 return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1399 (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U);
1400}
1401
1402static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) {
1403 return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1404 (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1405}
1406
1407static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W,
1408 __mmask8 __U,
1409 __m128 __A) {
1410 return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
1411 (__mmask8)__U);
1412}
1413
1414static __inline__ __m128h __DEFAULT_FN_ATTRS128
1415_mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
1416 return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1417 (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1418}
1419
1420static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) {
1421 return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1422 (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1423}
1424
1425static __inline__ __m128h __DEFAULT_FN_ATTRS256
1426_mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
1427 return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
1428 (__mmask8)__U);
1429}
1430
1431static __inline__ __m128h __DEFAULT_FN_ATTRS256
1432_mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
1433 return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1434 (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1435}
1436
1437static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A,
1438 __m128h __B,
1439 __m128h __C) {
1440 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
1441 (__v8hf)__C);
1442}
1443
1444static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A,
1445 __mmask8 __U,
1446 __m128h __B,
1447 __m128h __C) {
1448 return (__m128h)__builtin_ia32_selectph_128(
1449 (__mmask8)__U,
1450 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1451 (__v8hf)__A);
1452}
1453
1454static __inline__ __m128h __DEFAULT_FN_ATTRS128
1455_mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1456 return (__m128h)__builtin_ia32_selectph_128(
1457 (__mmask8)__U,
1458 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1459 (__v8hf)__C);
1460}
1461
1462static __inline__ __m128h __DEFAULT_FN_ATTRS128
1463_mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1464 return (__m128h)__builtin_ia32_selectph_128(
1465 (__mmask8)__U,
1466 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1467 (__v8hf)_mm_setzero_ph());
1468}
1469
1470static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A,
1471 __m128h __B,
1472 __m128h __C) {
1473 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
1474 -(__v8hf)__C);
1475}
1476
1477static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A,
1478 __mmask8 __U,
1479 __m128h __B,
1480 __m128h __C) {
1481 return (__m128h)__builtin_ia32_selectph_128(
1482 (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1483 (__v8hf)__A);
1484}
1485
1486static __inline__ __m128h __DEFAULT_FN_ATTRS128
1487_mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1488 return (__m128h)__builtin_ia32_selectph_128(
1489 (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1490 (__v8hf)_mm_setzero_ph());
1491}
1492
1493static __inline__ __m128h __DEFAULT_FN_ATTRS128
1494_mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1495 return (__m128h)__builtin_ia32_selectph_128(
1496 (__mmask8)__U,
1497 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1498 (__v8hf)__C);
1499}
1500
1501static __inline__ __m128h __DEFAULT_FN_ATTRS128
1502_mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1503 return (__m128h)__builtin_ia32_selectph_128(
1504 (__mmask8)__U,
1505 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1506 (__v8hf)_mm_setzero_ph());
1507}
1508
1509static __inline__ __m128h __DEFAULT_FN_ATTRS128
1510_mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1511 return (__m128h)__builtin_ia32_selectph_128(
1512 (__mmask8)__U,
1513 __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1514 (__v8hf)_mm_setzero_ph());
1515}
1516
1517static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A,
1518 __m256h __B,
1519 __m256h __C) {
1520 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
1521 (__v16hf)__C);
1522}
1523
1524static __inline__ __m256h __DEFAULT_FN_ATTRS256
1525_mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1526 return (__m256h)__builtin_ia32_selectph_256(
1527 (__mmask16)__U,
1528 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1529 (__v16hf)__A);
1530}
1531
1532static __inline__ __m256h __DEFAULT_FN_ATTRS256
1533_mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1534 return (__m256h)__builtin_ia32_selectph_256(
1535 (__mmask16)__U,
1536 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1537 (__v16hf)__C);
1538}
1539
1540static __inline__ __m256h __DEFAULT_FN_ATTRS256
1541_mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1542 return (__m256h)__builtin_ia32_selectph_256(
1543 (__mmask16)__U,
1544 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1545 (__v16hf)_mm256_setzero_ph());
1546}
1547
1548static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A,
1549 __m256h __B,
1550 __m256h __C) {
1551 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
1552 -(__v16hf)__C);
1553}
1554
1555static __inline__ __m256h __DEFAULT_FN_ATTRS256
1556_mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1557 return (__m256h)__builtin_ia32_selectph_256(
1558 (__mmask16)__U,
1559 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1560 (__v16hf)__A);
1561}
1562
1563static __inline__ __m256h __DEFAULT_FN_ATTRS256
1564_mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1565 return (__m256h)__builtin_ia32_selectph_256(
1566 (__mmask16)__U,
1567 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1568 (__v16hf)_mm256_setzero_ph());
1569}
1570
1571static __inline__ __m256h __DEFAULT_FN_ATTRS256
1572_mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1573 return (__m256h)__builtin_ia32_selectph_256(
1574 (__mmask16)__U,
1575 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1576 (__v16hf)__C);
1577}
1578
1579static __inline__ __m256h __DEFAULT_FN_ATTRS256
1580_mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1581 return (__m256h)__builtin_ia32_selectph_256(
1582 (__mmask16)__U,
1583 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1584 (__v16hf)_mm256_setzero_ph());
1585}
1586
1587static __inline__ __m256h __DEFAULT_FN_ATTRS256
1588_mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1589 return (__m256h)__builtin_ia32_selectph_256(
1590 (__mmask16)__U,
1591 __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1592 (__v16hf)_mm256_setzero_ph());
1593}
1594
1595static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A,
1596 __m128h __B,
1597 __m128h __C) {
1598 return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1599 (__v8hf)__C);
1600}
1601
1602static __inline__ __m128h __DEFAULT_FN_ATTRS128
1603_mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1604 return (__m128h)__builtin_ia32_selectph_128(
1605 (__mmask8)__U,
1606 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1607 (__v8hf)__A);
1608}
1609
1610static __inline__ __m128h __DEFAULT_FN_ATTRS128
1611_mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1612 return (__m128h)__builtin_ia32_selectph_128(
1613 (__mmask8)__U,
1614 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1615 (__v8hf)__C);
1616}
1617
1618static __inline__ __m128h __DEFAULT_FN_ATTRS128
1619_mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1620 return (__m128h)__builtin_ia32_selectph_128(
1621 (__mmask8)__U,
1622 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1623 (__v8hf)_mm_setzero_ph());
1624}
1625
1626static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A,
1627 __m128h __B,
1628 __m128h __C) {
1629 return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1630 -(__v8hf)__C);
1631}
1632
1633static __inline__ __m128h __DEFAULT_FN_ATTRS128
1634_mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1635 return (__m128h)__builtin_ia32_selectph_128(
1636 (__mmask8)__U,
1637 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1638 (__v8hf)__A);
1639}
1640
1641static __inline__ __m128h __DEFAULT_FN_ATTRS128
1642_mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1643 return (__m128h)__builtin_ia32_selectph_128(
1644 (__mmask8)__U,
1645 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1646 (__v8hf)_mm_setzero_ph());
1647}
1648
1649static __inline__ __m256h __DEFAULT_FN_ATTRS256
1650_mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
1651 return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1652 (__v16hf)__C);
1653}
1654
1655static __inline__ __m256h __DEFAULT_FN_ATTRS256
1656_mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1657 return (__m256h)__builtin_ia32_selectph_256(
1658 (__mmask16)__U,
1659 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1660 (__v16hf)__A);
1661}
1662
1663static __inline__ __m256h __DEFAULT_FN_ATTRS256
1664_mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1665 return (__m256h)__builtin_ia32_selectph_256(
1666 (__mmask16)__U,
1667 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1668 (__v16hf)__C);
1669}
1670
1671static __inline__ __m256h __DEFAULT_FN_ATTRS256
1672_mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1673 return (__m256h)__builtin_ia32_selectph_256(
1674 (__mmask16)__U,
1675 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1676 (__v16hf)_mm256_setzero_ph());
1677}
1678
1679static __inline__ __m256h __DEFAULT_FN_ATTRS256
1680_mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
1681 return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1682 -(__v16hf)__C);
1683}
1684
1685static __inline__ __m256h __DEFAULT_FN_ATTRS256
1686_mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1687 return (__m256h)__builtin_ia32_selectph_256(
1688 (__mmask16)__U,
1689 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1690 (__v16hf)__A);
1691}
1692
1693static __inline__ __m256h __DEFAULT_FN_ATTRS256
1694_mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1695 return (__m256h)__builtin_ia32_selectph_256(
1696 (__mmask16)__U,
1697 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1698 (__v16hf)_mm256_setzero_ph());
1699}
1700
1701static __inline__ __m128h __DEFAULT_FN_ATTRS128
1702_mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1703 return (__m128h)__builtin_ia32_selectph_128(
1704 (__mmask8)__U,
1705 __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1706 (__v8hf)__C);
1707}
1708
1709static __inline__ __m256h __DEFAULT_FN_ATTRS256
1710_mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1711 return (__m256h)__builtin_ia32_selectph_256(
1712 (__mmask16)__U,
1713 __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1714 (__v16hf)__C);
1715}
1716
1717static __inline__ __m128h __DEFAULT_FN_ATTRS128
1718_mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1719 return (__m128h)__builtin_ia32_selectph_128(
1720 (__mmask8)__U,
1721 __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1722 (__v8hf)__C);
1723}
1724
1725static __inline__ __m256h __DEFAULT_FN_ATTRS256
1726_mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1727 return (__m256h)__builtin_ia32_selectph_256(
1728 (__mmask16)__U,
1729 __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1730 (__v16hf)__C);
1731}
1732
1733static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A,
1734 __m128h __B,
1735 __m128h __C) {
1736 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
1737 (__v8hf)__C);
1738}
1739
1740static __inline__ __m128h __DEFAULT_FN_ATTRS128
1741_mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1742 return (__m128h)__builtin_ia32_selectph_128(
1743 (__mmask8)__U,
1744 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
1745 (__v8hf)__A);
1746}
1747
1748static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A,
1749 __m256h __B,
1750 __m256h __C) {
1751 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
1752 (__v16hf)__C);
1753}
1754
1755static __inline__ __m256h __DEFAULT_FN_ATTRS256
1756_mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1757 return (__m256h)__builtin_ia32_selectph_256(
1758 (__mmask16)__U,
1759 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
1760 (__v16hf)__A);
1761}
1762
1763static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A,
1764 __m128h __B,
1765 __m128h __C) {
1766 return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
1767 -(__v8hf)__C);
1768}
1769
1770static __inline__ __m128h __DEFAULT_FN_ATTRS128
1771_mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1772 return (__m128h)__builtin_ia32_selectph_128(
1773 (__mmask8)__U,
1774 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1775 (__v8hf)__A);
1776}
1777
1778static __inline__ __m128h __DEFAULT_FN_ATTRS128
1779_mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1780 return (__m128h)__builtin_ia32_selectph_128(
1781 (__mmask8)__U,
1782 __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1783 (__v8hf)__C);
1784}
1785
1786static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A,
1787 __m256h __B,
1788 __m256h __C) {
1789 return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
1790 -(__v16hf)__C);
1791}
1792
1793static __inline__ __m256h __DEFAULT_FN_ATTRS256
1794_mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1795 return (__m256h)__builtin_ia32_selectph_256(
1796 (__mmask16)__U,
1797 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1798 (__v16hf)__A);
1799}
1800
1801static __inline__ __m256h __DEFAULT_FN_ATTRS256
1802_mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1803 return (__m256h)__builtin_ia32_selectph_256(
1804 (__mmask16)__U,
1805 __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1806 (__v16hf)__C);
1807}
1808
1809static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A,
1810 __m128h __B) {
1811 return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1812 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1813}
1814
1815static __inline__ __m128h __DEFAULT_FN_ATTRS128
1816_mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1817 return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1818 (__v4sf)__W, (__mmask8)__U);
1819}
1820
1821static __inline__ __m128h __DEFAULT_FN_ATTRS128
1822_mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1823 return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1824 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1825}
1826
1827static __inline__ __m256h __DEFAULT_FN_ATTRS128 _mm256_fcmul_pch(__m256h __A,
1828 __m256h __B) {
1829 return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1830 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1831}
1832
1833static __inline__ __m256h __DEFAULT_FN_ATTRS256
1834_mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1835 return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1836 (__v8sf)__W, (__mmask8)__U);
1837}
1838
1839static __inline__ __m256h __DEFAULT_FN_ATTRS256
1840_mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1841 return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1842 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1843}
1844
1845static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A,
1846 __m128h __B,
1847 __m128h __C) {
1848 return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1849 (__v4sf)__C, (__mmask8)-1);
1850}
1851
1852static __inline__ __m128h __DEFAULT_FN_ATTRS128
1853_mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1854 return (__m128h)__builtin_ia32_selectps_128(
1855 __U,
1856 __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B,
1857 (__v4sf)__C, (__mmask8)__U),
1858 (__v4sf)__A);
1859}
1860
1861static __inline__ __m128h __DEFAULT_FN_ATTRS128
1862_mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1863 return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1864 (__v4sf)__C, (__mmask8)__U);
1865}
1866
1867static __inline__ __m128h __DEFAULT_FN_ATTRS128
1868_mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1869 return (__m128h)__builtin_ia32_vfcmaddcph128_maskz(
1870 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U);
1871}
1872
1873static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A,
1874 __m256h __B,
1875 __m256h __C) {
1876 return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1877 (__v8sf)__C, (__mmask8)-1);
1878}
1879
1880static __inline__ __m256h __DEFAULT_FN_ATTRS256
1881_mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1882 return (__m256h)__builtin_ia32_selectps_256(
1883 __U,
1884 __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1885 (__mmask8)__U),
1886 (__v8sf)__A);
1887}
1888
1889static __inline__ __m256h __DEFAULT_FN_ATTRS256
1890_mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1891 return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1892 (__v8sf)__C, (__mmask8)__U);
1893}
1894
1895static __inline__ __m256h __DEFAULT_FN_ATTRS256
1896_mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1897 return (__m256h)__builtin_ia32_vfcmaddcph256_maskz(
1898 (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U);
1899}
1900
1901static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A,
1902 __m128h __B) {
1903 return (__m128h)__builtin_ia32_vfmulcph128_mask(
1904 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1905}
1906
1907static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_pch(__m128h __W,
1908 __mmask8 __U,
1909 __m128h __A,
1910 __m128h __B) {
1911 return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1912 (__v4sf)__W, (__mmask8)__U);
1913}
1914
1915static __inline__ __m128h __DEFAULT_FN_ATTRS128
1916_mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1917 return (__m128h)__builtin_ia32_vfmulcph128_mask(
1918 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1919}
1920
1921static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A,
1922 __m256h __B) {
1923 return (__m256h)__builtin_ia32_vfmulcph256_mask(
1924 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1925}
1926
1927static __inline__ __m256h __DEFAULT_FN_ATTRS256
1928_mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1929 return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1930 (__v8sf)__W, (__mmask8)__U);
1931}
1932
1933static __inline__ __m256h __DEFAULT_FN_ATTRS256
1934_mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1935 return (__m256h)__builtin_ia32_vfmulcph256_mask(
1936 (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1937}
1938
1939static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A,
1940 __m128h __B,
1941 __m128h __C) {
1942 return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1943 (__v4sf)__C, (__mmask8)-1);
1944}
1945
1946static __inline__ __m128h __DEFAULT_FN_ATTRS128
1947_mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1948 return (__m128h)__builtin_ia32_selectps_128(
1949 __U,
1950 __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
1951 (__mmask8)__U),
1952 (__v4sf)__A);
1953}
1954
1955static __inline__ __m128h __DEFAULT_FN_ATTRS128
1956_mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1957 return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1958 (__v4sf)__C, (__mmask8)__U);
1959}
1960
1961static __inline__ __m128h __DEFAULT_FN_ATTRS128
1962_mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1963 return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B,
1964 (__v4sf)__C, (__mmask8)__U);
1965}
1966
1967static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A,
1968 __m256h __B,
1969 __m256h __C) {
1970 return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1971 (__v8sf)__C, (__mmask8)-1);
1972}
1973
1974static __inline__ __m256h __DEFAULT_FN_ATTRS256
1975_mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1976 return (__m256h)__builtin_ia32_selectps_256(
1977 __U,
1978 __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1979 (__mmask8)__U),
1980 (__v8sf)__A);
1981}
1982
1983static __inline__ __m256h __DEFAULT_FN_ATTRS256
1984_mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1985 return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1986 (__v8sf)__C, (__mmask8)__U);
1987}
1988
1989static __inline__ __m256h __DEFAULT_FN_ATTRS256
1990_mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1991 return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B,
1992 (__v8sf)__C, (__mmask8)__U);
1993}
1994
1995static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
1996_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
1997 return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
1998 (__v8hf)__A);
1999}
2000
2001static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
2002_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
2003 return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
2004 (__v16hf)__A);
2005}
2006
2007static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
2008_mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
2009 return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
2010 (__v8hi)__B);
2011}
2012
2013static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
2014_mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
2015 return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
2016 (__v16hi)__B);
2017}
2018
2019static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
2020_mm_permutexvar_ph(__m128i __A, __m128h __B) {
2021 return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
2022}
2023
2024static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
2025_mm256_permutexvar_ph(__m256i __A, __m256h __B) {
2026 return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
2027}
2028
2029static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2030_mm256_reduce_add_ph(__m256h __W) {
2031 return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W);
2032}
2033
2034static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2035_mm256_reduce_mul_ph(__m256h __W) {
2036 return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W);
2037}
2038
2039static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2040_mm256_reduce_max_ph(__m256h __V) {
2041 return __builtin_ia32_reduce_fmax_ph256(__V);
2042}
2043
2044static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2045_mm256_reduce_min_ph(__m256h __V) {
2046 return __builtin_ia32_reduce_fmin_ph256(__V);
2047}
2048
2049static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2050_mm_reduce_add_ph(__m128h __W) {
2051 return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W);
2052}
2053
2054static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2055_mm_reduce_mul_ph(__m128h __W) {
2056 return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W);
2057}
2058
2059static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2060_mm_reduce_max_ph(__m128h __V) {
2061 return __builtin_ia32_reduce_fmax_ph128(__V);
2062}
2063
2064static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2065_mm_reduce_min_ph(__m128h __V) {
2066 return __builtin_ia32_reduce_fmin_ph128(__V);
2067}
2068
2069// The intrinsics below are aliases for the corresponding f*mul_*ch intrinsics.
// Each *_mul_pch macro expands to the matching *_fmul_pch function with the
// same arguments in the same order.
2070#define _mm_mul_pch(A, B) _mm_fmul_pch(A, B)
2071#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch(W, U, A, B)
2072#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch(U, A, B)
2073#define _mm256_mul_pch(A, B) _mm256_fmul_pch(A, B)
2074#define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch(W, U, A, B)
2075#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch(U, A, B)
2076
// Each *_cmul_pch macro expands to the matching *_fcmul_pch function with the
// same arguments in the same order.
2077#define _mm_cmul_pch(A, B) _mm_fcmul_pch(A, B)
2078#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch(W, U, A, B)
2079#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch(U, A, B)
2080#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch(A, B)
2081#define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch(W, U, A, B)
2082#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch(U, A, B)
2083
/* Remove the attribute helper macros defined at the top of this file now that
 * every function using them has been declared. */
2084#undef __DEFAULT_FN_ATTRS128
2085#undef __DEFAULT_FN_ATTRS256
2086#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
2087#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
2088
2089#endif /* __AVX512VLFP16INTRIN_H */
2090#endif /* __SSE2__ */
__device__ _Float16
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS256_CONSTEXPR
Definition avx2intrin.h:29
unsigned char __mmask8
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition avxintrin.h:3604
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void)
Create a 256-bit vector of [4 x double] with undefined values.
Definition avxintrin.h:3592
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3616
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_ps(float __w)
Constructs a 256-bit floating-point vector of [8 x float], with each of the eight single-precision fl...
Definition avxintrin.h:4185
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4287
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements se...
Definition avxintrin.h:4204
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_ps(__m256 __a, __m256 __b)
Performs a bitwise XOR of two 256-bit vectors of [8 x float].
Definition avxintrin.h:664
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4275
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4299
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
Definition emmintrin.h:1765
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition emmintrin.h:3709
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
Definition xmmintrin.h:1890
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_ps(__m128 __a, __m128 __b)
Performs a bitwise exclusive OR of two 128-bit vectors of [4 x float].
Definition xmmintrin.h:480
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_ps(float __w)
Constructs a 128-bit floating-point vector of [4 x float], with each of the four single-precision flo...
Definition xmmintrin.h:1927
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2012