1 /*===----------- avx512fp16intrin.h - AVX512-FP16 intrinsics ---------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 #ifndef __IMMINTRIN_H
10 #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
11 #endif
12 
13 #ifndef __AVX512FP16INTRIN_H
14 #define __AVX512FP16INTRIN_H
15 
16 /* Define the half-precision (_Float16) vector types used in this file. */
17 typedef _Float16 __v32hf __attribute__((__vector_size__(64), __aligned__(64)));
18 typedef _Float16 __m512h __attribute__((__vector_size__(64), __aligned__(64)));
19 typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
20 typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
21 typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
22 typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));
23 typedef _Float16 __v16hf __attribute__((__vector_size__(32), __aligned__(32)));
24 typedef _Float16 __m256h __attribute__((__vector_size__(32), __aligned__(32)));
25 typedef _Float16 __m256h_u __attribute__((__vector_size__(32), __aligned__(1)));
26 
27 /* Define the default attributes for the functions in this file. */
28 #define __DEFAULT_FN_ATTRS512 \
29  __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
30  __min_vector_width__(512)))
31 #define __DEFAULT_FN_ATTRS256 \
32  __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
33  __min_vector_width__(256)))
34 #define __DEFAULT_FN_ATTRS128 \
35  __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
36  __min_vector_width__(128)))
37 
38 static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
39  return __a[0];
40 }
41 
42 static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_setzero_ph(void) {
43  return (__m128h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
44 }
45 
46 static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_setzero_ph(void) {
47  return (__m256h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
48  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
49 }
50 
51 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_undefined_ph(void) {
52  return (__m256h)__builtin_ia32_undef256();
53 }
54 
55 static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_setzero_ph(void) {
56  return (__m512h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
57  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
58  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
59 }
60 
61 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_undefined_ph(void) {
62  return (__m128h)__builtin_ia32_undef128();
63 }
64 
65 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_undefined_ph(void) {
66  return (__m512h)__builtin_ia32_undef512();
67 }
68 
69 static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set1_ph(_Float16 __h) {
70  return (__m512h)(__v32hf){__h, __h, __h, __h, __h, __h, __h, __h,
71  __h, __h, __h, __h, __h, __h, __h, __h,
72  __h, __h, __h, __h, __h, __h, __h, __h,
73  __h, __h, __h, __h, __h, __h, __h, __h};
74 }
75 
76 static __inline __m512h __DEFAULT_FN_ATTRS512
77 _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
78  _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
79  _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
80  _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16,
81  _Float16 __h17, _Float16 __h18, _Float16 __h19, _Float16 __h20,
82  _Float16 __h21, _Float16 __h22, _Float16 __h23, _Float16 __h24,
83  _Float16 __h25, _Float16 __h26, _Float16 __h27, _Float16 __h28,
84  _Float16 __h29, _Float16 __h30, _Float16 __h31, _Float16 __h32) {
85  return (__m512h)(__v32hf){__h32, __h31, __h30, __h29, __h28, __h27, __h26,
86  __h25, __h24, __h23, __h22, __h21, __h20, __h19,
87  __h18, __h17, __h16, __h15, __h14, __h13, __h12,
88  __h11, __h10, __h9, __h8, __h7, __h6, __h5,
89  __h4, __h3, __h2, __h1};
90 }
91 
92 #define _mm512_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \
93  h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, h24, \
94  h25, h26, h27, h28, h29, h30, h31, h32) \
95  _mm512_set_ph((h32), (h31), (h30), (h29), (h28), (h27), (h26), (h25), (h24), \
96  (h23), (h22), (h21), (h20), (h19), (h18), (h17), (h16), (h15), \
97  (h14), (h13), (h12), (h11), (h10), (h9), (h8), (h7), (h6), \
98  (h5), (h4), (h3), (h2), (h1))
99 
100 static __inline __m512h __DEFAULT_FN_ATTRS512
101 _mm512_set1_pch(_Float16 _Complex h) {
102  return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, h));
103 }
104 
105 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a) {
106  return (__m128)__a;
107 }
108 
109 static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_castph_ps(__m256h __a) {
110  return (__m256)__a;
111 }
112 
113 static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castph_ps(__m512h __a) {
114  return (__m512)__a;
115 }
116 
117 static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castph_pd(__m128h __a) {
118  return (__m128d)__a;
119 }
120 
121 static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_castph_pd(__m256h __a) {
122  return (__m256d)__a;
123 }
124 
125 static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castph_pd(__m512h __a) {
126  return (__m512d)__a;
127 }
128 
129 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_castph_si128(__m128h __a) {
130  return (__m128i)__a;
131 }
132 
133 static __inline__ __m256i __DEFAULT_FN_ATTRS256
134 _mm256_castph_si256(__m256h __a) {
135  return (__m256i)__a;
136 }
137 
138 static __inline__ __m512i __DEFAULT_FN_ATTRS512
139 _mm512_castph_si512(__m512h __a) {
140  return (__m512i)__a;
141 }
142 
143 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castps_ph(__m128 __a) {
144  return (__m128h)__a;
145 }
146 
147 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castps_ph(__m256 __a) {
148  return (__m256h)__a;
149 }
150 
151 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castps_ph(__m512 __a) {
152  return (__m512h)__a;
153 }
154 
155 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castpd_ph(__m128d __a) {
156  return (__m128h)__a;
157 }
158 
159 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castpd_ph(__m256d __a) {
160  return (__m256h)__a;
161 }
162 
163 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castpd_ph(__m512d __a) {
164  return (__m512h)__a;
165 }
166 
167 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castsi128_ph(__m128i __a) {
168  return (__m128h)__a;
169 }
170 
171 static __inline__ __m256h __DEFAULT_FN_ATTRS256
172 _mm256_castsi256_ph(__m256i __a) {
173  return (__m256h)__a;
174 }
175 
176 static __inline__ __m512h __DEFAULT_FN_ATTRS512
177 _mm512_castsi512_ph(__m512i __a) {
178  return (__m512h)__a;
179 }
180 
181 static __inline__ __m128h __DEFAULT_FN_ATTRS256
182 _mm256_castph256_ph128(__m256h __a) {
183  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
184 }
185 
186 static __inline__ __m128h __DEFAULT_FN_ATTRS512
187 _mm512_castph512_ph128(__m512h __a) {
188  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
189 }
190 
191 static __inline__ __m256h __DEFAULT_FN_ATTRS512
192 _mm512_castph512_ph256(__m512h __a) {
193  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
194  12, 13, 14, 15);
195 }
196 
197 static __inline__ __m256h __DEFAULT_FN_ATTRS256
198 _mm256_castph128_ph256(__m128h __a) {
199  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
200  -1, -1, -1, -1, -1);
201 }
202 
203 static __inline__ __m512h __DEFAULT_FN_ATTRS512
204 _mm512_castph128_ph512(__m128h __a) {
205  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
206  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
207  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
208 }
209 
210 static __inline__ __m512h __DEFAULT_FN_ATTRS512
211 _mm512_castph256_ph512(__m256h __a) {
212  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
213  12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
214  -1, -1, -1, -1, -1, -1, -1, -1);
215 }
216 
217 /// Constructs a 256-bit floating-point vector of [16 x half] from a
218 /// 128-bit floating-point vector of [8 x half]. The lower 128 bits
219 /// contain the value of the source vector. The upper 128 bits are set
220 /// to zero.
221 ///
222 /// \headerfile <x86intrin.h>
223 ///
224 /// This intrinsic has no corresponding instruction.
225 ///
226 /// \param __a
227 /// A 128-bit vector of [8 x half].
228 /// \returns A 256-bit floating-point vector of [16 x half]. The lower 128 bits
229 /// contain the value of the parameter. The upper 128 bits are set to zero.
230 static __inline__ __m256h __DEFAULT_FN_ATTRS256
231 _mm256_zextph128_ph256(__m128h __a) {
232  return __builtin_shufflevector(__a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4,
233  5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
234 }
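/* Usage sketch (illustrative only; assumes AVX512-FP16 is enabled and
 * <immintrin.h> is included; variable names are made up):
 *   __m128h __lo   = _mm_setzero_ph();
 *   __m256h __wide = _mm256_zextph128_ph256(__lo); // upper 128 bits are zero
 */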
235 
236 /// Constructs a 512-bit floating-point vector of [32 x half] from a
237 /// 128-bit floating-point vector of [8 x half]. The lower 128 bits
238 /// contain the value of the source vector. The upper 384 bits are set
239 /// to zero.
240 ///
241 /// \headerfile <x86intrin.h>
242 ///
243 /// This intrinsic has no corresponding instruction.
244 ///
245 /// \param __a
246 /// A 128-bit vector of [8 x half].
247 /// \returns A 512-bit floating-point vector of [32 x half]. The lower 128 bits
248 /// contain the value of the parameter. The upper 384 bits are set to zero.
249 static __inline__ __m512h __DEFAULT_FN_ATTRS512
250 _mm512_zextph128_ph512(__m128h __a) {
251  return __builtin_shufflevector(
252  __a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
253  13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
254 }
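/* Illustrative sketch (same assumptions as above): unlike
 * _mm512_castph128_ph512, whose upper lanes are left unspecified (-1 shuffle
 * indices), the zero-extending form guarantees zeros in bits 511:128:
 *   __m512h __z = _mm512_zextph128_ph512(_mm_setzero_ph()); // all 32 lanes zero
 */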
255 
256 /// Constructs a 512-bit floating-point vector of [32 x half] from a
257 /// 256-bit floating-point vector of [16 x half]. The lower 256 bits
258 /// contain the value of the source vector. The upper 256 bits are set
259 /// to zero.
260 ///
261 /// \headerfile <x86intrin.h>
262 ///
263 /// This intrinsic has no corresponding instruction.
264 ///
265 /// \param __a
266 /// A 256-bit vector of [16 x half].
267 /// \returns A 512-bit floating-point vector of [32 x half]. The lower 256 bits
268 /// contain the value of the parameter. The upper 256 bits are set to zero.
269 static __inline__ __m512h __DEFAULT_FN_ATTRS512
270 _mm512_zextph256_ph512(__m256h __a) {
271  return __builtin_shufflevector(__a, (__v16hf)_mm256_setzero_ph(), 0, 1, 2, 3,
272  4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
273  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
274  29, 30, 31);
275 }
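/* Illustrative sketch (same assumptions as above):
 *   __m256h __v = _mm256_setzero_ph();
 *   __m512h __w = _mm512_zextph256_ph512(__v); // bits 255:0 = __v, bits 511:256 = 0
 */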
276 
277 #define _mm_comi_round_sh(A, B, P, R) \
278  __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, (int)(P), (int)(R))
279 
280 #define _mm_comi_sh(A, B, pred) \
281  _mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION)
282 
283 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h A,
284  __m128h B) {
285  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OS,
286  _MM_FROUND_CUR_DIRECTION);
287 }
288 
289 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h A,
290  __m128h B) {
291  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OS,
292  _MM_FROUND_CUR_DIRECTION);
293 }
294 
295 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h A,
296  __m128h B) {
297  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OS,
298  _MM_FROUND_CUR_DIRECTION);
299 }
300 
301 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h A,
302  __m128h B) {
303  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OS,
304  _MM_FROUND_CUR_DIRECTION);
305 }
306 
307 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h A,
308  __m128h B) {
309  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OS,
310  _MM_FROUND_CUR_DIRECTION);
311 }
312 
313 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h A,
314  __m128h B) {
315  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_US,
316  _MM_FROUND_CUR_DIRECTION);
317 }
318 
319 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h A,
320  __m128h B) {
321  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OQ,
322  _MM_FROUND_CUR_DIRECTION);
323 }
324 
325 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h A,
326  __m128h B) {
327  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OQ,
328  _MM_FROUND_CUR_DIRECTION);
329 }
330 
331 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h A,
332  __m128h B) {
333  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OQ,
334  _MM_FROUND_CUR_DIRECTION);
335 }
336 
337 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h A,
338  __m128h B) {
339  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OQ,
340  _MM_FROUND_CUR_DIRECTION);
341 }
342 
343 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h A,
344  __m128h B) {
345  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OQ,
346  _MM_FROUND_CUR_DIRECTION);
347 }
348 
349 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h A,
350  __m128h B) {
351  return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_UQ,
352  _MM_FROUND_CUR_DIRECTION);
353 }
354 
355 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_add_ph(__m512h __A,
356  __m512h __B) {
357  return (__m512h)((__v32hf)__A + (__v32hf)__B);
358 }
359 
360 static __inline__ __m512h __DEFAULT_FN_ATTRS512
361 _mm512_mask_add_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
362  return (__m512h)__builtin_ia32_selectph_512(
363  (__mmask32)__U, (__v32hf)_mm512_add_ph(__A, __B), (__v32hf)__W);
364 }
365 
366 static __inline__ __m512h __DEFAULT_FN_ATTRS512
367 _mm512_maskz_add_ph(__mmask32 __U, __m512h __A, __m512h __B) {
368  return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
369  (__v32hf)_mm512_add_ph(__A, __B),
370  (__v32hf)_mm512_setzero_ph());
371 }
372 
373 #define _mm512_add_round_ph(A, B, R) \
374  ((__m512h)__builtin_ia32_addph512((__v32hf)(__m512h)(A), \
375  (__v32hf)(__m512h)(B), (int)(R)))
376 
377 #define _mm512_mask_add_round_ph(W, U, A, B, R) \
378  ((__m512h)__builtin_ia32_selectph_512( \
379  (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \
380  (__v32hf)(__m512h)(W)))
381 
382 #define _mm512_maskz_add_round_ph(U, A, B, R) \
383  ((__m512h)__builtin_ia32_selectph_512( \
384  (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \
385  (__v32hf)_mm512_setzero_ph()))
386 
387 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sub_ph(__m512h __A,
388  __m512h __B) {
389  return (__m512h)((__v32hf)__A - (__v32hf)__B);
390 }
391 
392 static __inline__ __m512h __DEFAULT_FN_ATTRS512
393 _mm512_mask_sub_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
394  return (__m512h)__builtin_ia32_selectph_512(
395  (__mmask32)__U, (__v32hf)_mm512_sub_ph(__A, __B), (__v32hf)__W);
396 }
397 
398 static __inline__ __m512h __DEFAULT_FN_ATTRS512
399 _mm512_maskz_sub_ph(__mmask32 __U, __m512h __A, __m512h __B) {
400  return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
401  (__v32hf)_mm512_sub_ph(__A, __B),
402  (__v32hf)_mm512_setzero_ph());
403 }
404 
405 #define _mm512_sub_round_ph(A, B, R) \
406  ((__m512h)__builtin_ia32_subph512((__v32hf)(__m512h)(A), \
407  (__v32hf)(__m512h)(B), (int)(R)))
408 
409 #define _mm512_mask_sub_round_ph(W, U, A, B, R) \
410  ((__m512h)__builtin_ia32_selectph_512( \
411  (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \
412  (__v32hf)(__m512h)(W)))
413 
414 #define _mm512_maskz_sub_round_ph(U, A, B, R) \
415  ((__m512h)__builtin_ia32_selectph_512( \
416  (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \
417  (__v32hf)_mm512_setzero_ph()))
418 
419 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mul_ph(__m512h __A,
420  __m512h __B) {
421  return (__m512h)((__v32hf)__A * (__v32hf)__B);
422 }
423 
424 static __inline__ __m512h __DEFAULT_FN_ATTRS512
425 _mm512_mask_mul_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
426  return (__m512h)__builtin_ia32_selectph_512(
427  (__mmask32)__U, (__v32hf)_mm512_mul_ph(__A, __B), (__v32hf)__W);
428 }
429 
430 static __inline__ __m512h __DEFAULT_FN_ATTRS512
431 _mm512_maskz_mul_ph(__mmask32 __U, __m512h __A, __m512h __B) {
432  return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
433  (__v32hf)_mm512_mul_ph(__A, __B),
434  (__v32hf)_mm512_setzero_ph());
435 }
436 
437 #define _mm512_mul_round_ph(A, B, R) \
438  ((__m512h)__builtin_ia32_mulph512((__v32hf)(__m512h)(A), \
439  (__v32hf)(__m512h)(B), (int)(R)))
440 
441 #define _mm512_mask_mul_round_ph(W, U, A, B, R) \
442  ((__m512h)__builtin_ia32_selectph_512( \
443  (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \
444  (__v32hf)(__m512h)(W)))
445 
446 #define _mm512_maskz_mul_round_ph(U, A, B, R) \
447  ((__m512h)__builtin_ia32_selectph_512( \
448  (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \
449  (__v32hf)_mm512_setzero_ph()))
450 
451 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_div_ph(__m512h __A,
452  __m512h __B) {
453  return (__m512h)((__v32hf)__A / (__v32hf)__B);
454 }
455 
456 static __inline__ __m512h __DEFAULT_FN_ATTRS512
457 _mm512_mask_div_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
458  return (__m512h)__builtin_ia32_selectph_512(
459  (__mmask32)__U, (__v32hf)_mm512_div_ph(__A, __B), (__v32hf)__W);
460 }
461 
462 static __inline__ __m512h __DEFAULT_FN_ATTRS512
463 _mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) {
464  return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
465  (__v32hf)_mm512_div_ph(__A, __B),
466  (__v32hf)_mm512_setzero_ph());
467 }
468 
469 #define _mm512_div_round_ph(A, B, R) \
470  ((__m512h)__builtin_ia32_divph512((__v32hf)(__m512h)(A), \
471  (__v32hf)(__m512h)(B), (int)(R)))
472 
473 #define _mm512_mask_div_round_ph(W, U, A, B, R) \
474  ((__m512h)__builtin_ia32_selectph_512( \
475  (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \
476  (__v32hf)(__m512h)(W)))
477 
478 #define _mm512_maskz_div_round_ph(U, A, B, R) \
479  ((__m512h)__builtin_ia32_selectph_512( \
480  (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \
481  (__v32hf)_mm512_setzero_ph()))
482 
483 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A,
484  __m512h __B) {
485  return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B,
486  _MM_FROUND_CUR_DIRECTION);
487 }
488 
489 static __inline__ __m512h __DEFAULT_FN_ATTRS512
490 _mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
491  return (__m512h)__builtin_ia32_selectph_512(
492  (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W);
493 }
494 
495 static __inline__ __m512h __DEFAULT_FN_ATTRS512
496 _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) {
497  return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
498  (__v32hf)_mm512_min_ph(__A, __B),
499  (__v32hf)_mm512_setzero_ph());
500 }
501 
502 #define _mm512_min_round_ph(A, B, R) \
503  ((__m512h)__builtin_ia32_minph512((__v32hf)(__m512h)(A), \
504  (__v32hf)(__m512h)(B), (int)(R)))
505 
506 #define _mm512_mask_min_round_ph(W, U, A, B, R) \
507  ((__m512h)__builtin_ia32_selectph_512( \
508  (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \
509  (__v32hf)(__m512h)(W)))
510 
511 #define _mm512_maskz_min_round_ph(U, A, B, R) \
512  ((__m512h)__builtin_ia32_selectph_512( \
513  (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \
514  (__v32hf)_mm512_setzero_ph()))
515 
516 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A,
517  __m512h __B) {
518  return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B,
519  _MM_FROUND_CUR_DIRECTION);
520 }
521 
522 static __inline__ __m512h __DEFAULT_FN_ATTRS512
523 _mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
524  return (__m512h)__builtin_ia32_selectph_512(
525  (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W);
526 }
527 
528 static __inline__ __m512h __DEFAULT_FN_ATTRS512
529 _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) {
530  return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
531  (__v32hf)_mm512_max_ph(__A, __B),
532  (__v32hf)_mm512_setzero_ph());
533 }
534 
535 #define _mm512_max_round_ph(A, B, R) \
536  ((__m512h)__builtin_ia32_maxph512((__v32hf)(__m512h)(A), \
537  (__v32hf)(__m512h)(B), (int)(R)))
538 
539 #define _mm512_mask_max_round_ph(W, U, A, B, R) \
540  ((__m512h)__builtin_ia32_selectph_512( \
541  (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \
542  (__v32hf)(__m512h)(W)))
543 
544 #define _mm512_maskz_max_round_ph(U, A, B, R) \
545  ((__m512h)__builtin_ia32_selectph_512( \
546  (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \
547  (__v32hf)_mm512_setzero_ph()))
548 
549 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) {
550  return (__m512h)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), (__m512i)__A);
551 }
552 
553 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A) {
554  return (__m512h)_mm512_xor_ps((__m512)__A, _mm512_set1_ps(-0.0f));
555 }
556 
557 static __inline__ __m512h __DEFAULT_FN_ATTRS512
558 _mm512_mask_conj_pch(__m512h __W, __mmask16 __U, __m512h __A) {
559  return (__m512h)__builtin_ia32_selectps_512(
560  (__mmask16)__U, (__v16sf)_mm512_conj_pch(__A), (__v16sf)__W);
561 }
562 
563 static __inline__ __m512h __DEFAULT_FN_ATTRS512
564 _mm512_maskz_conj_pch(__mmask16 __U, __m512h __A) {
565  return (__m512h)__builtin_ia32_selectps_512((__mmask16)__U,
566  (__v16sf)_mm512_conj_pch(__A),
567  (__v16sf)_mm512_setzero_ps());
568 }
569 
570 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A,
571  __m128h __B) {
572  __A[0] += __B[0];
573  return __A;
574 }
575 
576 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_sh(__m128h __W,
577  __mmask8 __U,
578  __m128h __A,
579  __m128h __B) {
580  __A = _mm_add_sh(__A, __B);
581  return __builtin_ia32_selectsh_128(__U, __A, __W);
582 }
583 
584 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U,
585  __m128h __A,
586  __m128h __B) {
587  __A = _mm_add_sh(__A, __B);
588  return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
589 }
590 
591 #define _mm_add_round_sh(A, B, R) \
592  ((__m128h)__builtin_ia32_addsh_round_mask( \
593  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
594  (__mmask8)-1, (int)(R)))
595 
596 #define _mm_mask_add_round_sh(W, U, A, B, R) \
597  ((__m128h)__builtin_ia32_addsh_round_mask( \
598  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
599  (__mmask8)(U), (int)(R)))
600 
601 #define _mm_maskz_add_round_sh(U, A, B, R) \
602  ((__m128h)__builtin_ia32_addsh_round_mask( \
603  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
604  (__mmask8)(U), (int)(R)))
605 
606 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A,
607  __m128h __B) {
608  __A[0] -= __B[0];
609  return __A;
610 }
611 
612 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_sh(__m128h __W,
613  __mmask8 __U,
614  __m128h __A,
615  __m128h __B) {
616  __A = _mm_sub_sh(__A, __B);
617  return __builtin_ia32_selectsh_128(__U, __A, __W);
618 }
619 
620 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U,
621  __m128h __A,
622  __m128h __B) {
623  __A = _mm_sub_sh(__A, __B);
624  return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
625 }
626 
627 #define _mm_sub_round_sh(A, B, R) \
628  ((__m128h)__builtin_ia32_subsh_round_mask( \
629  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
630  (__mmask8)-1, (int)(R)))
631 
632 #define _mm_mask_sub_round_sh(W, U, A, B, R) \
633  ((__m128h)__builtin_ia32_subsh_round_mask( \
634  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
635  (__mmask8)(U), (int)(R)))
636 
637 #define _mm_maskz_sub_round_sh(U, A, B, R) \
638  ((__m128h)__builtin_ia32_subsh_round_mask( \
639  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
640  (__mmask8)(U), (int)(R)))
641 
642 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A,
643  __m128h __B) {
644  __A[0] *= __B[0];
645  return __A;
646 }
647 
648 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_sh(__m128h __W,
649  __mmask8 __U,
650  __m128h __A,
651  __m128h __B) {
652  __A = _mm_mul_sh(__A, __B);
653  return __builtin_ia32_selectsh_128(__U, __A, __W);
654 }
655 
656 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U,
657  __m128h __A,
658  __m128h __B) {
659  __A = _mm_mul_sh(__A, __B);
660  return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
661 }
662 
663 #define _mm_mul_round_sh(A, B, R) \
664  ((__m128h)__builtin_ia32_mulsh_round_mask( \
665  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
666  (__mmask8)-1, (int)(R)))
667 
668 #define _mm_mask_mul_round_sh(W, U, A, B, R) \
669  ((__m128h)__builtin_ia32_mulsh_round_mask( \
670  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
671  (__mmask8)(U), (int)(R)))
672 
673 #define _mm_maskz_mul_round_sh(U, A, B, R) \
674  ((__m128h)__builtin_ia32_mulsh_round_mask( \
675  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
676  (__mmask8)(U), (int)(R)))
677 
678 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A,
679  __m128h __B) {
680  __A[0] /= __B[0];
681  return __A;
682 }
683 
684 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_sh(__m128h __W,
685  __mmask8 __U,
686  __m128h __A,
687  __m128h __B) {
688  __A = _mm_div_sh(__A, __B);
689  return __builtin_ia32_selectsh_128(__U, __A, __W);
690 }
691 
692 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_sh(__mmask8 __U,
693  __m128h __A,
694  __m128h __B) {
695  __A = _mm_div_sh(__A, __B);
696  return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
697 }
698 
699 #define _mm_div_round_sh(A, B, R) \
700  ((__m128h)__builtin_ia32_divsh_round_mask( \
701  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
702  (__mmask8)-1, (int)(R)))
703 
704 #define _mm_mask_div_round_sh(W, U, A, B, R) \
705  ((__m128h)__builtin_ia32_divsh_round_mask( \
706  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
707  (__mmask8)(U), (int)(R)))
708 
709 #define _mm_maskz_div_round_sh(U, A, B, R) \
710  ((__m128h)__builtin_ia32_divsh_round_mask( \
711  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
712  (__mmask8)(U), (int)(R)))
713 
714 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A,
715  __m128h __B) {
716  return (__m128h)__builtin_ia32_minsh_round_mask(
717  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
718  _MM_FROUND_CUR_DIRECTION);
719 }
720 
721 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_sh(__m128h __W,
722  __mmask8 __U,
723  __m128h __A,
724  __m128h __B) {
725  return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B,
726  (__v8hf)__W, (__mmask8)__U,
727  _MM_FROUND_CUR_DIRECTION);
728 }
729 
730 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U,
731  __m128h __A,
732  __m128h __B) {
733  return (__m128h)__builtin_ia32_minsh_round_mask(
734  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
735  _MM_FROUND_CUR_DIRECTION);
736 }
737 
738 #define _mm_min_round_sh(A, B, R) \
739  ((__m128h)__builtin_ia32_minsh_round_mask( \
740  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
741  (__mmask8)-1, (int)(R)))
742 
743 #define _mm_mask_min_round_sh(W, U, A, B, R) \
744  ((__m128h)__builtin_ia32_minsh_round_mask( \
745  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
746  (__mmask8)(U), (int)(R)))
747 
748 #define _mm_maskz_min_round_sh(U, A, B, R) \
749  ((__m128h)__builtin_ia32_minsh_round_mask( \
750  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
751  (__mmask8)(U), (int)(R)))
752 
753 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A,
754  __m128h __B) {
755  return (__m128h)__builtin_ia32_maxsh_round_mask(
756  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
757  _MM_FROUND_CUR_DIRECTION);
758 }
759 
760 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_sh(__m128h __W,
761  __mmask8 __U,
762  __m128h __A,
763  __m128h __B) {
764  return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B,
765  (__v8hf)__W, (__mmask8)__U,
766  _MM_FROUND_CUR_DIRECTION);
767 }
768 
769 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_sh(__mmask8 __U,
770  __m128h __A,
771  __m128h __B) {
772  return (__m128h)__builtin_ia32_maxsh_round_mask(
773  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
774  _MM_FROUND_CUR_DIRECTION);
775 }
776 
777 #define _mm_max_round_sh(A, B, R) \
778  ((__m128h)__builtin_ia32_maxsh_round_mask( \
779  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
780  (__mmask8)-1, (int)(R)))
781 
782 #define _mm_mask_max_round_sh(W, U, A, B, R) \
783  ((__m128h)__builtin_ia32_maxsh_round_mask( \
784  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
785  (__mmask8)(U), (int)(R)))
786 
787 #define _mm_maskz_max_round_sh(U, A, B, R) \
788  ((__m128h)__builtin_ia32_maxsh_round_mask( \
789  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
790  (__mmask8)(U), (int)(R)))
791 
792 #define _mm512_cmp_round_ph_mask(A, B, P, R) \
793  ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \
794  (__v32hf)(__m512h)(B), (int)(P), \
795  (__mmask32)-1, (int)(R)))
796 
797 #define _mm512_mask_cmp_round_ph_mask(U, A, B, P, R) \
798  ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \
799  (__v32hf)(__m512h)(B), (int)(P), \
800  (__mmask32)(U), (int)(R)))
801 
802 #define _mm512_cmp_ph_mask(A, B, P) \
803  _mm512_cmp_round_ph_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
804 
805 #define _mm512_mask_cmp_ph_mask(U, A, B, P) \
806  _mm512_mask_cmp_round_ph_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
807 
808 #define _mm_cmp_round_sh_mask(X, Y, P, R) \
809  ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X), \
810  (__v8hf)(__m128h)(Y), (int)(P), \
811  (__mmask8)-1, (int)(R)))
812 
813 #define _mm_mask_cmp_round_sh_mask(M, X, Y, P, R) \
814  ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X), \
815  (__v8hf)(__m128h)(Y), (int)(P), \
816  (__mmask8)(M), (int)(R)))
817 
818 #define _mm_cmp_sh_mask(X, Y, P) \
819  ((__mmask8)__builtin_ia32_cmpsh_mask( \
820  (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)-1, \
821  _MM_FROUND_CUR_DIRECTION))
822 
823 #define _mm_mask_cmp_sh_mask(M, X, Y, P) \
824  ((__mmask8)__builtin_ia32_cmpsh_mask( \
825  (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)(M), \
826  _MM_FROUND_CUR_DIRECTION))
827 // loads with vmovsh:
828 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_sh(void const *__dp) {
829  struct __mm_load_sh_struct {
830  _Float16 __u;
831  } __attribute__((__packed__, __may_alias__));
832  _Float16 __u = ((struct __mm_load_sh_struct *)__dp)->__u;
833  return (__m128h){__u, 0, 0, 0, 0, 0, 0, 0};
834 }
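/* Illustrative sketch (not part of the header): _mm_load_sh reads one
 * _Float16 through a packed, __may_alias__ struct, so the pointer needs no
 * particular alignment, and the remaining seven lanes are zeroed:
 *   _Float16 __x = (_Float16)2.0;
 *   __m128h __v = _mm_load_sh(&__x); // {2.0, 0, 0, 0, 0, 0, 0, 0}
 */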
835 
836 static __inline__ __m128h __DEFAULT_FN_ATTRS128
837 _mm_mask_load_sh(__m128h __W, __mmask8 __U, const void *__A) {
838  __m128h src = (__v8hf)__builtin_shufflevector(
839  (__v8hf)__W, (__v8hf)_mm_setzero_ph(), 0, 8, 8, 8, 8, 8, 8, 8);
840 
841  return (__m128h)__builtin_ia32_loadsh128_mask((__v8hf *)__A, src, __U & 1);
842 }
843 
844 static __inline__ __m128h __DEFAULT_FN_ATTRS128
845 _mm_maskz_load_sh(__mmask8 __U, const void *__A) {
846  return (__m128h)__builtin_ia32_loadsh128_mask(
847  (__v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
848 }
849 
850 static __inline__ __m512h __DEFAULT_FN_ATTRS512
851 _mm512_load_ph(void const *__p) {
852  return *(const __m512h *)__p;
853 }
854 
855 static __inline__ __m256h __DEFAULT_FN_ATTRS256
856 _mm256_load_ph(void const *__p) {
857  return *(const __m256h *)__p;
858 }
859 
860 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_ph(void const *__p) {
861  return *(const __m128h *)__p;
862 }
863 
864 static __inline__ __m512h __DEFAULT_FN_ATTRS512
865 _mm512_loadu_ph(void const *__p) {
866  struct __loadu_ph {
867  __m512h_u __v;
868  } __attribute__((__packed__, __may_alias__));
869  return ((const struct __loadu_ph *)__p)->__v;
870 }
871 
872 static __inline__ __m256h __DEFAULT_FN_ATTRS256
873 _mm256_loadu_ph(void const *__p) {
874  struct __loadu_ph {
875  __m256h_u __v;
876  } __attribute__((__packed__, __may_alias__));
877  return ((const struct __loadu_ph *)__p)->__v;
878 }
879 
880 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_loadu_ph(void const *__p) {
881  struct __loadu_ph {
882  __m128h_u __v;
883  } __attribute__((__packed__, __may_alias__));
884  return ((const struct __loadu_ph *)__p)->__v;
885 }
886 
887 // stores with vmovsh:
888 static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_sh(void *__dp,
889  __m128h __a) {
890  struct __mm_store_sh_struct {
891  _Float16 __u;
892  } __attribute__((__packed__, __may_alias__));
893  ((struct __mm_store_sh_struct *)__dp)->__u = __a[0];
894 }
895 
896 static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sh(void *__W,
897  __mmask8 __U,
898  __m128h __A) {
899  __builtin_ia32_storesh128_mask((__v8hf *)__W, __A, __U & 1);
900 }
901 
902 static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_ph(void *__P,
903  __m512h __A) {
904  *(__m512h *)__P = __A;
905 }
906 
907 static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_store_ph(void *__P,
908  __m256h __A) {
909  *(__m256h *)__P = __A;
910 }
911 
912 static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_ph(void *__P,
913  __m128h __A) {
914  *(__m128h *)__P = __A;
915 }
916 
917 static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_ph(void *__P,
918  __m512h __A) {
919  struct __storeu_ph {
920  __m512h_u __v;
921  } __attribute__((__packed__, __may_alias__));
922  ((struct __storeu_ph *)__P)->__v = __A;
923 }
924 
925 static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_storeu_ph(void *__P,
926  __m256h __A) {
927  struct __storeu_ph {
928  __m256h_u __v;
929  } __attribute__((__packed__, __may_alias__));
930  ((struct __storeu_ph *)__P)->__v = __A;
931 }
932 
933 static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_ph(void *__P,
934  __m128h __A) {
935  struct __storeu_ph {
936  __m128h_u __v;
937  } __attribute__((__packed__, __may_alias__));
938  ((struct __storeu_ph *)__P)->__v = __A;
939 }
940 
941 // moves with vmovsh:
942 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_move_sh(__m128h __a,
943  __m128h __b) {
944  __a[0] = __b[0];
945  return __a;
946 }
947 
948 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_move_sh(__m128h __W,
949  __mmask8 __U,
950  __m128h __A,
951  __m128h __B) {
952  return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), __W);
953 }
954 
955 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_move_sh(__mmask8 __U,
956  __m128h __A,
957  __m128h __B) {
958  return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B),
959  _mm_setzero_ph());
960 }
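/* Illustrative sketch (not part of the header; __a and __b are assumed to be
 * initialized __m128h values): _mm_move_sh copies only element 0 of __b,
 * keeping the upper seven elements of __a:
 *   __m128h __r = _mm_move_sh(__a, __b); // __r[0] == __b[0], __r[1..7] == __a[1..7]
 */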
961 
962 // vmovw:
963 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsi16_si128(short __a) {
964  return (__m128i)(__v8hi){__a, 0, 0, 0, 0, 0, 0, 0};
965 }
966 
967 static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtsi128_si16(__m128i __a) {
968  __v8hi __b = (__v8hi)__a;
969  return __b[0];
970 }
971 
972 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rcp_ph(__m512h __A) {
973  return (__m512h)__builtin_ia32_rcpph512_mask(
974  (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
975 }
976 
977 static __inline__ __m512h __DEFAULT_FN_ATTRS512
978 _mm512_mask_rcp_ph(__m512h __W, __mmask32 __U, __m512h __A) {
979  return (__m512h)__builtin_ia32_rcpph512_mask((__v32hf)__A, (__v32hf)__W,
980  (__mmask32)__U);
981 }
982 
983 static __inline__ __m512h __DEFAULT_FN_ATTRS512
984 _mm512_maskz_rcp_ph(__mmask32 __U, __m512h __A) {
985  return (__m512h)__builtin_ia32_rcpph512_mask(
986  (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
987 }
988 
989 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rsqrt_ph(__m512h __A) {
990  return (__m512h)__builtin_ia32_rsqrtph512_mask(
991  (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
992 }
993 
994 static __inline__ __m512h __DEFAULT_FN_ATTRS512
995 _mm512_mask_rsqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
996  return (__m512h)__builtin_ia32_rsqrtph512_mask((__v32hf)__A, (__v32hf)__W,
997  (__mmask32)__U);
998 }
999 
1000 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1001 _mm512_maskz_rsqrt_ph(__mmask32 __U, __m512h __A) {
1002  return (__m512h)__builtin_ia32_rsqrtph512_mask(
1003  (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
1004 }
1005 
1006 #define _mm512_getmant_ph(A, B, C) \
1007  ((__m512h)__builtin_ia32_getmantph512_mask( \
1008  (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1009  (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, \
1010  _MM_FROUND_CUR_DIRECTION))
1011 
1012 #define _mm512_mask_getmant_ph(W, U, A, B, C) \
1013  ((__m512h)__builtin_ia32_getmantph512_mask( \
1014  (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \
1015  (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
1016 
1017 #define _mm512_maskz_getmant_ph(U, A, B, C) \
1018  ((__m512h)__builtin_ia32_getmantph512_mask( \
1019  (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1020  (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
1021 
1022 #define _mm512_getmant_round_ph(A, B, C, R) \
1023  ((__m512h)__builtin_ia32_getmantph512_mask( \
1024  (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1025  (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))
1026 
1027 #define _mm512_mask_getmant_round_ph(W, U, A, B, C, R) \
1028  ((__m512h)__builtin_ia32_getmantph512_mask( \
1029  (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \
1030  (__mmask32)(U), (int)(R)))
1031 
1032 #define _mm512_maskz_getmant_round_ph(U, A, B, C, R) \
1033  ((__m512h)__builtin_ia32_getmantph512_mask( \
1034  (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
1035  (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1036 
1037 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_getexp_ph(__m512h __A) {
1038  return (__m512h)__builtin_ia32_getexpph512_mask(
1039  (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
1040  _MM_FROUND_CUR_DIRECTION);
1041 }
1042 
1043 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1044 _mm512_mask_getexp_ph(__m512h __W, __mmask32 __U, __m512h __A) {
1045  return (__m512h)__builtin_ia32_getexpph512_mask(
1046  (__v32hf)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1047 }
1048 
1049 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1050 _mm512_maskz_getexp_ph(__mmask32 __U, __m512h __A) {
1051  return (__m512h)__builtin_ia32_getexpph512_mask(
1052  (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1053  _MM_FROUND_CUR_DIRECTION);
1054 }
1055 
1056 #define _mm512_getexp_round_ph(A, R) \
1057  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
1058  (__v32hf)_mm512_undefined_ph(), \
1059  (__mmask32)-1, (int)(R)))
1060 
1061 #define _mm512_mask_getexp_round_ph(W, U, A, R) \
1062  ((__m512h)__builtin_ia32_getexpph512_mask( \
1063  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R)))
1064 
1065 #define _mm512_maskz_getexp_round_ph(U, A, R) \
1066  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
1067  (__v32hf)_mm512_setzero_ph(), \
1068  (__mmask32)(U), (int)(R)))
1069 
1070 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_scalef_ph(__m512h __A,
1071  __m512h __B) {
1072  return (__m512h)__builtin_ia32_scalefph512_mask(
1073  (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
1074  _MM_FROUND_CUR_DIRECTION);
1075 }
1076 
1077 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1078 _mm512_mask_scalef_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
1079  return (__m512h)__builtin_ia32_scalefph512_mask((__v32hf)__A, (__v32hf)__B,
1080  (__v32hf)__W, (__mmask32)__U,
1081  _MM_FROUND_CUR_DIRECTION);
1082 }
1083 
1084 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1085 _mm512_maskz_scalef_ph(__mmask32 __U, __m512h __A, __m512h __B) {
1086  return (__m512h)__builtin_ia32_scalefph512_mask(
1087  (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1088  _MM_FROUND_CUR_DIRECTION);
1089 }
1090 
1091 #define _mm512_scalef_round_ph(A, B, R) \
1092  ((__m512h)__builtin_ia32_scalefph512_mask( \
1093  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \
1094  (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))
1095 
1096 #define _mm512_mask_scalef_round_ph(W, U, A, B, R) \
1097  ((__m512h)__builtin_ia32_scalefph512_mask( \
1098  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(W), \
1099  (__mmask32)(U), (int)(R)))
1100 
1101 #define _mm512_maskz_scalef_round_ph(U, A, B, R) \
1102  ((__m512h)__builtin_ia32_scalefph512_mask( \
1103  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \
1104  (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1105 
1106 #define _mm512_roundscale_ph(A, B) \
1107  ((__m512h)__builtin_ia32_rndscaleph_mask( \
1108  (__v32hf)(__m512h)(A), (int)(B), (__v32hf)(__m512h)(A), (__mmask32)-1, \
1109  _MM_FROUND_CUR_DIRECTION))
1110 
1111 #define _mm512_mask_roundscale_ph(A, B, C, imm) \
1112  ((__m512h)__builtin_ia32_rndscaleph_mask( \
1113  (__v32hf)(__m512h)(C), (int)(imm), (__v32hf)(__m512h)(A), \
1114  (__mmask32)(B), _MM_FROUND_CUR_DIRECTION))
1115 
1116 #define _mm512_maskz_roundscale_ph(A, B, imm) \
1117  ((__m512h)__builtin_ia32_rndscaleph_mask( \
1118  (__v32hf)(__m512h)(B), (int)(imm), (__v32hf)_mm512_setzero_ph(), \
1119  (__mmask32)(A), _MM_FROUND_CUR_DIRECTION))
1120 
1121 #define _mm512_mask_roundscale_round_ph(A, B, C, imm, R) \
1122  ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(C), (int)(imm), \
1123  (__v32hf)(__m512h)(A), \
1124  (__mmask32)(B), (int)(R)))
1125 
1126 #define _mm512_maskz_roundscale_round_ph(A, B, imm, R) \
1127  ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(B), (int)(imm), \
1128  (__v32hf)_mm512_setzero_ph(), \
1129  (__mmask32)(A), (int)(R)))
1130 
1131 #define _mm512_roundscale_round_ph(A, imm, R) \
1132  ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(A), (int)(imm), \
1133  (__v32hf)_mm512_undefined_ph(), \
1134  (__mmask32)-1, (int)(R)))
1135 
1136 #define _mm512_reduce_ph(A, imm) \
1137  ((__m512h)__builtin_ia32_reduceph512_mask( \
1138  (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_undefined_ph(), \
1139  (__mmask32)-1, _MM_FROUND_CUR_DIRECTION))
1140 
1141 #define _mm512_mask_reduce_ph(W, U, A, imm) \
1142  ((__m512h)__builtin_ia32_reduceph512_mask( \
1143  (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)(__m512h)(W), \
1144  (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
1145 
1146 #define _mm512_maskz_reduce_ph(U, A, imm) \
1147  ((__m512h)__builtin_ia32_reduceph512_mask( \
1148  (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_setzero_ph(), \
1149  (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
1150 
1151 #define _mm512_mask_reduce_round_ph(W, U, A, imm, R) \
1152  ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
1153  (__v32hf)(__m512h)(W), \
1154  (__mmask32)(U), (int)(R)))
1155 
1156 #define _mm512_maskz_reduce_round_ph(U, A, imm, R) \
1157  ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
1158  (__v32hf)_mm512_setzero_ph(), \
1159  (__mmask32)(U), (int)(R)))
1160 
1161 #define _mm512_reduce_round_ph(A, imm, R) \
1162  ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
1163  (__v32hf)_mm512_undefined_ph(), \
1164  (__mmask32)-1, (int)(R)))
1165 
1166 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_sh(__m128h __A,
1167  __m128h __B) {
1168  return (__m128h)__builtin_ia32_rcpsh_mask(
1169  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
1170 }
1171 
1172 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_sh(__m128h __W,
1173  __mmask8 __U,
1174  __m128h __A,
1175  __m128h __B) {
1176  return (__m128h)__builtin_ia32_rcpsh_mask((__v8hf)__A, (__v8hf)__B,
1177  (__v8hf)__W, (__mmask8)__U);
1178 }
1179 
1180 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_sh(__mmask8 __U,
1181  __m128h __A,
1182  __m128h __B) {
1183  return (__m128h)__builtin_ia32_rcpsh_mask(
1184  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1185 }
1186 
1187 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_sh(__m128h __A,
1188  __m128h __B) {
1189  return (__m128h)__builtin_ia32_rsqrtsh_mask(
1190  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
1191 }
1192 
1193 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_sh(__m128h __W,
1194  __mmask8 __U,
1195  __m128h __A,
1196  __m128h __B) {
1197  return (__m128h)__builtin_ia32_rsqrtsh_mask((__v8hf)__A, (__v8hf)__B,
1198  (__v8hf)__W, (__mmask8)__U);
1199 }
1200 
1201 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1202 _mm_maskz_rsqrt_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1203  return (__m128h)__builtin_ia32_rsqrtsh_mask(
1204  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1205 }
1206 
1207 #define _mm_getmant_round_sh(A, B, C, D, R) \
1208  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1209  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1210  (__v8hf)_mm_setzero_ph(), (__mmask8)-1, (int)(R)))
1211 
1212 #define _mm_getmant_sh(A, B, C, D) \
1213  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1214  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1215  (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
1216 
1217 #define _mm_mask_getmant_sh(W, U, A, B, C, D) \
1218  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1219  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1220  (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
1221 
1222 #define _mm_mask_getmant_round_sh(W, U, A, B, C, D, R) \
1223  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1224  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1225  (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))
1226 
1227 #define _mm_maskz_getmant_sh(U, A, B, C, D) \
1228  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1229  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1230  (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
1231 
1232 #define _mm_maskz_getmant_round_sh(U, A, B, C, D, R) \
1233  ((__m128h)__builtin_ia32_getmantsh_round_mask( \
1234  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
1235  (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
1236 
1237 #define _mm_getexp_round_sh(A, B, R) \
1238  ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
1239  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1240  (__mmask8)-1, (int)(R)))
1241 
1242 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_sh(__m128h __A,
1243  __m128h __B) {
1244  return (__m128h)__builtin_ia32_getexpsh128_round_mask(
1245  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1246  _MM_FROUND_CUR_DIRECTION);
1247 }
1248 
1249 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1250 _mm_mask_getexp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1251  return (__m128h)__builtin_ia32_getexpsh128_round_mask(
1252  (__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U,
1253  _MM_FROUND_CUR_DIRECTION);
1254 }
1255 
1256 #define _mm_mask_getexp_round_sh(W, U, A, B, R) \
1257  ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
1258  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1259  (__mmask8)(U), (int)(R)))
1260 
1261 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1262 _mm_maskz_getexp_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1263  return (__m128h)__builtin_ia32_getexpsh128_round_mask(
1264  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1265  _MM_FROUND_CUR_DIRECTION);
1266 }
1267 
1268 #define _mm_maskz_getexp_round_sh(U, A, B, R) \
1269  ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
1270  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1271  (__mmask8)(U), (int)(R)))
1272 
1273 #define _mm_scalef_round_sh(A, B, R) \
1274  ((__m128h)__builtin_ia32_scalefsh_round_mask( \
1275  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1276  (__mmask8)-1, (int)(R)))
1277 
1278 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_sh(__m128h __A,
1279  __m128h __B) {
1280  return (__m128h)__builtin_ia32_scalefsh_round_mask(
1281  (__v8hf)__A, (__v8hf)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1282  _MM_FROUND_CUR_DIRECTION);
1283 }
1284 
1285 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1286 _mm_mask_scalef_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1287  return (__m128h)__builtin_ia32_scalefsh_round_mask((__v8hf)__A, (__v8hf)__B,
1288  (__v8hf)__W, (__mmask8)__U,
1289  _MM_FROUND_CUR_DIRECTION);
1290 }
1291 
1292 #define _mm_mask_scalef_round_sh(W, U, A, B, R) \
1293  ((__m128h)__builtin_ia32_scalefsh_round_mask( \
1294  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1295  (__mmask8)(U), (int)(R)))
1296 
1297 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1298 _mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) {
1299  return (__m128h)__builtin_ia32_scalefsh_round_mask(
1300  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1301  _MM_FROUND_CUR_DIRECTION);
1302 }
1303 
1304 #define _mm_maskz_scalef_round_sh(U, A, B, R) \
1305  ((__m128h)__builtin_ia32_scalefsh_round_mask( \
1306  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1307  (__mmask8)(U), (int)(R)))
1308 
1309 #define _mm_roundscale_round_sh(A, B, imm, R) \
1310  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1311  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1312  (__mmask8)-1, (int)(imm), (int)(R)))
1313 
1314 #define _mm_roundscale_sh(A, B, imm) \
1315  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1316  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1317  (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION))
1318 
1319 #define _mm_mask_roundscale_sh(W, U, A, B, I) \
1320  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1321  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1322  (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
1323 
1324 #define _mm_mask_roundscale_round_sh(W, U, A, B, I, R) \
1325  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1326  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1327  (__mmask8)(U), (int)(I), (int)(R)))
1328 
1329 #define _mm_maskz_roundscale_sh(U, A, B, I) \
1330  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1331  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1332  (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
1333 
1334 #define _mm_maskz_roundscale_round_sh(U, A, B, I, R) \
1335  ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
1336  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1337  (__mmask8)(U), (int)(I), (int)(R)))
1338 
1339 #define _mm_reduce_sh(A, B, C) \
1340  ((__m128h)__builtin_ia32_reducesh_mask( \
1341  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1342  (__mmask8)-1, (int)(C), _MM_FROUND_CUR_DIRECTION))
1343 
1344 #define _mm_mask_reduce_sh(W, U, A, B, C) \
1345  ((__m128h)__builtin_ia32_reducesh_mask( \
1346  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1347  (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION))
1348 
1349 #define _mm_maskz_reduce_sh(U, A, B, C) \
1350  ((__m128h)__builtin_ia32_reducesh_mask( \
1351  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1352  (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION))
1353 
1354 #define _mm_reduce_round_sh(A, B, C, R) \
1355  ((__m128h)__builtin_ia32_reducesh_mask( \
1356  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1357  (__mmask8)-1, (int)(C), (int)(R)))
1358 
1359 #define _mm_mask_reduce_round_sh(W, U, A, B, C, R) \
1360  ((__m128h)__builtin_ia32_reducesh_mask( \
1361  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1362  (__mmask8)(U), (int)(C), (int)(R)))
1363 
1364 #define _mm_maskz_reduce_round_sh(U, A, B, C, R) \
1365  ((__m128h)__builtin_ia32_reducesh_mask( \
1366  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1367  (__mmask8)(U), (int)(C), (int)(R)))
1368 
1369 #define _mm512_sqrt_round_ph(A, R) \
1370  ((__m512h)__builtin_ia32_sqrtph512((__v32hf)(__m512h)(A), (int)(R)))
1371 
1372 #define _mm512_mask_sqrt_round_ph(W, U, A, R) \
1373  ((__m512h)__builtin_ia32_selectph_512( \
1374  (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \
1375  (__v32hf)(__m512h)(W)))
1376 
1377 #define _mm512_maskz_sqrt_round_ph(U, A, R) \
1378  ((__m512h)__builtin_ia32_selectph_512( \
1379  (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \
1380  (__v32hf)_mm512_setzero_ph()))
1381 
1382 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A) {
1383  return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A,
1384  _MM_FROUND_CUR_DIRECTION);
1385 }
1386 
1387 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1388 _mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
1389  return (__m512h)__builtin_ia32_selectph_512(
1390  (__mmask32)(__U),
1391  (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
1392  (__v32hf)(__m512h)(__W));
1393 }
1394 
1395 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1396 _mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A) {
1397  return (__m512h)__builtin_ia32_selectph_512(
1398  (__mmask32)(__U),
1399  (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
1400  (__v32hf)_mm512_setzero_ph());
1401 }
1402 
1403 #define _mm_sqrt_round_sh(A, B, R) \
1404  ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
1405  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1406  (__mmask8)-1, (int)(R)))
1407 
1408 #define _mm_mask_sqrt_round_sh(W, U, A, B, R) \
1409  ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
1410  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
1411  (__mmask8)(U), (int)(R)))
1412 
1413 #define _mm_maskz_sqrt_round_sh(U, A, B, R) \
1414  ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
1415  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
1416  (__mmask8)(U), (int)(R)))
1417 
1418 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_sh(__m128h __A,
1419  __m128h __B) {
1420  return (__m128h)__builtin_ia32_sqrtsh_round_mask(
1421  (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
1422  (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
1423 }
1424 
1425 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sh(__m128h __W,
1426  __mmask32 __U,
1427  __m128h __A,
1428  __m128h __B) {
1429  return (__m128h)__builtin_ia32_sqrtsh_round_mask(
1430  (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)(__m128h)(__W),
1431  (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
1432 }
1433 
1434 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sh(__mmask32 __U,
1435  __m128h __A,
1436  __m128h __B) {
1437  return (__m128h)__builtin_ia32_sqrtsh_round_mask(
1438  (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
1439  (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
1440 }
1441 
1442 #define _mm512_mask_fpclass_ph_mask(U, A, imm) \
1443  ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \
1444  (int)(imm), (__mmask32)(U)))
1445 
1446 #define _mm512_fpclass_ph_mask(A, imm) \
1447  ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \
1448  (int)(imm), (__mmask32)-1))
1449 
1450 #define _mm_fpclass_sh_mask(A, imm) \
1451  ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm), \
1452  (__mmask8)-1))
1453 
1454 #define _mm_mask_fpclass_sh_mask(U, A, imm) \
1455  ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm), \
1456  (__mmask8)(U)))
1457 
1458 #define _mm512_cvt_roundpd_ph(A, R) \
1459  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \
1460  (__v8df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
1461 
1462 #define _mm512_mask_cvt_roundpd_ph(W, U, A, R) \
1463  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask((__v8df)(A), (__v8hf)(W), \
1464  (__mmask8)(U), (int)(R)))
1465 
1466 #define _mm512_maskz_cvt_roundpd_ph(U, A, R) \
1467  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \
1468  (__v8df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
1469 
1470 static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A) {
1471  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
1472  (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
1473  _MM_FROUND_CUR_DIRECTION);
1474 }
1475 
1476 static __inline__ __m128h __DEFAULT_FN_ATTRS512
1477 _mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A) {
1478  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
1479  (__v8df)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
1480 }
1481 
1482 static __inline__ __m128h __DEFAULT_FN_ATTRS512
1483 _mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A) {
1484  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
1485  (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1486  _MM_FROUND_CUR_DIRECTION);
1487 }
1488 
1489 #define _mm512_cvt_roundph_pd(A, R) \
1490  ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \
1491  (__v8hf)(A), (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), (int)(R)))
1492 
1493 #define _mm512_mask_cvt_roundph_pd(W, U, A, R) \
1494  ((__m512d)__builtin_ia32_vcvtph2pd512_mask((__v8hf)(A), (__v8df)(W), \
1495  (__mmask8)(U), (int)(R)))
1496 
1497 #define _mm512_maskz_cvt_roundph_pd(U, A, R) \
1498  ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \
1499  (__v8hf)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
1500 
1501 static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A) {
1502  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
1503  (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)-1,
1504  _MM_FROUND_CUR_DIRECTION);
1505 }
1506 
1507 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1508 _mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A) {
1509  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
1510  (__v8hf)__A, (__v8df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
1511 }
1512 
1513 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1514 _mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
1515  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
1516  (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U,
1517  _MM_FROUND_CUR_DIRECTION);
1518 }
1519 
1520 #define _mm_cvt_roundsh_ss(A, B, R) \
1521  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
1522  (__v4sf)_mm_undefined_ps(), \
1523  (__mmask8)(-1), (int)(R)))
1524 
1525 #define _mm_mask_cvt_roundsh_ss(W, U, A, B, R) \
1526  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask( \
1527  (__v4sf)(A), (__v8hf)(B), (__v4sf)(W), (__mmask8)(U), (int)(R)))
1528 
1529 #define _mm_maskz_cvt_roundsh_ss(U, A, B, R) \
1530  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
1531  (__v4sf)_mm_setzero_ps(), \
1532  (__mmask8)(U), (int)(R)))
1533 
1534 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A,
1535  __m128h __B) {
1536  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
1537  (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1,
1538  _MM_FROUND_CUR_DIRECTION);
1539 }
1540 
1541 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_ss(__m128 __W,
1542  __mmask8 __U,
1543  __m128 __A,
1544  __m128h __B) {
1545  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B,
1546  (__v4sf)__W, (__mmask8)__U,
1547  _MM_FROUND_CUR_DIRECTION);
1548 }
1549 
1550 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U,
1551  __m128 __A,
1552  __m128h __B) {
1553  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
1554  (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U,
1555  _MM_FROUND_CUR_DIRECTION);
1556 }
1557 
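/* Editorial note: the scalar conversions in this group only produce element 0;
 * the remaining lanes are copied from the first vector operand.  A hedged
 * one-liner with an illustrative __m128h h:
 *
 *   float f = _mm_cvtss_f32(_mm_cvtsh_ss(_mm_setzero_ps(), h));
 */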
1558 #define _mm_cvt_roundss_sh(A, B, R) \
1559  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
1560  (__v8hf)_mm_undefined_ph(), \
1561  (__mmask8)(-1), (int)(R)))
1562 
1563 #define _mm_mask_cvt_roundss_sh(W, U, A, B, R) \
1564  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask( \
1565  (__v8hf)(A), (__v4sf)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))
1566 
1567 #define _mm_maskz_cvt_roundss_sh(U, A, B, R) \
1568  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
1569  (__v8hf)_mm_setzero_ph(), \
1570  (__mmask8)(U), (int)(R)))
1571 
1572 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A,
1573  __m128 __B) {
1574  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
1575  (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
1576  _MM_FROUND_CUR_DIRECTION);
1577 }
1578 
1579 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sh(__m128h __W,
1580  __mmask8 __U,
1581  __m128h __A,
1582  __m128 __B) {
1583  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
1584  (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U,
1585  _MM_FROUND_CUR_DIRECTION);
1586 }
1587 
1588 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U,
1589  __m128h __A,
1590  __m128 __B) {
1591  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
1592  (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1593  _MM_FROUND_CUR_DIRECTION);
1594 }
1595 
1596 #define _mm_cvt_roundsd_sh(A, B, R) \
1597  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
1598  (__v8hf)_mm_undefined_ph(), \
1599  (__mmask8)(-1), (int)(R)))
1600 
1601 #define _mm_mask_cvt_roundsd_sh(W, U, A, B, R) \
1602  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask( \
1603  (__v8hf)(A), (__v2df)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))
1604 
1605 #define _mm_maskz_cvt_roundsd_sh(U, A, B, R) \
1606  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
1607  (__v8hf)_mm_setzero_ph(), \
1608  (__mmask8)(U), (int)(R)))
1609 
1610 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A,
1611  __m128d __B) {
1612  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
1613  (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
1614  _MM_FROUND_CUR_DIRECTION);
1615 }
1616 
1617 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_sh(__m128h __W,
1618  __mmask8 __U,
1619  __m128h __A,
1620  __m128d __B) {
1621  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
1622  (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U,
1623  _MM_FROUND_CUR_DIRECTION);
1624 }
1625 
1626 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1627 _mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) {
1628  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
1629  (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
1630  _MM_FROUND_CUR_DIRECTION);
1631 }
1632 
1633 #define _mm_cvt_roundsh_sd(A, B, R) \
1634  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
1635  (__v2df)_mm_undefined_pd(), \
1636  (__mmask8)(-1), (int)(R)))
1637 
1638 #define _mm_mask_cvt_roundsh_sd(W, U, A, B, R) \
1639  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask( \
1640  (__v2df)(A), (__v8hf)(B), (__v2df)(W), (__mmask8)(U), (int)(R)))
1641 
1642 #define _mm_maskz_cvt_roundsh_sd(U, A, B, R) \
1643  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
1644  (__v2df)_mm_setzero_pd(), \
1645  (__mmask8)(U), (int)(R)))
1646 
1647 static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A,
1648  __m128h __B) {
1649  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
1650  (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1,
1651  _MM_FROUND_CUR_DIRECTION);
1652 }
1653 
1654 static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_sd(__m128d __W,
1655  __mmask8 __U,
1656  __m128d __A,
1657  __m128h __B) {
1658  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
1659  (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U,
1660  _MM_FROUND_CUR_DIRECTION);
1661 }
1662 
1663 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1664 _mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) {
1665  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
1666  (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U,
1667  _MM_FROUND_CUR_DIRECTION);
1668 }
1669 
1670 #define _mm512_cvt_roundph_epi16(A, R) \
1671  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
1672  (__v32hi)_mm512_undefined_epi32(), \
1673  (__mmask32)(-1), (int)(R)))
1674 
1675 #define _mm512_mask_cvt_roundph_epi16(W, U, A, R) \
1676  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), (__v32hi)(W), \
1677  (__mmask32)(U), (int)(R)))
1678 
1679 #define _mm512_maskz_cvt_roundph_epi16(U, A, R) \
1680  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
1681  (__v32hi)_mm512_setzero_epi32(), \
1682  (__mmask32)(U), (int)(R)))
1683 
1684 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1685 _mm512_cvtph_epi16(__m512h __A) {
1686  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
1687  (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
1688  _MM_FROUND_CUR_DIRECTION);
1689 }
1690 
1691 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1692 _mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
1693  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
1694  (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1695 }
1696 
1697 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1698 _mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A) {
1699  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
1700  (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
1701  _MM_FROUND_CUR_DIRECTION);
1702 }
1703 
1704 #define _mm512_cvtt_roundph_epi16(A, R) \
1705  ((__m512i)__builtin_ia32_vcvttph2w512_mask( \
1706  (__v32hf)(A), (__v32hi)_mm512_undefined_epi32(), (__mmask32)(-1), \
1707  (int)(R)))
1708 
1709 #define _mm512_mask_cvtt_roundph_epi16(W, U, A, R) \
1710  ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), (__v32hi)(W), \
1711  (__mmask32)(U), (int)(R)))
1712 
1713 #define _mm512_maskz_cvtt_roundph_epi16(U, A, R) \
1714  ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), \
1715  (__v32hi)_mm512_setzero_epi32(), \
1716  (__mmask32)(U), (int)(R)))
1717 
1718 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1719 _mm512_cvttph_epi16(__m512h __A) {
1720  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
1721  (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
1722  _MM_FROUND_CUR_DIRECTION);
1723 }
1724 
1725 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1726 _mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
1727  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
1728  (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1729 }
1730 
1731 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1732 _mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A) {
1733  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
1734  (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
1735  _MM_FROUND_CUR_DIRECTION);
1736 }
1737 
1738 #define _mm512_cvt_roundepi16_ph(A, R) \
1739  ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), \
1740  (__v32hf)_mm512_undefined_ph(), \
1741  (__mmask32)(-1), (int)(R)))
1742 
1743 #define _mm512_mask_cvt_roundepi16_ph(W, U, A, R) \
1744  ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), (__v32hf)(W), \
1745  (__mmask32)(U), (int)(R)))
1746 
1747 #define _mm512_maskz_cvt_roundepi16_ph(U, A, R) \
1748  ((__m512h)__builtin_ia32_vcvtw2ph512_mask( \
1749  (__v32hi)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1750 
1751 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1752 _mm512_cvtepi16_ph(__m512i __A) {
1753  return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
1754  (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
1755  _MM_FROUND_CUR_DIRECTION);
1756 }
1757 
1758 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1759 _mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
1760  return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
1761  (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1762 }
1763 
1764 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1765 _mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A) {
1766  return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
1767  (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1768  _MM_FROUND_CUR_DIRECTION);
1769 }
1770 
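/* Editorial usage sketch: the word <-> half conversions above and below are
 * element-wise over all 32 lanes.  A masked truncating round trip, with x and
 * k as illustrative names:
 *
 *   __m512i w = _mm512_cvttph_epi16(x);         // truncate toward zero
 *   __m512h y = _mm512_maskz_cvtepi16_ph(k, w); // unselected lanes become 0
 */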
1771 #define _mm512_cvt_roundph_epu16(A, R) \
1772  ((__m512i)__builtin_ia32_vcvtph2uw512_mask( \
1773  (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
1774  (int)(R)))
1775 
1776 #define _mm512_mask_cvt_roundph_epu16(W, U, A, R) \
1777  ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
1778  (__mmask32)(U), (int)(R)))
1779 
1780 #define _mm512_maskz_cvt_roundph_epu16(U, A, R) \
1781  ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), \
1782  (__v32hu)_mm512_setzero_epi32(), \
1783  (__mmask32)(U), (int)(R)))
1784 
1785 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1786 _mm512_cvtph_epu16(__m512h __A) {
1787  return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
1788  (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
1789  _MM_FROUND_CUR_DIRECTION);
1790 }
1791 
1792 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1793 _mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
1794  return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
1795  (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1796 }
1797 
1798 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1799 _mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A) {
1800  return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
1801  (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
1802  _MM_FROUND_CUR_DIRECTION);
1803 }
1804 
1805 #define _mm512_cvtt_roundph_epu16(A, R) \
1806  ((__m512i)__builtin_ia32_vcvttph2uw512_mask( \
1807  (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
1808  (int)(R)))
1809 
1810 #define _mm512_mask_cvtt_roundph_epu16(W, U, A, R) \
1811  ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
1812  (__mmask32)(U), (int)(R)))
1813 
1814 #define _mm512_maskz_cvtt_roundph_epu16(U, A, R) \
1815  ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), \
1816  (__v32hu)_mm512_setzero_epi32(), \
1817  (__mmask32)(U), (int)(R)))
1818 
1819 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1820 _mm512_cvttph_epu16(__m512h __A) {
1821  return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
1822  (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
1823  _MM_FROUND_CUR_DIRECTION);
1824 }
1825 
1826 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1827 _mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
1828  return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
1829  (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1830 }
1831 
1832 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1833 _mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A) {
1834  return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
1835  (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
1836  _MM_FROUND_CUR_DIRECTION);
1837 }
1838 
1839 #define _mm512_cvt_roundepu16_ph(A, R) \
1840  ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), \
1841  (__v32hf)_mm512_undefined_ph(), \
1842  (__mmask32)(-1), (int)(R)))
1843 
1844 #define _mm512_mask_cvt_roundepu16_ph(W, U, A, R) \
1845  ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), (__v32hf)(W), \
1846  (__mmask32)(U), (int)(R)))
1847 
1848 #define _mm512_maskz_cvt_roundepu16_ph(U, A, R) \
1849  ((__m512h)__builtin_ia32_vcvtuw2ph512_mask( \
1850  (__v32hu)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1851 
1852 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1853 _mm512_cvtepu16_ph(__m512i __A) {
1854  return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
1855  (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
1856  _MM_FROUND_CUR_DIRECTION);
1857 }
1858 
1859 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1860 _mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
1861  return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
1862  (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1863 }
1864 
1865 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1866 _mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A) {
1867  return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
1868  (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1869  _MM_FROUND_CUR_DIRECTION);
1870 }
1871 
1872 #define _mm512_cvt_roundph_epi32(A, R) \
1873  ((__m512i)__builtin_ia32_vcvtph2dq512_mask( \
1874  (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
1875  (int)(R)))
1876 
1877 #define _mm512_mask_cvt_roundph_epi32(W, U, A, R) \
1878  ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), (__v16si)(W), \
1879  (__mmask16)(U), (int)(R)))
1880 
1881 #define _mm512_maskz_cvt_roundph_epi32(U, A, R) \
1882  ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), \
1883  (__v16si)_mm512_setzero_epi32(), \
1884  (__mmask16)(U), (int)(R)))
1885 
1886 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1887 _mm512_cvtph_epi32(__m256h __A) {
1888  return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
1889  (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
1890  _MM_FROUND_CUR_DIRECTION);
1891 }
1892 
1893 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1894 _mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
1895  return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
1896  (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1897 }
1898 
1899 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1900 _mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A) {
1901  return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
1902  (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
1903  _MM_FROUND_CUR_DIRECTION);
1904 }
1905 
1906 #define _mm512_cvt_roundph_epu32(A, R) \
1907  ((__m512i)__builtin_ia32_vcvtph2udq512_mask( \
1908  (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
1909  (int)(R)))
1910 
1911 #define _mm512_mask_cvt_roundph_epu32(W, U, A, R) \
1912  ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), (__v16su)(W), \
1913  (__mmask16)(U), (int)(R)))
1914 
1915 #define _mm512_maskz_cvt_roundph_epu32(U, A, R) \
1916  ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), \
1917  (__v16su)_mm512_setzero_epi32(), \
1918  (__mmask16)(U), (int)(R)))
1919 
1920 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1921 _mm512_cvtph_epu32(__m256h __A) {
1922  return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
1923  (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
1924  _MM_FROUND_CUR_DIRECTION);
1925 }
1926 
1927 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1928 _mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
1929  return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
1930  (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1931 }
1932 
1933 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1934 _mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A) {
1935  return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
1936  (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
1937  _MM_FROUND_CUR_DIRECTION);
1938 }
1939 
1940 #define _mm512_cvt_roundepi32_ph(A, R) \
1941  ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), \
1942  (__v16hf)_mm256_undefined_ph(), \
1943  (__mmask16)(-1), (int)(R)))
1944 
1945 #define _mm512_mask_cvt_roundepi32_ph(W, U, A, R) \
1946  ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), (__v16hf)(W), \
1947  (__mmask16)(U), (int)(R)))
1948 
1949 #define _mm512_maskz_cvt_roundepi32_ph(U, A, R) \
1950  ((__m256h)__builtin_ia32_vcvtdq2ph512_mask( \
1951  (__v16si)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
1952 
1953 static __inline__ __m256h __DEFAULT_FN_ATTRS512
1954 _mm512_cvtepi32_ph(__m512i __A) {
1955  return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
1956  (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
1957  _MM_FROUND_CUR_DIRECTION);
1958 }
1959 
1960 static __inline__ __m256h __DEFAULT_FN_ATTRS512
1961 _mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
1962  return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
1963  (__v16si)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1964 }
1965 
1966 static __inline__ __m256h __DEFAULT_FN_ATTRS512
1967 _mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A) {
1968  return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
1969  (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
1970  _MM_FROUND_CUR_DIRECTION);
1971 }
1972 
1973 #define _mm512_cvt_roundepu32_ph(A, R) \
1974  ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), \
1975  (__v16hf)_mm256_undefined_ph(), \
1976  (__mmask16)(-1), (int)(R)))
1977 
1978 #define _mm512_mask_cvt_roundepu32_ph(W, U, A, R) \
1979  ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), (__v16hf)(W), \
1980  (__mmask16)(U), (int)(R)))
1981 
1982 #define _mm512_maskz_cvt_roundepu32_ph(U, A, R) \
1983  ((__m256h)__builtin_ia32_vcvtudq2ph512_mask( \
1984  (__v16su)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
1985 
1986 static __inline__ __m256h __DEFAULT_FN_ATTRS512
1987 _mm512_cvtepu32_ph(__m512i __A) {
1988  return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
1989  (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
1990  _MM_FROUND_CUR_DIRECTION);
1991 }
1992 
1993 static __inline__ __m256h __DEFAULT_FN_ATTRS512
1994 _mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
1995  return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
1996  (__v16su)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1997 }
1998 
1999 static __inline__ __m256h __DEFAULT_FN_ATTRS512
2000 _mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A) {
2001  return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
2002  (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
2003  _MM_FROUND_CUR_DIRECTION);
2004 }
2005 
2006 #define _mm512_cvtt_roundph_epi32(A, R) \
2007  ((__m512i)__builtin_ia32_vcvttph2dq512_mask( \
2008  (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
2009  (int)(R)))
2010 
2011 #define _mm512_mask_cvtt_roundph_epi32(W, U, A, R) \
2012  ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), (__v16si)(W), \
2013  (__mmask16)(U), (int)(R)))
2014 
2015 #define _mm512_maskz_cvtt_roundph_epi32(U, A, R) \
2016  ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), \
2017  (__v16si)_mm512_setzero_epi32(), \
2018  (__mmask16)(U), (int)(R)))
2019 
2020 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2021 _mm512_cvttph_epi32(__m256h __A) {
2022  return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
2023  (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
2024  _MM_FROUND_CUR_DIRECTION);
2025 }
2026 
2027 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2028 _mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
2029  return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
2030  (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2031 }
2032 
2033 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2034 _mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A) {
2035  return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
2036  (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
2037  _MM_FROUND_CUR_DIRECTION);
2038 }
2039 
2040 #define _mm512_cvtt_roundph_epu32(A, R) \
2041  ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
2042  (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
2043  (int)(R)))
2044 
2045 #define _mm512_mask_cvtt_roundph_epu32(W, U, A, R) \
2046  ((__m512i)__builtin_ia32_vcvttph2udq512_mask((__v16hf)(A), (__v16su)(W), \
2047  (__mmask16)(U), (int)(R)))
2048 
2049 #define _mm512_maskz_cvtt_roundph_epu32(U, A, R) \
2050  ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
2051  (__v16hf)(A), (__v16su)_mm512_setzero_epi32(), (__mmask16)(U), \
2052  (int)(R)))
2053 
2054 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2055 _mm512_cvttph_epu32(__m256h __A) {
2056  return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
2057  (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
2058  _MM_FROUND_CUR_DIRECTION);
2059 }
2060 
2061 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2062 _mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
2063  return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
2064  (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2065 }
2066 
2067 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2068 _mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A) {
2069  return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
2070  (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
2071  _MM_FROUND_CUR_DIRECTION);
2072 }
2073 
2074 #define _mm512_cvt_roundepi64_ph(A, R) \
2075  ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
2076  (__v8di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
2077 
2078 #define _mm512_mask_cvt_roundepi64_ph(W, U, A, R) \
2079  ((__m128h)__builtin_ia32_vcvtqq2ph512_mask((__v8di)(A), (__v8hf)(W), \
2080  (__mmask8)(U), (int)(R)))
2081 
2082 #define _mm512_maskz_cvt_roundepi64_ph(U, A, R) \
2083  ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
2084  (__v8di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
2085 
2086 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2087 _mm512_cvtepi64_ph(__m512i __A) {
2088  return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
2089  (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
2090  _MM_FROUND_CUR_DIRECTION);
2091 }
2092 
2093 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2094 _mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
2095  return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
2096  (__v8di)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2097 }
2098 
2099 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2100 _mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A) {
2101  return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
2102  (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
2103  _MM_FROUND_CUR_DIRECTION);
2104 }
2105 
2106 #define _mm512_cvt_roundph_epi64(A, R) \
2107  ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), \
2108  (__v8di)_mm512_undefined_epi32(), \
2109  (__mmask8)(-1), (int)(R)))
2110 
2111 #define _mm512_mask_cvt_roundph_epi64(W, U, A, R) \
2112  ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), (__v8di)(W), \
2113  (__mmask8)(U), (int)(R)))
2114 
2115 #define _mm512_maskz_cvt_roundph_epi64(U, A, R) \
2116  ((__m512i)__builtin_ia32_vcvtph2qq512_mask( \
2117  (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2118 
2119 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2120 _mm512_cvtph_epi64(__m128h __A) {
2121  return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
2122  (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
2123  _MM_FROUND_CUR_DIRECTION);
2124 }
2125 
2126 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2127 _mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
2128  return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
2129  (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2130 }
2131 
2132 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2133 _mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
2134  return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
2135  (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
2136  _MM_FROUND_CUR_DIRECTION);
2137 }
2138 
2139 #define _mm512_cvt_roundepu64_ph(A, R) \
2140  ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
2141  (__v8du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
2142 
2143 #define _mm512_mask_cvt_roundepu64_ph(W, U, A, R) \
2144  ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask((__v8du)(A), (__v8hf)(W), \
2145  (__mmask8)(U), (int)(R)))
2146 
2147 #define _mm512_maskz_cvt_roundepu64_ph(U, A, R) \
2148  ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
2149  (__v8du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
2150 
2151 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2152 _mm512_cvtepu64_ph(__m512i __A) {
2153  return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
2154  (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
2155  _MM_FROUND_CUR_DIRECTION);
2156 }
2157 
2158 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2159 _mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
2160  return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
2161  (__v8du)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2162 }
2163 
2164 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2165 _mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A) {
2166  return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
2167  (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
2168  _MM_FROUND_CUR_DIRECTION);
2169 }
2170 
2171 #define _mm512_cvt_roundph_epu64(A, R) \
2172  ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
2173  (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
2174  (int)(R)))
2175 
2176 #define _mm512_mask_cvt_roundph_epu64(W, U, A, R) \
2177  ((__m512i)__builtin_ia32_vcvtph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
2178  (__mmask8)(U), (int)(R)))
2179 
2180 #define _mm512_maskz_cvt_roundph_epu64(U, A, R) \
2181  ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
2182  (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2183 
2184 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2185 _mm512_cvtph_epu64(__m128h __A) {
2186  return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
2187  (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
2188  _MM_FROUND_CUR_DIRECTION);
2189 }
2190 
2191 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2192 _mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
2193  return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
2194  (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2195 }
2196 
2197 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2198 _mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
2199  return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
2200  (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
2201  _MM_FROUND_CUR_DIRECTION);
2202 }
2203 
2204 #define _mm512_cvtt_roundph_epi64(A, R) \
2205  ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
2206  (__v8hf)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8)(-1), \
2207  (int)(R)))
2208 
2209 #define _mm512_mask_cvtt_roundph_epi64(W, U, A, R) \
2210  ((__m512i)__builtin_ia32_vcvttph2qq512_mask((__v8hf)(A), (__v8di)(W), \
2211  (__mmask8)(U), (int)(R)))
2212 
2213 #define _mm512_maskz_cvtt_roundph_epi64(U, A, R) \
2214  ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
2215  (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2216 
2217 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2218 _mm512_cvttph_epi64(__m128h __A) {
2219  return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
2220  (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
2221  _MM_FROUND_CUR_DIRECTION);
2222 }
2223 
2224 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2225 _mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
2226  return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
2227  (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2228 }
2229 
2230 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2231 _mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
2232  return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
2233  (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
2234  _MM_FROUND_CUR_DIRECTION);
2235 }
2236 
2237 #define _mm512_cvtt_roundph_epu64(A, R) \
2238  ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
2239  (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
2240  (int)(R)))
2241 
2242 #define _mm512_mask_cvtt_roundph_epu64(W, U, A, R) \
2243  ((__m512i)__builtin_ia32_vcvttph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
2244  (__mmask8)(U), (int)(R)))
2245 
2246 #define _mm512_maskz_cvtt_roundph_epu64(U, A, R) \
2247  ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
2248  (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2249 
2250 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2251 _mm512_cvttph_epu64(__m128h __A) {
2252  return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
2253  (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
2254  _MM_FROUND_CUR_DIRECTION);
2255 }
2256 
2257 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2258 _mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
2259  return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
2260  (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2261 }
2262 
2263 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2264 _mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
2265  return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
2266  (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
2267  _MM_FROUND_CUR_DIRECTION);
2268 }
2269 
2270 #define _mm_cvt_roundsh_i32(A, R) \
2271  ((int)__builtin_ia32_vcvtsh2si32((__v8hf)(A), (int)(R)))
2272 
2273 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A) {
2274  return (int)__builtin_ia32_vcvtsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2275 }
2276 
2277 #define _mm_cvt_roundsh_u32(A, R) \
2278  ((unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)(A), (int)(R)))
2279 
2280 static __inline__ unsigned int __DEFAULT_FN_ATTRS128
2281 _mm_cvtsh_u32(__m128h __A) {
2282  return (unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)__A,
2283  _MM_FROUND_CUR_DIRECTION);
2284 }
2285 
2286 #ifdef __x86_64__
2287 #define _mm_cvt_roundsh_i64(A, R) \
2288  ((long long)__builtin_ia32_vcvtsh2si64((__v8hf)(A), (int)(R)))
2289 
2290 static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_i64(__m128h __A) {
2291  return (long long)__builtin_ia32_vcvtsh2si64((__v8hf)__A,
2292  _MM_FROUND_CUR_DIRECTION);
2293 }
2294 
2295 #define _mm_cvt_roundsh_u64(A, R) \
2296  ((unsigned long long)__builtin_ia32_vcvtsh2usi64((__v8hf)(A), (int)(R)))
2297 
2298 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
2299 _mm_cvtsh_u64(__m128h __A) {
2300  return (unsigned long long)__builtin_ia32_vcvtsh2usi64(
2301  (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2302 }
2303 #endif // __x86_64__
2304 
2305 #define _mm_cvt_roundu32_sh(A, B, R) \
2306  ((__m128h)__builtin_ia32_vcvtusi2sh((__v8hf)(A), (unsigned int)(B), (int)(R)))
2307 
2308 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2309 _mm_cvtu32_sh(__m128h __A, unsigned int __B) {
2310  __A[0] = __B;
2311  return __A;
2312 }
2313 
2314 #ifdef __x86_64__
2315 #define _mm_cvt_roundu64_sh(A, B, R) \
2316  ((__m128h)__builtin_ia32_vcvtusi642sh((__v8hf)(A), (unsigned long long)(B), \
2317  (int)(R)))
2318 
2319 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2320 _mm_cvtu64_sh(__m128h __A, unsigned long long __B) {
2321  __A[0] = __B;
2322  return __A;
2323 }
2324 #endif
2325 
2326 #define _mm_cvt_roundi32_sh(A, B, R) \
2327  ((__m128h)__builtin_ia32_vcvtsi2sh((__v8hf)(A), (int)(B), (int)(R)))
2328 
2329 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A,
2330  int __B) {
2331  __A[0] = __B;
2332  return __A;
2333 }
2334 
2335 #ifdef __x86_64__
2336 #define _mm_cvt_roundi64_sh(A, B, R) \
2337  ((__m128h)__builtin_ia32_vcvtsi642sh((__v8hf)(A), (long long)(B), (int)(R)))
2338 
2339 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti64_sh(__m128h __A,
2340  long long __B) {
2341  __A[0] = __B;
2342  return __A;
2343 }
2344 #endif
2345 
2346 #define _mm_cvtt_roundsh_i32(A, R) \
2347  ((int)__builtin_ia32_vcvttsh2si32((__v8hf)(A), (int)(R)))
2348 
2349 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A) {
2350  return (int)__builtin_ia32_vcvttsh2si32((__v8hf)__A,
2351  _MM_FROUND_CUR_DIRECTION);
2352 }
2353 
2354 #ifdef __x86_64__
2355 #define _mm_cvtt_roundsh_i64(A, R) \
2356  ((long long)__builtin_ia32_vcvttsh2si64((__v8hf)(A), (int)(R)))
2357 
2358 static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_i64(__m128h __A) {
2359  return (long long)__builtin_ia32_vcvttsh2si64((__v8hf)__A,
2360  _MM_FROUND_CUR_DIRECTION);
2361 }
2362 #endif
2363 
2364 #define _mm_cvtt_roundsh_u32(A, R) \
2365  ((unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)(A), (int)(R)))
2366 
2367 static __inline__ unsigned int __DEFAULT_FN_ATTRS128
2368 _mm_cvttsh_u32(__m128h __A) {
2369  return (unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)__A,
2370  _MM_FROUND_CUR_DIRECTION);
2371 }
2372 
2373 #ifdef __x86_64__
2374 #define _mm_cvtt_roundsh_u64(A, R) \
2375  ((unsigned long long)__builtin_ia32_vcvttsh2usi64((__v8hf)(A), (int)(R)))
2376 
2377 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
2378 _mm_cvttsh_u64(__m128h __A) {
2379  return (unsigned long long)__builtin_ia32_vcvttsh2usi64(
2380  (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2381 }
2382 #endif
2383 
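/* Editorial usage sketch: the scalar half -> integer conversions use the
 * current rounding mode (the cvtt forms truncate), while the *_round_*
 * macros take it explicitly.  With x as an illustrative __m128h:
 *
 *   int i = _mm_cvtsh_i32(x);
 *   int t = _mm_cvtt_roundsh_i32(x, _MM_FROUND_NO_EXC);
 */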
2384 #define _mm512_cvtx_roundph_ps(A, R) \
2385  ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), \
2386  (__v16sf)_mm512_undefined_ps(), \
2387  (__mmask16)(-1), (int)(R)))
2388 
2389 #define _mm512_mask_cvtx_roundph_ps(W, U, A, R) \
2390  ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), (__v16sf)(W), \
2391  (__mmask16)(U), (int)(R)))
2392 
2393 #define _mm512_maskz_cvtx_roundph_ps(U, A, R) \
2394  ((__m512)__builtin_ia32_vcvtph2psx512_mask( \
2395  (__v16hf)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
2396 
2397 static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A) {
2398  return (__m512)__builtin_ia32_vcvtph2psx512_mask(
2399  (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)-1,
2400  _MM_FROUND_CUR_DIRECTION);
2401 }
2402 
2403 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2404 _mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A) {
2405  return (__m512)__builtin_ia32_vcvtph2psx512_mask(
2406  (__v16hf)__A, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2407 }
2408 
2409 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2410 _mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A) {
2411  return (__m512)__builtin_ia32_vcvtph2psx512_mask(
2412  (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U,
2413  _MM_FROUND_CUR_DIRECTION);
2414 }
2415 
2416 #define _mm512_cvtx_roundps_ph(A, R) \
2417  ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), \
2418  (__v16hf)_mm256_undefined_ph(), \
2419  (__mmask16)(-1), (int)(R)))
2420 
2421 #define _mm512_mask_cvtx_roundps_ph(W, U, A, R) \
2422  ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), (__v16hf)(W), \
2423  (__mmask16)(U), (int)(R)))
2424 
2425 #define _mm512_maskz_cvtx_roundps_ph(U, A, R) \
2426  ((__m256h)__builtin_ia32_vcvtps2phx512_mask( \
2427  (__v16sf)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
2428 
2429 static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A) {
2430  return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
2431  (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
2432  _MM_FROUND_CUR_DIRECTION);
2433 }
2434 
2435 static __inline__ __m256h __DEFAULT_FN_ATTRS512
2436 _mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A) {
2437  return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
2438  (__v16sf)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2439 }
2440 
2441 static __inline__ __m256h __DEFAULT_FN_ATTRS512
2442 _mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) {
2443  return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
2444  (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
2445  _MM_FROUND_CUR_DIRECTION);
2446 }
2447 
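/* Editorial usage sketch: the cvtx forms convert 16 lanes between float and
 * _Float16.  A (lossy) round trip under an illustrative name ps:
 *
 *   __m256h h = _mm512_cvtxps_ph(ps);
 *   __m512  p = _mm512_cvtxph_ps(h);
 */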
2448 #define _mm512_fmadd_round_ph(A, B, C, R) \
2449  ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2450  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2451  (__mmask32)-1, (int)(R)))
2452 
2453 #define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
2454  ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2455  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2456  (__mmask32)(U), (int)(R)))
2457 
2458 #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
2459  ((__m512h)__builtin_ia32_vfmaddph512_mask3( \
2460  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2461  (__mmask32)(U), (int)(R)))
2462 
2463 #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
2464  ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
2465  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2466  (__mmask32)(U), (int)(R)))
2467 
2468 #define _mm512_fmsub_round_ph(A, B, C, R) \
2469  ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2470  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2471  (__mmask32)-1, (int)(R)))
2472 
2473 #define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
2474  ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2475  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2476  (__mmask32)(U), (int)(R)))
2477 
2478 #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
2479  ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
2480  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2481  (__mmask32)(U), (int)(R)))
2482 
2483 #define _mm512_fnmadd_round_ph(A, B, C, R) \
2484  ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2485  (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2486  (__mmask32)-1, (int)(R)))
2487 
2488 #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
2489  ((__m512h)__builtin_ia32_vfmaddph512_mask3( \
2490  -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2491  (__mmask32)(U), (int)(R)))
2492 
2493 #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
2494  ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
2495  -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2496  (__mmask32)(U), (int)(R)))
2497 
2498 #define _mm512_fnmsub_round_ph(A, B, C, R) \
2499  ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2500  (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2501  (__mmask32)-1, (int)(R)))
2502 
2503 #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
2504  ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
2505  -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2506  (__mmask32)(U), (int)(R)))
2507 
2508 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_ph(__m512h __A,
2509  __m512h __B,
2510  __m512h __C) {
2511  return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2512  (__v32hf)__C, (__mmask32)-1,
2513  _MM_FROUND_CUR_DIRECTION);
2514 }
2515 
2516 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2517 _mm512_mask_fmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2518  return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2519  (__v32hf)__C, (__mmask32)__U,
2520  _MM_FROUND_CUR_DIRECTION);
2521 }
2522 
2523 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2524 _mm512_mask3_fmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2525  return (__m512h)__builtin_ia32_vfmaddph512_mask3((__v32hf)__A, (__v32hf)__B,
2526  (__v32hf)__C, (__mmask32)__U,
2527  _MM_FROUND_CUR_DIRECTION);
2528 }
2529 
2530 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2531 _mm512_maskz_fmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2532  return (__m512h)__builtin_ia32_vfmaddph512_maskz((__v32hf)__A, (__v32hf)__B,
2533  (__v32hf)__C, (__mmask32)__U,
2534  _MM_FROUND_CUR_DIRECTION);
2535 }
2536 
2537 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmsub_ph(__m512h __A,
2538  __m512h __B,
2539  __m512h __C) {
2540  return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2541  -(__v32hf)__C, (__mmask32)-1,
2542  _MM_FROUND_CUR_DIRECTION);
2543 }
2544 
2545 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2546 _mm512_mask_fmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2547  return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2548  -(__v32hf)__C, (__mmask32)__U,
2549  _MM_FROUND_CUR_DIRECTION);
2550 }
2551 
2552 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2553 _mm512_maskz_fmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2554  return (__m512h)__builtin_ia32_vfmaddph512_maskz(
2555  (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2556  _MM_FROUND_CUR_DIRECTION);
2557 }
2558 
2559 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ph(__m512h __A,
2560  __m512h __B,
2561  __m512h __C) {
2562  return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2563  (__v32hf)__C, (__mmask32)-1,
2564  _MM_FROUND_CUR_DIRECTION);
2565 }
2566 
2567 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2568 _mm512_mask3_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2569  return (__m512h)__builtin_ia32_vfmaddph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2570  (__v32hf)__C, (__mmask32)__U,
2571  _MM_FROUND_CUR_DIRECTION);
2572 }
2573 
2574 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2575 _mm512_maskz_fnmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2576  return (__m512h)__builtin_ia32_vfmaddph512_maskz(-(__v32hf)__A, (__v32hf)__B,
2577  (__v32hf)__C, (__mmask32)__U,
2578  _MM_FROUND_CUR_DIRECTION);
2579 }
2580 
2581 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ph(__m512h __A,
2582  __m512h __B,
2583  __m512h __C) {
2584  return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2585  -(__v32hf)__C, (__mmask32)-1,
2586  _MM_FROUND_CUR_DIRECTION);
2587 }
2588 
2589 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2590 _mm512_maskz_fnmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2591  return (__m512h)__builtin_ia32_vfmaddph512_maskz(
2592  -(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2593  _MM_FROUND_CUR_DIRECTION);
2594 }
2595 
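/* Editorial note: per half lane the family above computes
 * fmadd a*b + c, fmsub a*b - c, fnmadd -(a*b) + c, fnmsub -(a*b) - c, with
 * mask/maskz/mask3 variants selecting where unselected lanes come from.
 * A hedged example with illustrative names:
 *
 *   __m512h d = _mm512_mask3_fmadd_ph(a, b, c, k); // unselected lanes keep c
 */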
2596 #define _mm512_fmaddsub_round_ph(A, B, C, R) \
2597  ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
2598  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2599  (__mmask32)-1, (int)(R)))
2600 
2601 #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
2602  ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
2603  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2604  (__mmask32)(U), (int)(R)))
2605 
2606 #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
2607  ((__m512h)__builtin_ia32_vfmaddsubph512_mask3( \
2608  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2609  (__mmask32)(U), (int)(R)))
2610 
2611 #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
2612  ((__m512h)__builtin_ia32_vfmaddsubph512_maskz( \
2613  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2614  (__mmask32)(U), (int)(R)))
2615 
2616 #define _mm512_fmsubadd_round_ph(A, B, C, R) \
2617  ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
2618  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2619  (__mmask32)-1, (int)(R)))
2620 
2621 #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
2622  ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
2623  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2624  (__mmask32)(U), (int)(R)))
2625 
2626 #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
2627  ((__m512h)__builtin_ia32_vfmaddsubph512_maskz( \
2628  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2629  (__mmask32)(U), (int)(R)))
2630 
2631 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2632 _mm512_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C) {
2633  return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2634  (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)-1,
2635  _MM_FROUND_CUR_DIRECTION);
2636 }
2637 
2638 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2639 _mm512_mask_fmaddsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2640  return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2641  (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2642  _MM_FROUND_CUR_DIRECTION);
2643 }
2644 
2645 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2646 _mm512_mask3_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2647  return (__m512h)__builtin_ia32_vfmaddsubph512_mask3(
2648  (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2649  _MM_FROUND_CUR_DIRECTION);
2650 }
2651 
2652 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2653 _mm512_maskz_fmaddsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2654  return (__m512h)__builtin_ia32_vfmaddsubph512_maskz(
2655  (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2656  _MM_FROUND_CUR_DIRECTION);
2657 }
2658 
2659 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2660 _mm512_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C) {
2661  return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2662  (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)-1,
2663  _MM_FROUND_CUR_DIRECTION);
2664 }
2665 
2666 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2667 _mm512_mask_fmsubadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2668  return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2669  (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2670  _MM_FROUND_CUR_DIRECTION);
2671 }
2672 
2673 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2674 _mm512_maskz_fmsubadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2675  return (__m512h)__builtin_ia32_vfmaddsubph512_maskz(
2676  (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2677  _MM_FROUND_CUR_DIRECTION);
2678 }
2679 
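/* Editorial note: as with the ps/pd forms, fmaddsub subtracts c in
 * even-indexed lanes and adds it in odd-indexed lanes, and fmsubadd is the
 * mirror image, e.g. (illustrative):
 *
 *   __m512h r = _mm512_fmaddsub_ph(a, b, c); // r[0]=a[0]*b[0]-c[0], r[1]=a[1]*b[1]+c[1], ...
 */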
2680 #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
2681  ((__m512h)__builtin_ia32_vfmsubph512_mask3( \
2682  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2683  (__mmask32)(U), (int)(R)))
2684 
2685 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2686 _mm512_mask3_fmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2687  return (__m512h)__builtin_ia32_vfmsubph512_mask3((__v32hf)__A, (__v32hf)__B,
2688  (__v32hf)__C, (__mmask32)__U,
2689  _MM_FROUND_CUR_DIRECTION);
2690 }
2691 
2692 #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
2693  ((__m512h)__builtin_ia32_vfmsubaddph512_mask3( \
2694  (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2695  (__mmask32)(U), (int)(R)))
2696 
2697 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2698 _mm512_mask3_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2699  return (__m512h)__builtin_ia32_vfmsubaddph512_mask3(
2700  (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2701  _MM_FROUND_CUR_DIRECTION);
2702 }
2703 
2704 #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
2705  ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2706  (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2707  (__mmask32)(U), (int)(R)))
2708 
2709 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2710 _mm512_mask_fnmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2711  return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2712  (__v32hf)__C, (__mmask32)__U,
2713  _MM_FROUND_CUR_DIRECTION);
2714 }
2715 
2716 #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
2717  ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2718  (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2719  (__mmask32)(U), (int)(R)))
2720 
2721 #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
2722  ((__m512h)__builtin_ia32_vfmsubph512_mask3( \
2723  -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2724  (__mmask32)(U), (int)(R)))
2725 
2726 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2727 _mm512_mask_fnmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2728  return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2729  -(__v32hf)__C, (__mmask32)__U,
2730  _MM_FROUND_CUR_DIRECTION);
2731 }
2732 
2733 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2734 _mm512_mask3_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2735  return (__m512h)__builtin_ia32_vfmsubph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2736  (__v32hf)__C, (__mmask32)__U,
2737  _MM_FROUND_CUR_DIRECTION);
2738 }
2739 
2740 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sh(__m128h __W,
2741  __m128h __A,
2742  __m128h __B) {
2743  return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
2744  (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
2745 }
2746 
2747 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sh(__m128h __W,
2748  __mmask8 __U,
2749  __m128h __A,
2750  __m128h __B) {
2751  return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
2752  (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2753 }
2754 
2755 #define _mm_fmadd_round_sh(A, B, C, R) \
2756  ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2757  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2758  (__mmask8)-1, (int)(R)))
2759 
2760 #define _mm_mask_fmadd_round_sh(W, U, A, B, R) \
2761  ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2762  (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \
2763  (__mmask8)(U), (int)(R)))
2764 
2765 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2766 _mm_maskz_fmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2767  return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B, (__v8hf)__C,
2768  (__mmask8)__U,
2769  _MM_FROUND_CUR_DIRECTION);
2770 }
2771 
2772 #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
2773  ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2774  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2775  (__mmask8)(U), (int)(R)))
2776 
2777 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2778 _mm_mask3_fmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2779  return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y,
2780  (__mmask8)__U,
2781  _MM_FROUND_CUR_DIRECTION);
2782 }
2783 
2784 #define _mm_mask3_fmadd_round_sh(W, X, Y, U, R) \
2785  ((__m128h)__builtin_ia32_vfmaddsh3_mask3( \
2786  (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2787  (__mmask8)(U), (int)(R)))
2788 
2789 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_sh(__m128h __W,
2790  __m128h __A,
2791  __m128h __B) {
2792  return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
2793  -(__v8hf)__B, (__mmask8)-1,
2794  _MM_FROUND_CUR_DIRECTION);
2795 }
2796 
2797 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sh(__m128h __W,
2798  __mmask8 __U,
2799  __m128h __A,
2800  __m128h __B) {
2801  return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
2802  -(__v8hf)__B, (__mmask8)__U,
2803  _MM_FROUND_CUR_DIRECTION);
2804 }
2805 
2806 #define _mm_fmsub_round_sh(A, B, C, R) \
2807  ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2808  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2809  (__mmask8)-1, (int)(R)))
2810 
2811 #define _mm_mask_fmsub_round_sh(W, U, A, B, R) \
2812  ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2813  (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \
2814  (__mmask8)(U), (int)(R)))
2815 
2816 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2817 _mm_maskz_fmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2818  return (__m128h)__builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B,
2819  -(__v8hf)__C, (__mmask8)__U,
2820  _MM_FROUND_CUR_DIRECTION);
2821 }
2822 
2823 #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
2824  ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2825  (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2826  (__mmask8)(U), (int)(R)))
2827 
2828 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2829 _mm_mask3_fmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2830  return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y,
2831  (__mmask8)__U,
2832  _MM_FROUND_CUR_DIRECTION);
2833 }
2834 
2835 #define _mm_mask3_fmsub_round_sh(W, X, Y, U, R) \
2836  ((__m128h)__builtin_ia32_vfmsubsh3_mask3( \
2837  (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2838  (__mmask8)(U), (int)(R)))
2839 
2840 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_sh(__m128h __W,
2841  __m128h __A,
2842  __m128h __B) {
2843  return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
2844  (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
2845 }
2846 
2847 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2848 _mm_mask_fnmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
2849  return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
2850  (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2851 }
2852 
2853 #define _mm_fnmadd_round_sh(A, B, C, R) \
2854  ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2855  (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2856  (__mmask8)-1, (int)(R)))
2857 
2858 #define _mm_mask_fnmadd_round_sh(W, U, A, B, R) \
2859  ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2860  (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \
2861  (__mmask8)(U), (int)(R)))
2862 
2863 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2864 _mm_maskz_fnmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2865  return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C,
2866  (__mmask8)__U,
2867  _MM_FROUND_CUR_DIRECTION);
2868 }
2869 
2870 #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
2871  ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2872  (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2873  (__mmask8)(U), (int)(R)))
2874 
2875 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2876 _mm_mask3_fnmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2877  return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y,
2878  (__mmask8)__U,
2879  _MM_FROUND_CUR_DIRECTION);
2880 }
2881 
2882 #define _mm_mask3_fnmadd_round_sh(W, X, Y, U, R) \
2883  ((__m128h)__builtin_ia32_vfmaddsh3_mask3( \
2884  (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2885  (__mmask8)(U), (int)(R)))
2886 
2887 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_sh(__m128h __W,
2888  __m128h __A,
2889  __m128h __B) {
2890  return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
2891  (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
2892 }
2893 
2894 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2895 _mm_mask_fnmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
2896  return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
2897  (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2898 }
2899 
2900 #define _mm_fnmsub_round_sh(A, B, C, R) \
2901  ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2902  (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2903  (__mmask8)-1, (int)(R)))
2904 
2905 #define _mm_mask_fnmsub_round_sh(W, U, A, B, R) \
2906  ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2907  (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \
2908  (__mmask8)(U), (int)(R)))
2909 
2910 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2911 _mm_maskz_fnmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2912  return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C,
2913  (__mmask8)__U,
2914  _MM_FROUND_CUR_DIRECTION);
2915 }
2916 
2917 #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
2918  ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2919  (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2920  (__mmask8)(U), (int)(R)))
2921 
2922 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2923 _mm_mask3_fnmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2924  return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y,
2925  (__mmask8)__U,
2926  _MM_FROUND_CUR_DIRECTION);
2927 }
2928 
2929 #define _mm_mask3_fnmsub_round_sh(W, X, Y, U, R) \
2930  ((__m128h)__builtin_ia32_vfmsubsh3_mask3( \
2931  (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2932  (__mmask8)(U), (int)(R)))
2933 
2934 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_sch(__m128h __A,
2935  __m128h __B,
2936  __m128h __C) {
2937  return (__m128h)__builtin_ia32_vfcmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
2938  (__v4sf)__C, (__mmask8)-1,
2939  _MM_FROUND_CUR_DIRECTION);
2940 }
2941 
2942 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2943 _mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
2944  return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask(
2945  (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
2946 }
2947 
2948 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2949 _mm_maskz_fcmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2950  return (__m128h)__builtin_ia32_vfcmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
2951  (__v4sf)__C, (__mmask8)__U,
2952  _MM_FROUND_CUR_DIRECTION);
2953 }
2954 
2955 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2956 _mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
2957  return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask3(
2958  (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
2959 }
2960 
2961 #define _mm_fcmadd_round_sch(A, B, C, R) \
2962  ((__m128h)__builtin_ia32_vfcmaddcsh_mask( \
2963  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2964  (__mmask8)-1, (int)(R)))
2965 
2966 #define _mm_mask_fcmadd_round_sch(A, U, B, C, R) \
2967  ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask( \
2968  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2969  (__mmask8)(U), (int)(R)))
2970 
2971 #define _mm_maskz_fcmadd_round_sch(U, A, B, C, R) \
2972  ((__m128h)__builtin_ia32_vfcmaddcsh_maskz( \
2973  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2974  (__mmask8)(U), (int)(R)))
2975 
2976 #define _mm_mask3_fcmadd_round_sch(A, B, C, U, R) \
2977  ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask3( \
2978  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2979  (__mmask8)(U), (int)(R)))
2980 
2981 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sch(__m128h __A,
2982  __m128h __B,
2983  __m128h __C) {
2984  return (__m128h)__builtin_ia32_vfmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
2985  (__v4sf)__C, (__mmask8)-1,
2986  _MM_FROUND_CUR_DIRECTION);
2987 }
2988 
2989 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2990 _mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
2991  return (__m128h)__builtin_ia32_vfmaddcsh_round_mask(
2992  (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
2993 }
2994 
2995 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2996 _mm_maskz_fmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2997  return (__m128h)__builtin_ia32_vfmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
2998  (__v4sf)__C, (__mmask8)__U,
2999  _MM_FROUND_CUR_DIRECTION);
3000 }
3001 
3002 static __inline__ __m128h __DEFAULT_FN_ATTRS128
3003 _mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
3004  return (__m128h)__builtin_ia32_vfmaddcsh_round_mask3(
3005  (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
3006 }
3007 
3008 #define _mm_fmadd_round_sch(A, B, C, R) \
3009  ((__m128h)__builtin_ia32_vfmaddcsh_mask( \
3010  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3011  (__mmask8)-1, (int)(R)))
3012 
3013 #define _mm_mask_fmadd_round_sch(A, U, B, C, R) \
3014  ((__m128h)__builtin_ia32_vfmaddcsh_round_mask( \
3015  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3016  (__mmask8)(U), (int)(R)))
3017 
3018 #define _mm_maskz_fmadd_round_sch(U, A, B, C, R) \
3019  ((__m128h)__builtin_ia32_vfmaddcsh_maskz( \
3020  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3021  (__mmask8)(U), (int)(R)))
3022 
3023 #define _mm_mask3_fmadd_round_sch(A, B, C, U, R) \
3024  ((__m128h)__builtin_ia32_vfmaddcsh_round_mask3( \
3025  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3026  (__mmask8)(U), (int)(R)))
3027 
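/* Illustrative sketch -- not part of the upstream header.  The *_round_sch
 * macros take an explicit rounding/SAE control instead of the current MXCSR
 * rounding mode; _MM_FROUND_TO_NEAREST_INT and _MM_FROUND_NO_EXC come from
 * the usual x86 intrinsic headers.  The helper name is made up. */
static inline __m128h fmadd_sch_rne_example(__m128h a, __m128h b, __m128h c) {
  /* Complex scalar multiply-accumulate, rounded to nearest-even with
   * floating-point exceptions suppressed. */
  return _mm_fmadd_round_sch(a, b, c,
                             _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}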
3028 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_sch(__m128h __A,
3029  __m128h __B) {
3030  return (__m128h)__builtin_ia32_vfcmulcsh_mask(
3031  (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
3032  _MM_FROUND_CUR_DIRECTION);
3033 }
3034 
3035 static __inline__ __m128h __DEFAULT_FN_ATTRS128
3036 _mm_mask_fcmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
3037  return (__m128h)__builtin_ia32_vfcmulcsh_mask((__v4sf)__A, (__v4sf)__B,
3038  (__v4sf)__W, (__mmask8)__U,
3039  _MM_FROUND_CUR_DIRECTION);
3040 }
3041 
3042 static __inline__ __m128h __DEFAULT_FN_ATTRS128
3043 _mm_maskz_fcmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
3044  return (__m128h)__builtin_ia32_vfcmulcsh_mask(
3045  (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
3046  _MM_FROUND_CUR_DIRECTION);
3047 }
3048 
3049 #define _mm_fcmul_round_sch(A, B, R) \
3050  ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
3051  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3052  (__v4sf)(__m128h)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))
3053 
3054 #define _mm_mask_fcmul_round_sch(W, U, A, B, R) \
3055  ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
3056  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(W), \
3057  (__mmask8)(U), (int)(R)))
3058 
3059 #define _mm_maskz_fcmul_round_sch(U, A, B, R) \
3060  ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
3061  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3062  (__v4sf)(__m128h)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
3063 
3064 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_sch(__m128h __A,
3065  __m128h __B) {
3066  return (__m128h)__builtin_ia32_vfmulcsh_mask(
3067  (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
3068  _MM_FROUND_CUR_DIRECTION);
3069 }
3070 
3071 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_sch(__m128h __W,
3072  __mmask8 __U,
3073  __m128h __A,
3074  __m128h __B) {
3075  return (__m128h)__builtin_ia32_vfmulcsh_mask((__v4sf)__A, (__v4sf)__B,
3076  (__v4sf)__W, (__mmask8)__U,
3077  _MM_FROUND_CUR_DIRECTION);
3078 }
3079 
3080 static __inline__ __m128h __DEFAULT_FN_ATTRS128
3081 _mm_maskz_fmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
3082  return (__m128h)__builtin_ia32_vfmulcsh_mask(
3083  (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
3084  _MM_FROUND_CUR_DIRECTION);
3085 }
3086 
3087 #define _mm_fmul_round_sch(A, B, R) \
3088  ((__m128h)__builtin_ia32_vfmulcsh_mask( \
3089  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3090  (__v4sf)(__m128h)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))
3091 
3092 #define _mm_mask_fmul_round_sch(W, U, A, B, R) \
3093  ((__m128h)__builtin_ia32_vfmulcsh_mask( \
3094  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(W), \
3095  (__mmask8)(U), (int)(R)))
3096 
3097 #define _mm_maskz_fmul_round_sch(U, A, B, R) \
3098  ((__m128h)__builtin_ia32_vfmulcsh_mask( \
3099  (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3100  (__v4sf)(__m128h)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
3101 
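/* Illustrative sketch -- not part of the upstream header; helper name and
 * data are made up.  _mm_fmul_sch multiplies the low complex FP16 values of
 * its operands, while _mm_fcmul_sch does the same but (per the Intel
 * intrinsics guide) uses the complex conjugate of one multiplicand. */
static inline void fmul_sch_example(void) {
  _Float16 a[8] = {1.0f16, 1.0f16, 0, 0, 0, 0, 0, 0}; /* 1 + 1i */
  _Float16 b[8] = {0.0f16, 1.0f16, 0, 0, 0, 0, 0, 0}; /* 0 + 1i */
  __m128h va = _mm_loadu_ph(a), vb = _mm_loadu_ph(b);

  __m128h prod = _mm_fmul_sch(va, vb);   /* (1+1i)*(0+1i) = -1 + 1i */
  __m128h cprod = _mm_fcmul_sch(va, vb); /* conjugate variant */

  _Float16 out[8];
  _mm_storeu_ph(out, prod); /* out[0] = -1.0, out[1] = 1.0 */
  (void)cprod;
}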
3102 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmul_pch(__m512h __A,
3103  __m512h __B) {
3104  return (__m512h)__builtin_ia32_vfcmulcph512_mask(
3105  (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
3106  _MM_FROUND_CUR_DIRECTION);
3107 }
3108 
3109 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3110 _mm512_mask_fcmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
3111  return (__m512h)__builtin_ia32_vfcmulcph512_mask((__v16sf)__A, (__v16sf)__B,
3112  (__v16sf)__W, (__mmask16)__U,
3113  _MM_FROUND_CUR_DIRECTION);
3114 }
3115 
3116 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3117 _mm512_maskz_fcmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
3118  return (__m512h)__builtin_ia32_vfcmulcph512_mask(
3119  (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
3120  _MM_FROUND_CUR_DIRECTION);
3121 }
3122 
3123 #define _mm512_fcmul_round_pch(A, B, R) \
3124  ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
3125  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3126  (__v16sf)(__m512h)_mm512_undefined_ph(), (__mmask16)-1, (int)(R)))
3127 
3128 #define _mm512_mask_fcmul_round_pch(W, U, A, B, R) \
3129  ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
3130  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(W), \
3131  (__mmask16)(U), (int)(R)))
3132 
3133 #define _mm512_maskz_fcmul_round_pch(U, A, B, R) \
3134  ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
3135  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3136  (__v16sf)(__m512h)_mm512_setzero_ph(), (__mmask16)(U), (int)(R)))
3137 
3138 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmul_pch(__m512h __A,
3139  __m512h __B) {
3140  return (__m512h)__builtin_ia32_vfmulcph512_mask(
3141  (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
3142  _MM_FROUND_CUR_DIRECTION);
3143 }
3144 
3145 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3146 _mm512_mask_fmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
3147  return (__m512h)__builtin_ia32_vfmulcph512_mask((__v16sf)__A, (__v16sf)__B,
3148  (__v16sf)__W, (__mmask16)__U,
3149  _MM_FROUND_CUR_DIRECTION);
3150 }
3151 
3152 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3153 _mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
3154  return (__m512h)__builtin_ia32_vfmulcph512_mask(
3155  (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
3156  _MM_FROUND_CUR_DIRECTION);
3157 }
3158 
3159 #define _mm512_fmul_round_pch(A, B, R) \
3160  ((__m512h)__builtin_ia32_vfmulcph512_mask( \
3161  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3162  (__v16sf)(__m512h)_mm512_undefined_ph(), (__mmask16)-1, (int)(R)))
3163 
3164 #define _mm512_mask_fmul_round_pch(W, U, A, B, R) \
3165  ((__m512h)__builtin_ia32_vfmulcph512_mask( \
3166  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(W), \
3167  (__mmask16)(U), (int)(R)))
3168 
3169 #define _mm512_maskz_fmul_round_pch(U, A, B, R) \
3170  ((__m512h)__builtin_ia32_vfmulcph512_mask( \
3171  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3172  (__v16sf)(__m512h)_mm512_setzero_ph(), (__mmask16)(U), (int)(R)))
3173 
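/* Illustrative sketch -- not part of the upstream header; the helper name is
 * made up.  The 512-bit _pch intrinsics treat an __m512h as 16 complex FP16
 * numbers (interleaved real/imaginary pairs), so the __mmask16 carries one
 * bit per complex number rather than one bit per half. */
static inline __m512h masked_fcmul_pch_example(__m512h src, __m512h a,
                                               __m512h b) {
  __mmask16 even = 0x5555; /* complex elements 0, 2, 4, ... */
  /* Complex elements whose mask bit is clear are taken from src (__W). */
  return _mm512_mask_fcmul_pch(src, even, a, b);
}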
3174 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmadd_pch(__m512h __A,
3175  __m512h __B,
3176  __m512h __C) {
3177  return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
3178  (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)-1,
3179  _MM_FROUND_CUR_DIRECTION);
3180 }
3181 
3182 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3183 _mm512_mask_fcmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
3184  return (__m512h)__builtin_ia32_vfcmaddcph512_mask(
3185  (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3186  _MM_FROUND_CUR_DIRECTION);
3187 }
3188 
3189 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3190 _mm512_mask3_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
3191  return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
3192  (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3193  _MM_FROUND_CUR_DIRECTION);
3194 }
3195 
3196 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3197 _mm512_maskz_fcmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
3198  return (__m512h)__builtin_ia32_vfcmaddcph512_maskz(
3199  (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3200  _MM_FROUND_CUR_DIRECTION);
3201 }
3202 
3203 #define _mm512_fcmadd_round_pch(A, B, C, R) \
3204  ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
3205  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3206  (__mmask16)-1, (int)(R)))
3207 
3208 #define _mm512_mask_fcmadd_round_pch(A, U, B, C, R) \
3209  ((__m512h)__builtin_ia32_vfcmaddcph512_mask( \
3210  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3211  (__mmask16)(U), (int)(R)))
3212 
3213 #define _mm512_mask3_fcmadd_round_pch(A, B, C, U, R) \
3214  ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
3215  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3216  (__mmask16)(U), (int)(R)))
3217 
3218 #define _mm512_maskz_fcmadd_round_pch(U, A, B, C, R) \
3219  ((__m512h)__builtin_ia32_vfcmaddcph512_maskz( \
3220  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3221  (__mmask16)(U), (int)(R)))
3222 
3223 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_pch(__m512h __A,
3224  __m512h __B,
3225  __m512h __C) {
3226  return (__m512h)__builtin_ia32_vfmaddcph512_mask3((__v16sf)__A, (__v16sf)__B,
3227  (__v16sf)__C, (__mmask16)-1,
3228  _MM_FROUND_CUR_DIRECTION);
3229 }
3230 
3231 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3232 _mm512_mask_fmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
3233  return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
3234  (__v16sf)__C, (__mmask16)__U,
3235  _MM_FROUND_CUR_DIRECTION);
3236 }
3237 
3238 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3239 _mm512_mask3_fmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
3240  return (__m512h)__builtin_ia32_vfmaddcph512_mask3(
3241  (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3242  _MM_FROUND_CUR_DIRECTION);
3243 }
3244 
3245 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3246 _mm512_maskz_fmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
3247  return (__m512h)__builtin_ia32_vfmaddcph512_maskz(
3248  (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3249  _MM_FROUND_CUR_DIRECTION);
3250 }
3251 
3252 #define _mm512_fmadd_round_pch(A, B, C, R) \
3253  ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
3254  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3255  (__mmask16)-1, (int)(R)))
3256 
3257 #define _mm512_mask_fmadd_round_pch(A, U, B, C, R) \
3258  ((__m512h)__builtin_ia32_vfmaddcph512_mask( \
3259  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3260  (__mmask16)(U), (int)(R)))
3261 
3262 #define _mm512_mask3_fmadd_round_pch(A, B, C, U, R) \
3263  ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
3264  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3265  (__mmask16)(U), (int)(R)))
3266 
3267 #define _mm512_maskz_fmadd_round_pch(U, A, B, C, R) \
3268  ((__m512h)__builtin_ia32_vfmaddcph512_maskz( \
3269  (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3270  (__mmask16)(U), (int)(R)))
3271 
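/* Illustrative sketch -- not part of the upstream header; the helper name and
 * accumulation pattern are made up.  _mm512_fmadd_pch computes a complex
 * multiply-accumulate, A*B + C, on 16 complex FP16 values at once (the "fc"
 * variants additionally conjugate one multiplicand). */
static inline __m512h complex_accumulate_example(__m512h acc,
                                                 const _Float16 *x,
                                                 const _Float16 *y) {
  __m512h vx = _mm512_loadu_ph(x); /* 16 complex numbers = 32 halves */
  __m512h vy = _mm512_loadu_ph(y);
  return _mm512_fmadd_pch(vx, vy, acc); /* acc + x[i]*y[i], element-wise */
}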
3272 static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3273 _mm512_reduce_add_ph(__m512h __W) {
3274  return __builtin_ia32_reduce_fadd_ph512(-0.0f16, __W);
3275 }
3276 
3277 static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3278 _mm512_reduce_mul_ph(__m512h __W) {
3279  return __builtin_ia32_reduce_fmul_ph512(1.0f16, __W);
3280 }
3281 
3282 static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3283 _mm512_reduce_max_ph(__m512h __V) {
3284  return __builtin_ia32_reduce_fmax_ph512(__V);
3285 }
3286 
3287 static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3288 _mm512_reduce_min_ph(__m512h __V) {
3289  return __builtin_ia32_reduce_fmin_ph512(__V);
3290 }
3291 
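/* Illustrative sketch -- not part of the upstream header; the helper name is
 * made up.  The reduce helpers fold all 32 half-precision lanes of an __m512h
 * into a single _Float16 (note the -0.0f16 additive and 1.0f16 multiplicative
 * identities used above). */
static inline void reduce_ph_example(void) {
  __m512h ones = _mm512_set1_ph(1.0f16);
  _Float16 sum = _mm512_reduce_add_ph(ones);  /* 32.0 */
  _Float16 prod = _mm512_reduce_mul_ph(ones); /* 1.0 */
  _Float16 hi = _mm512_reduce_max_ph(ones);   /* 1.0 */
  (void)sum; (void)prod; (void)hi;
}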
3292 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3293 _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) {
3294  return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W,
3295  (__v32hf)__A);
3296 }
3297 
3298 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3299 _mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B) {
3300  return (__m512h)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
3301  (__v32hi)__B);
3302 }
3303 
3304 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3305 _mm512_permutexvar_ph(__m512i __A, __m512h __B) {
3306  return (__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
3307 }
3308 
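/* Illustrative sketch -- not part of the upstream header; helper and index
 * names are made up.  Per the select/permute builtins above,
 * _mm512_mask_blend_ph takes lane i from __W when mask bit i is set (from __A
 * otherwise), and _mm512_permutexvar_ph gathers 16-bit lanes of __B using the
 * indices held in the __m512i operand. */
static inline __m512h blend_then_permute_example(__m512h a, __m512h w,
                                                 __m512i idx) {
  /* Odd mask bits set: odd lanes come from w, even lanes from a. */
  __m512h mixed = _mm512_mask_blend_ph((__mmask32)0xAAAAAAAA, a, w);
  /* idx is assumed to hold 16-bit lane indices, e.g. 31, 30, ..., 0. */
  return _mm512_permutexvar_ph(idx, mixed);
}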
3309 #undef __DEFAULT_FN_ATTRS128
3310 #undef __DEFAULT_FN_ATTRS256
3311 #undef __DEFAULT_FN_ATTRS512
3312 
3313 #endif
_mm512_mask_cvtph_epi16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1692
_mm_mask_getexp_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1250
_mm_scalef_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1278
_mm512_scalef_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_scalef_ph(__m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:1070
_mm_setzero_ph
static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_setzero_ph(void)
Definition: avx512fp16intrin.h:42
_mm_mask_fnmadd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:2848
_mm512_fmaddsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2632
_mm_mask_fmsub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:2797
_mm256_undefined_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_undefined_ph(void)
Definition: avx512fp16intrin.h:51
_mm_max_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:753
_mm_undefined_pd
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
Definition: emmintrin.h:1803
_mm_mask_fmadd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:2747
_mm512_mask_rsqrt_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt_ph(__m512h __W, __mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:995
_mm_maskz_min_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:730
_mm512_cvtph_epu16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epu16(__m512h __A)
Definition: avx512fp16intrin.h:1786
_mm_move_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_move_sh(__m128h __a, __m128h __b)
Definition: avx512fp16intrin.h:942
_CMP_GT_OQ
#define _CMP_GT_OQ
Definition: avxintrin.h:1590
_mm512_mask_fnmsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2727
_mm512_sub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sub_ph(__m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:387
_mm512_mask_cvtph_epu64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:2192
_mm_cvtss_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A, __m128 __B)
Definition: avx512fp16intrin.h:1572
_mm512_mask_cvtepi32_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A)
Definition: avx512fp16intrin.h:1961
__attribute__
_Float16 __v32hf __attribute__((__vector_size__(64), __aligned__(64)))
Definition: avx512fp16intrin.h:17
_mm512_rsqrt_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rsqrt_ph(__m512h __A)
Definition: avx512fp16intrin.h:989
_mm512_sqrt_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A)
Definition: avx512fp16intrin.h:1382
_mm_cvtsh_sd
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A, __m128h __B)
Definition: avx512fp16intrin.h:1647
_mm_cvtsh_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS128 _mm_cvtsh_u32(__m128h __A)
Definition: avx512fp16intrin.h:2281
_mm512_mask_cvttph_epu64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:2258
_mm512_maskz_cvtph_epu64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:2198
_mm512_maskz_mul_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ph(__mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:431
_mm512_maskz_cvtepu16_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A)
Definition: avx512fp16intrin.h:1866
_mm512_cvtxph_ps
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A)
Definition: avx512fp16intrin.h:2397
__v
struct __storeu_i16 *__P __v
Definition: immintrin.h:373
_mm512_mask_cvtepi64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A)
Definition: avx512fp16intrin.h:2094
_mm512_mask3_fnmadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
Definition: avx512fp16intrin.h:2568
_mm512_store_ph
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_ph(void *__P, __m512h __A)
Definition: avx512fp16intrin.h:902
_mm512_setzero_ph
static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_setzero_ph(void)
Definition: avx512fp16intrin.h:55
_mm_ucomile_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:331
_mm512_maskz_cvtph_epu32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A)
Definition: avx512fp16intrin.h:1934
_mm512_castph_ps
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castph_ps(__m512h __a)
Definition: avx512fp16intrin.h:113
_mm512_permutex2var_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B)
Definition: avx512fp16intrin.h:3299
_mm512_maskz_cvtph_epi16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1698
_mm_loadu_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_loadu_ph(void const *__p)
Definition: avx512fp16intrin.h:880
_mm512_mask_getexp_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ph(__m512h __W, __mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1044
_mm_castps_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castps_ph(__m128 __a)
Definition: avx512fp16intrin.h:143
_mm_maskz_sub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:620
_mm_cvtsi16_si128
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsi16_si128(short __a)
Definition: avx512fp16intrin.h:963
_mm512_maskz_fmadd_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:3246
_mm_ucomigt_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:337
_mm512_mask_fcmadd_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fcmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:3183
_mm512_mask_cvtph_epi32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A)
Definition: avx512fp16intrin.h:1894
_mm_castsi128_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castsi128_ph(__m128i __a)
Definition: avx512fp16intrin.h:167
_mm512_cvtph_epi64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epi64(__m128h __A)
Definition: avx512fp16intrin.h:2120
_mm512_mask_fmsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2546
_mm512_reduce_add_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ph(__m512h __W)
Definition: avx512fp16intrin.h:3273
_mm512_maskz_fmsubadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2674
_mm512_mask_scalef_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:1078
_mm_mask_rsqrt_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1193
_mm_fnmadd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_sh(__m128h __W, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:2840
_mm_mask3_fmsub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U)
Definition: avx512fp16intrin.h:2829
_mm_mask_move_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_move_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:948
_mm_maskz_rcp_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1180
_mm512_mask_cvtepu16_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A)
Definition: avx512fp16intrin.h:1860
_mm_fmsub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_sh(__m128h __W, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:2789
_mm512_castph512_ph128
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_castph512_ph128(__m512h __a)
Definition: avx512fp16intrin.h:187
_mm_comigt_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:301
_mm512_mask_cvtxph_ps
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A)
Definition: avx512fp16intrin.h:2404
__a
static __inline__ void int __a
Definition: emmintrin.h:4189
_mm512_maskz_cvtph_epi64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:2133
_mm512_mask3_fnmsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
Definition: avx512fp16intrin.h:2734
_mm512_mask_cvttph_epi16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1726
_mm_mask_rcp_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1172
_mm512_mask_max_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:523
_mm_mask_cvtsh_sd
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128h __B)
Definition: avx512fp16intrin.h:1654
_mm_fmul_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_sch(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:3064
_mm_cvttsh_i32
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A)
Definition: avx512fp16intrin.h:2349
_mm512_mask_cvtepi16_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A)
Definition: avx512fp16intrin.h:1759
_mm_maskz_scalef_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1298
_mm512_mask3_fmaddsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
Definition: avx512fp16intrin.h:2646
_mm256_castpd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castpd_ph(__m256d __a)
Definition: avx512fp16intrin.h:159
_mm512_castph128_ph512
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castph128_ph512(__m128h __a)
Definition: avx512fp16intrin.h:204
_CMP_NEQ_UQ
#define _CMP_NEQ_UQ
Definition: avxintrin.h:1564
_mm512_maskz_cvttph_epi16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1732
_mm512_maskz_cvtph_epu16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1799
_mm_cvtsh_ss
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A, __m128h __B)
Definition: avx512fp16intrin.h:1534
_mm512_mask_cvttph_epu16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1827
_mm_mask3_fnmsub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U)
Definition: avx512fp16intrin.h:2923
_mm256_castsi256_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castsi256_ph(__m256i __a)
Definition: avx512fp16intrin.h:172
__DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS512
Definition: avx512fp16intrin.h:28
_CMP_EQ_OQ
#define _CMP_EQ_OQ
Definition: avxintrin.h:1560
_mm512_castph_pd
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castph_pd(__m512h __a)
Definition: avx512fp16intrin.h:125
_mm_comige_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:307
_mm512_setzero_ps
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void)
Definition: avx512fintrin.h:253
_mm_castph_si128
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_castph_si128(__m128h __a)
Definition: avx512fp16intrin.h:129
_mm512_maskz_cvtxps_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A)
Definition: avx512fp16intrin.h:2442
_mm512_maskz_sub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ph(__mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:399
_mm512_and_epi32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b)
Definition: avx512fintrin.h:626
_mm512_mask_cvtph_pd
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:1508
_mm_comilt_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:289
_mm512_mask3_fmadd_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U)
Definition: avx512fp16intrin.h:3239
_mm_maskz_sqrt_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sh(__mmask32 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1434
_mm_mask_min_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:721
_mm512_fmsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmsub_ph(__m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2537
_mm_comile_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:295
_mm_load_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_ph(void const *__p)
Definition: avx512fp16intrin.h:860
_mm512_maskz_cvttph_epu32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A)
Definition: avx512fp16intrin.h:2068
_mm_mask3_fmadd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U)
Definition: avx512fp16intrin.h:2778
_mm512_mask_conj_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_conj_pch(__m512h __W, __mmask16 __U, __m512h __A)
Definition: avx512fp16intrin.h:558
_CMP_LE_OS
#define _CMP_LE_OS
Definition: avxintrin.h:1562
_mm_min_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:714
_mm512_mask_cvttph_epi64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:2225
_mm512_maskz_cvttph_epi64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:2231
_mm512_mask_fmul_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:3146
_mm512_undefined_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_undefined_ph(void)
Definition: avx512fp16intrin.h:65
_mm512_cvttph_epu64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epu64(__m128h __A)
Definition: avx512fp16intrin.h:2251
__mmask32
unsigned int __mmask32
Definition: avx512bwintrin.h:17
_mm512_maskz_fcmadd_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fcmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:3197
_mm_mask_cvtsd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128d __B)
Definition: avx512fp16intrin.h:1617
_mm_mask_add_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:576
_mm_fnmsub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_sh(__m128h __W, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:2887
_mm512_fcmadd_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:3174
_mm256_castph_pd
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_castph_pd(__m256h __a)
Definition: avx512fp16intrin.h:121
_mm512_mask_cvtph_epu32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A)
Definition: avx512fp16intrin.h:1928
_mm_mask_div_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:684
_mm512_mask_fmsubadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2667
_mm512_cvtsh_h
static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a)
Definition: avx512fp16intrin.h:38
_mm_mask3_fnmadd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U)
Definition: avx512fp16intrin.h:2876
_mm256_zextph128_ph256
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_zextph128_ph256(__m128h __a)
Constructs a 256-bit floating-point vector of [16 x half] from a 128-bit floating-point vector of [8 ...
Definition: avx512fp16intrin.h:231
_CMP_GE_OS
#define _CMP_GE_OS
Definition: avxintrin.h:1573
_mm512_fmadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_ph(__m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2508
_mm_ucomineq_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:349
_mm512_maskz_cvtepi64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A)
Definition: avx512fp16intrin.h:2100
_mm256_castps_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castps_ph(__m256 __a)
Definition: avx512fp16intrin.h:147
_mm_fmadd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sh(__m128h __W, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:2740
_mm_mask_fcmadd_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
Definition: avx512fp16intrin.h:2943
_mm512_mask_sub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:393
_mm512_maskz_fmsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2553
_mm512_cvttph_epi32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epi32(__m256h __A)
Definition: avx512fp16intrin.h:2021
_mm512_cvtph_pd
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A)
Definition: avx512fp16intrin.h:1501
_mm512_setzero_pd
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void)
Definition: avx512fintrin.h:262
_mm256_castph_si256
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_castph_si256(__m256h __a)
Definition: avx512fp16intrin.h:134
_mm512_mask_cvtxps_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A)
Definition: avx512fp16intrin.h:2436
_mm512_cvttph_epi64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epi64(__m128h __A)
Definition: avx512fp16intrin.h:2218
_mm512_mask_cvtepu64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A)
Definition: avx512fp16intrin.h:2159
_mm512_mul_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mul_ph(__m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:419
_mm_cvtu32_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtu32_sh(__m128h __A, unsigned int __B)
Definition: avx512fp16intrin.h:2309
_mm512_mask_cvttph_epi32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A)
Definition: avx512fp16intrin.h:2028
_mm512_fmadd_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_pch(__m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:3223
_mm256_castph256_ph128
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_castph256_ph128(__m256h __a)
Definition: avx512fp16intrin.h:182
_mm512_mask_mul_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:425
_mm_load_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_sh(void const *__dp)
Definition: avx512fp16intrin.h:828
_mm512_cvtph_epi16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epi16(__m512h __A)
Definition: avx512fp16intrin.h:1685
_mm512_set1_ph
static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set1_ph(_Float16 __h)
Definition: avx512fp16intrin.h:69
_mm512_cvtpd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A)
Definition: avx512fp16intrin.h:1470
_mm256_store_ph
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_store_ph(void *__P, __m256h __A)
Definition: avx512fp16intrin.h:907
_mm512_fmul_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmul_pch(__m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:3138
_mm512_mask_blend_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W)
Definition: avx512fp16intrin.h:3293
_mm256_loadu_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_loadu_ph(void const *__p)
Definition: avx512fp16intrin.h:873
_mm512_maskz_cvttph_epu16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1833
_mm512_fnmadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2559
_mm_cvtsd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A, __m128d __B)
Definition: avx512fp16intrin.h:1610
_mm512_mask_sqrt_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1388
_mm512_fnmsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2581
_mm_maskz_mul_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:656
_mm_maskz_div_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:692
_mm_cvtsh_i32
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A)
Definition: avx512fp16intrin.h:2273
_mm_maskz_cvtsh_ss
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U, __m128 __A, __m128h __B)
Definition: avx512fp16intrin.h:1550
_mm_mask_cvtss_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128 __B)
Definition: avx512fp16intrin.h:1579
_mm_add_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:570
_mm512_mask_add_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_add_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:361
_mm512_maskz_min_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:496
_mm512_add_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_add_ph(__m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:355
_mm512_mask_cvttph_epu32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A)
Definition: avx512fp16intrin.h:2062
_mm512_load_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_load_ph(void const *__p)
Definition: avx512fp16intrin.h:851
_mm_castph_ps
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a)
Definition: avx512fp16intrin.h:105
_mm512_rcp_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rcp_ph(__m512h __A)
Definition: avx512fp16intrin.h:972
_mm512_cvtxps_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A)
Definition: avx512fp16intrin.h:2429
_mm512_cvtepu16_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_ph(__m512i __A)
Definition: avx512fp16intrin.h:1853
_mm512_castps_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castps_ph(__m512 __a)
Definition: avx512fp16intrin.h:151
_mm512_maskz_cvtepu32_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A)
Definition: avx512fp16intrin.h:2000
_mm_mask_scalef_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1286
_mm512_maskz_cvtxph_ps
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A)
Definition: avx512fp16intrin.h:2410
_mm512_maskz_cvtph_pd
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:1514
_mm512_getexp_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_getexp_ph(__m512h __A)
Definition: avx512fp16intrin.h:1037
_mm_store_ph
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_ph(void *__P, __m128h __A)
Definition: avx512fp16intrin.h:912
_mm_cvtsi128_si16
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtsi128_si16(__m128i __a)
Definition: avx512fp16intrin.h:967
_mm512_cvtepi64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_ph(__m512i __A)
Definition: avx512fp16intrin.h:2087
_mm_mul_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:642
_mm512_maskz_conj_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_conj_pch(__mmask16 __U, __m512h __A)
Definition: avx512fp16intrin.h:564
_mm_mask_fnmsub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:2895
_mm_mask_fmadd_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
Definition: avx512fp16intrin.h:2990
_mm_maskz_getexp_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1262
_mm512_cvtepi16_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_ph(__m512i __A)
Definition: avx512fp16intrin.h:1752
_mm_ucomilt_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:325
_mm_maskz_cvtsh_sd
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B)
Definition: avx512fp16intrin.h:1664
_mm_maskz_fmadd_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512fp16intrin.h:2996
_mm512_xor_ps
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_xor_ps(__m512 __A, __m512 __B)
Definition: avx512dqintrin.h:188
_mm_mask_fcmul_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fcmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:3036
_mm_maskz_fmul_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmul_sch(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:3081
_mm_maskz_cvtsd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B)
Definition: avx512fp16intrin.h:1627
_CMP_LT_OQ
#define _CMP_LT_OQ
Definition: avxintrin.h:1577
_mm512_permutexvar_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ph(__m512i __A, __m512h __B)
Definition: avx512fp16intrin.h:3305
_mm_rcp_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1166
_mm512_cvtph_epu32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epu32(__m256h __A)
Definition: avx512fp16intrin.h:1921
_mm512_fmsubadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2660
_CMP_NEQ_US
#define _CMP_NEQ_US
Definition: avxintrin.h:1580
_mm512_cvttph_epu16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epu16(__m512h __A)
Definition: avx512fp16intrin.h:1820
_mm512_mask_div_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_div_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:457
_CMP_LE_OQ
#define _CMP_LE_OQ
Definition: avxintrin.h:1578
_mm_castph_pd
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castph_pd(__m128h __a)
Definition: avx512fp16intrin.h:117
_mm_mask_max_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:760
_mm_mask_mul_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:648
_mm512_maskz_div_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:463
_CMP_GT_OS
#define _CMP_GT_OS
Definition: avxintrin.h:1574
_mm_comineq_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:313
_mm_undefined_ps
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
Definition: xmmintrin.h:1780
_mm_rsqrt_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1187
_mm_mask_sqrt_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sh(__m128h __W, __mmask32 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1425
_mm256_load_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_load_ph(void const *__p)
Definition: avx512fp16intrin.h:856
_mm_fcmul_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_sch(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:3028
_mm_maskz_fnmadd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512fp16intrin.h:2864
_mm512_setzero_epi32
#define _mm512_setzero_epi32
Definition: avx512fintrin.h:178
_mm512_mask_fmadd_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:3232
_mm256_castph128_ph256
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castph128_ph256(__m128h __a)
Definition: avx512fp16intrin.h:198
_mm512_mask_fmadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2517
_mm512_set1_epi32
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s)
Definition: avx512fintrin.h:305
_mm512_set_ph
static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12, _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16, _Float16 __h17, _Float16 __h18, _Float16 __h19, _Float16 __h20, _Float16 __h21, _Float16 __h22, _Float16 __h23, _Float16 __h24, _Float16 __h25, _Float16 __h26, _Float16 __h27, _Float16 __h28, _Float16 __h29, _Float16 __h30, _Float16 __h31, _Float16 __h32)
Definition: avx512fp16intrin.h:77
_mm_storeu_ph
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_ph(void *__P, __m128h __A)
Definition: avx512fp16intrin.h:933
_mm512_loadu_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_loadu_ph(void const *__p)
Definition: avx512fp16intrin.h:865
_mm_maskz_load_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_load_sh(__mmask8 __U, const void *__A)
Definition: avx512fp16intrin.h:845
_mm512_castph256_ph512
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castph256_ph512(__m256h __a)
Definition: avx512fp16intrin.h:211
_mm512_maskz_getexp_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ph(__mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1050
_mm_getexp_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1242
_mm_mask_load_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_load_sh(__m128h __W, __mmask8 __U, const void *__A)
Definition: avx512fp16intrin.h:837
_mm512_mask3_fmsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
Definition: avx512fp16intrin.h:2686
_mm_sub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:606
_mm512_maskz_cvtepi16_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A)
Definition: avx512fp16intrin.h:1765
_Float16
__device__ _Float16
Definition: __clang_hip_libdevice_declares.h:295
_mm_ucomieq_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:319
_mm_maskz_fcmadd_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fcmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512fp16intrin.h:2949
__p
static __inline unsigned char unsigned int unsigned int unsigned int * __p
Definition: adxintrin.h:24
_mm_castpd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castpd_ph(__m128d __a)
Definition: avx512fp16intrin.h:155
_mm512_fcmul_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmul_pch(__m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:3102
_mm512_maskz_add_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ph(__mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:367
_mm512_cvtph_epi32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epi32(__m256h __A)
Definition: avx512fp16intrin.h:1887
_mm512_maskz_fnmsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2590
_mm_sqrt_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1418
_mm512_castph512_ph256
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_castph512_ph256(__m512h __a)
Definition: avx512fp16intrin.h:192
_mm512_mask_cvtph_epu16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1793
_mm256_storeu_ph
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_storeu_ph(void *__P, __m256h __A)
Definition: avx512fp16intrin.h:925
_mm512_mask_rcp_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_rcp_ph(__m512h __W, __mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:978
_mm512_cvttph_epu32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epu32(__m256h __A)
Definition: avx512fp16intrin.h:2055
_mm_ucomige_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:343
__mmask16
unsigned short __mmask16
Definition: avx512fintrin.h:38
_mm512_cvtepu64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtepu64_ph(__m512i __A)
Definition: avx512fp16intrin.h:2152
_mm_undefined_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_undefined_ph(void)
Definition: avx512fp16intrin.h:61
_mm_fcmadd_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C)
Definition: avx512fp16intrin.h:2934
_mm512_maskz_cvttph_epi32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A)
Definition: avx512fp16intrin.h:2034
_mm_mask_cvtsh_ss
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128h __B)
Definition: avx512fp16intrin.h:1541
_mm256_castph_ps
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_castph_ps(__m256h __a)
Definition: avx512fp16intrin.h:109
_mm_mask3_fmadd_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
Definition: avx512fp16intrin.h:3003
_mm512_maskz_fmaddsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2653
_mm512_maskz_max_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:529
_mm512_mask_fmaddsub_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2639
_mm_maskz_fnmsub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512fp16intrin.h:2911
_mm512_cvtepu32_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ph(__m512i __A)
Definition: avx512fp16intrin.h:1987
_mm_cvttsh_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS128 _mm_cvttsh_u32(__m128h __A)
Definition: avx512fp16intrin.h:2368
_mm256_setzero_ph
static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_setzero_ph(void)
Definition: avx512fp16intrin.h:46
__DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS256
Definition: avx512fp16intrin.h:31
_mm512_maskz_cvtpd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A)
Definition: avx512fp16intrin.h:1483
_mm_maskz_fmsub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512fp16intrin.h:2817
_mm512_maskz_cvtepi32_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A)
Definition: avx512fp16intrin.h:1967
_mm512_maskz_rcp_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp_ph(__mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:984
_mm512_mask3_fcmadd_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U)
Definition: avx512fp16intrin.h:3190
_mm512_maskz_fnmadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2575
_mm512_max_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:516
_mm512_abs_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A)
Definition: avx512fp16intrin.h:549
_mm_cvti32_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A, int __B)
Definition: avx512fp16intrin.h:2329
_mm512_maskz_cvtepu64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A)
Definition: avx512fp16intrin.h:2165
_mm512_set1_ps
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w)
Definition: avx512fintrin.h:268
_mm512_mask_cvtpd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A)
Definition: avx512fp16intrin.h:1477
_mm512_cvtph_epu64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtph_epu64(__m128h __A)
Definition: avx512fp16intrin.h:2185
_mm_maskz_add_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:584
_mm_mask3_fcmadd_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
Definition: avx512fp16intrin.h:2956
_mm_setzero_ps
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition: xmmintrin.h:1907
__b
static __inline__ vector float vector float __b
Definition: altivec.h:566
_mm512_maskz_scalef_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ph(__mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:1085
_mm512_mask_min_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:490
_mm512_set1_pch
static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set1_pch(_Float16 _Complex h)
Definition: avx512fp16intrin.h:101
_mm_mask_sub_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:612
_mm512_maskz_cvttph_epu64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:2264
_mm512_mask_fcmul_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fcmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:3110
_mm_maskz_fmadd_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512fp16intrin.h:2766
_mm512_reduce_max_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ph(__m512h __V)
Definition: avx512fp16intrin.h:3283
_mm512_maskz_rsqrt_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt_ph(__mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1001
_mm_maskz_rsqrt_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:1202
_mm512_min_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:483
_mm512_cvtepi32_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ph(__m512i __A)
Definition: avx512fp16intrin.h:1954
_mm512_maskz_cvtph_epi32
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A)
Definition: avx512fp16intrin.h:1900
_mm512_zextph128_ph512
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_zextph128_ph512(__m128h __a)
Constructs a 512-bit floating-point vector of [32 x half] from a 128-bit floating-point vector of [8 ...
Definition: avx512fp16intrin.h:250
_mm512_cvttph_epi16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttph_epi16(__m512h __A)
Definition: avx512fp16intrin.h:1719
_mm512_reduce_min_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ph(__m512h __V)
Definition: avx512fp16intrin.h:3288
_mm_maskz_move_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_move_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:955
_MM_FROUND_CUR_DIRECTION
#define _MM_FROUND_CUR_DIRECTION
Definition: avx512fintrin.h:45
_mm_maskz_cvtss_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U, __m128h __A, __m128 __B)
Definition: avx512fp16intrin.h:1588
_mm_maskz_max_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_sh(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:769
_mm_comieq_sh
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h A, __m128h B)
Definition: avx512fp16intrin.h:283
_CMP_GE_OQ
#define _CMP_GE_OQ
Definition: avxintrin.h:1589
_mm512_reduce_mul_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ph(__m512h __W)
Definition: avx512fp16intrin.h:3278
_CMP_LT_OS
#define _CMP_LT_OS
Definition: avxintrin.h:1561
_mm512_maskz_fmadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2531
_mm_store_sh
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_sh(void *__dp, __m128h __a)
Definition: avx512fp16intrin.h:888
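A minimal sketch of storing only the low half-precision element (helper name made up):
#include <immintrin.h>
static void store_low_half(_Float16 *dst, __m128h v) {
  // Writes element 0 of 'v' to *dst; no alignment requirement on 'dst'.
  _mm_store_sh(dst, v);
}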
__DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128
Definition: avx512fp16intrin.h:34
_mm512_maskz_fcmul_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fcmul_pch(__mmask16 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:3117
_mm512_mask_fnmadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
Definition: avx512fp16intrin.h:2710
__mmask8
unsigned char __mmask8
Definition: avx512fintrin.h:37
_CMP_EQ_OS
#define _CMP_EQ_OS
Definition: avxintrin.h:1576
_mm_maskz_fcmul_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fcmul_sch(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:3043
_mm_setzero_pd
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition: emmintrin.h:1915
_mm512_castpd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castpd_ph(__m512d __a)
Definition: avx512fp16intrin.h:163
_mm512_conj_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A)
Definition: avx512fp16intrin.h:553
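An illustrative use of the complex-conjugate intrinsic (wrapper name hypothetical):
#include <immintrin.h>
static __m512h conjugate(__m512h z) {
  // Treats 'z' as 16 interleaved (real, imag) _Float16 pairs and flips
  // the sign of each imaginary part.
  return _mm512_conj_pch(z);
}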
_mm512_div_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_div_ph(__m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:451
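A sketch of the packed half-precision division (helper name made up):
#include <immintrin.h>
static __m512h elementwise_div(__m512h a, __m512h b) {
  // Divides each of the 32 half-precision lanes of 'a' by the
  // corresponding lane of 'b'.
  return _mm512_div_ph(a, b);
}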
_mm_mask_store_sh
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sh(void *__W, __mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:896
_mm_div_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:678
_mm512_storeu_ph
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_ph(void *__P, __m512h __A)
Definition: avx512fp16intrin.h:917
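For illustration, an unaligned store of a full 512-bit half-precision vector (helper name hypothetical):
#include <immintrin.h>
static void store_32_halves(_Float16 *dst, __m512h v) {
  // Stores all 32 half-precision elements; 'dst' need not be
  // 64-byte aligned.
  _mm512_storeu_ph(dst, v);
}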
_mm512_mask3_fmadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
Definition: avx512fp16intrin.h:2524
_mm512_maskz_fmul_pch
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B)
Definition: avx512fp16intrin.h:3153
_mm_fmadd_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sch(__m128h __A, __m128h __B, __m128h __C)
Definition: avx512fp16intrin.h:2981
_mm512_maskz_sqrt_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A)
Definition: avx512fp16intrin.h:1396
_mm_mask_fmul_sch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512fp16intrin.h:3071
_mm512_mask_cvtepu32_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A)
Definition: avx512fp16intrin.h:1994
_mm512_castph_si512
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castph_si512(__m512h __a)
Definition: avx512fp16intrin.h:139
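A sketch of the bit-preserving cast from half vectors to an integer vector (wrapper name made up):
#include <immintrin.h>
static __m512i half_bits_as_int(__m512h v) {
  // Reinterprets the 512 bits as __m512i; this is a type-level cast
  // and generates no instructions.
  return _mm512_castph_si512(v);
}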
_mm512_zextph256_ph512
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_zextph256_ph512(__m256h __a)
Constructs a 512-bit floating-point vector of [32 x half] from a 256-bit floating-point vector of [16 x half].
Definition: avx512fp16intrin.h:270
_mm512_mask3_fmsubadd_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
Definition: avx512fp16intrin.h:2698
_mm512_castsi512_ph
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castsi512_ph(__m512i __a)
Definition: avx512fp16intrin.h:177
_mm512_mask_cvtph_epi64
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A)
Definition: avx512fp16intrin.h:2127