clang  14.0.0git
avx512vlfp16intrin.h
Go to the documentation of this file.
1 /*===---------- avx512vlfp16intrin.h - AVX512-FP16 intrinsics --------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 #ifndef __IMMINTRIN_H
10 #error \
11  "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead."
12 #endif
13 
14 #ifndef __AVX512VLFP16INTRIN_H
15 #define __AVX512VLFP16INTRIN_H
16 
/* Attribute set applied to the 256-bit intrinsics in this file: forced
 * inlining, no debug info, the avx512fp16 + avx512vl target features, and a
 * minimum vector width of 256. */
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __min_vector_width__(256), \
                 __target__("avx512fp16, avx512vl")))
/* Attribute set applied to the 128-bit intrinsics in this file: forced
 * inlining, no debug info, the avx512fp16 + avx512vl target features, and a
 * minimum vector width of 128. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __min_vector_width__(128), \
                 __target__("avx512fp16, avx512vl")))
26 
27 static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {
28  return __a[0];
29 }
30 
31 static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) {
32  return __a[0];
33 }
34 
35 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) {
36  return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
37 }
38 
39 static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h) {
40  return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h};
41 }
42 
43 static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h) {
44  return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h,
45  __h, __h, __h, __h, __h, __h, __h, __h};
46 }
47 
48 static __inline __m128h __DEFAULT_FN_ATTRS128
49 _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
50  _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
51  return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
52 }
53 
54 static __inline __m256h __DEFAULT_FN_ATTRS256
56  return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h));
57 }
58 
59 static __inline __m128h __DEFAULT_FN_ATTRS128
60 _mm_set1_pch(_Float16 _Complex h) {
61  return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h));
62 }
63 
64 static __inline __m256h __DEFAULT_FN_ATTRS256
66  _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
67  _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
68  _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) {
69  return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
70  __h10, __h9, __h8, __h7, __h6, __h5,
71  __h4, __h3, __h2, __h1};
72 }
73 
/* Reversed-argument form of _mm_set_ph: forwards the eight arguments in
 * the opposite order. */
#define _mm_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8) \
  _mm_set_ph((h8), (h7), (h6), (h5), \
             (h4), (h3), (h2), (h1))
76 
/* Reversed-argument form of _mm256_set_ph: forwards the sixteen arguments in
 * the opposite order. */
#define _mm256_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \
                       h14, h15, h16) \
  _mm256_set_ph((h16), (h15), (h14), (h13), \
                (h12), (h11), (h10), (h9), \
                (h8), (h7), (h6), (h5), \
                (h4), (h3), (h2), (h1))
81 
82 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A,
83  __m256h __B) {
84  return (__m256h)((__v16hf)__A + (__v16hf)__B);
85 }
86 
87 static __inline__ __m256h __DEFAULT_FN_ATTRS256
88 _mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
89  return (__m256h)__builtin_ia32_selectph_256(
90  __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W);
91 }
92 
93 static __inline__ __m256h __DEFAULT_FN_ATTRS256
94 _mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) {
95  return (__m256h)__builtin_ia32_selectph_256(
96  __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
97 }
98 
99 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A,
100  __m128h __B) {
101  return (__m128h)((__v8hf)__A + (__v8hf)__B);
102 }
103 
104 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W,
105  __mmask8 __U,
106  __m128h __A,
107  __m128h __B) {
108  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
109  (__v8hf)__W);
110 }
111 
112 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U,
113  __m128h __A,
114  __m128h __B) {
115  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
116  (__v8hf)_mm_setzero_ph());
117 }
118 
119 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A,
120  __m256h __B) {
121  return (__m256h)((__v16hf)__A - (__v16hf)__B);
122 }
123 
124 static __inline__ __m256h __DEFAULT_FN_ATTRS256
125 _mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
126  return (__m256h)__builtin_ia32_selectph_256(
127  __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W);
128 }
129 
130 static __inline__ __m256h __DEFAULT_FN_ATTRS256
131 _mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) {
132  return (__m256h)__builtin_ia32_selectph_256(
133  __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
134 }
135 
136 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A,
137  __m128h __B) {
138  return (__m128h)((__v8hf)__A - (__v8hf)__B);
139 }
140 
141 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W,
142  __mmask8 __U,
143  __m128h __A,
144  __m128h __B) {
145  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
146  (__v8hf)__W);
147 }
148 
149 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U,
150  __m128h __A,
151  __m128h __B) {
152  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
153  (__v8hf)_mm_setzero_ph());
154 }
155 
156 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A,
157  __m256h __B) {
158  return (__m256h)((__v16hf)__A * (__v16hf)__B);
159 }
160 
161 static __inline__ __m256h __DEFAULT_FN_ATTRS256
162 _mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
163  return (__m256h)__builtin_ia32_selectph_256(
164  __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W);
165 }
166 
167 static __inline__ __m256h __DEFAULT_FN_ATTRS256
168 _mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) {
169  return (__m256h)__builtin_ia32_selectph_256(
170  __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
171 }
172 
173 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A,
174  __m128h __B) {
175  return (__m128h)((__v8hf)__A * (__v8hf)__B);
176 }
177 
178 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W,
179  __mmask8 __U,
180  __m128h __A,
181  __m128h __B) {
182  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
183  (__v8hf)__W);
184 }
185 
186 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U,
187  __m128h __A,
188  __m128h __B) {
189  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
190  (__v8hf)_mm_setzero_ph());
191 }
192 
193 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A,
194  __m256h __B) {
195  return (__m256h)((__v16hf)__A / (__v16hf)__B);
196 }
197 
198 static __inline__ __m256h __DEFAULT_FN_ATTRS256
199 _mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
200  return (__m256h)__builtin_ia32_selectph_256(
201  __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W);
202 }
203 
204 static __inline__ __m256h __DEFAULT_FN_ATTRS256
205 _mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) {
206  return (__m256h)__builtin_ia32_selectph_256(
207  __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
208 }
209 
210 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A,
211  __m128h __B) {
212  return (__m128h)((__v8hf)__A / (__v8hf)__B);
213 }
214 
215 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W,
216  __mmask8 __U,
217  __m128h __A,
218  __m128h __B) {
219  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
220  (__v8hf)__W);
221 }
222 
223 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U,
224  __m128h __A,
225  __m128h __B) {
226  return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
227  (__v8hf)_mm_setzero_ph());
228 }
229 
230 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A,
231  __m256h __B) {
232  return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B);
233 }
234 
235 static __inline__ __m256h __DEFAULT_FN_ATTRS256
236 _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
237  return (__m256h)__builtin_ia32_selectph_256(
238  (__mmask16)__U,
239  (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
240  (__v16hf)__W);
241 }
242 
243 static __inline__ __m256h __DEFAULT_FN_ATTRS256
244 _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) {
245  return (__m256h)__builtin_ia32_selectph_256(
246  (__mmask16)__U,
247  (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
248  (__v16hf)_mm256_setzero_ph());
249 }
250 
251 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A,
252  __m128h __B) {
253  return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B);
254 }
255 
256 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W,
257  __mmask8 __U,
258  __m128h __A,
259  __m128h __B) {
260  return (__m128h)__builtin_ia32_selectph_128(
261  (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
262  (__v8hf)__W);
263 }
264 
265 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U,
266  __m128h __A,
267  __m128h __B) {
268  return (__m128h)__builtin_ia32_selectph_128(
269  (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
270  (__v8hf)_mm_setzero_ph());
271 }
272 
273 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A,
274  __m256h __B) {
275  return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B);
276 }
277 
278 static __inline__ __m256h __DEFAULT_FN_ATTRS256
279 _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
280  return (__m256h)__builtin_ia32_selectph_256(
281  (__mmask16)__U,
282  (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
283  (__v16hf)__W);
284 }
285 
286 static __inline__ __m256h __DEFAULT_FN_ATTRS256
287 _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) {
288  return (__m256h)__builtin_ia32_selectph_256(
289  (__mmask16)__U,
290  (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
291  (__v16hf)_mm256_setzero_ph());
292 }
293 
294 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A,
295  __m128h __B) {
296  return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B);
297 }
298 
299 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W,
300  __mmask8 __U,
301  __m128h __A,
302  __m128h __B) {
303  return (__m128h)__builtin_ia32_selectph_128(
304  (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
305  (__v8hf)__W);
306 }
307 
308 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U,
309  __m128h __A,
310  __m128h __B) {
311  return (__m128h)__builtin_ia32_selectph_128(
312  (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
313  (__v8hf)_mm_setzero_ph());
314 }
315 
316 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) {
317  return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A);
318 }
319 
320 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) {
321  return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A);
322 }
323 
324 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A) {
325  return (__m256h)_mm256_xor_ps((__m256)__A, _mm256_set1_ps(-0.0f));
326 }
327 
328 static __inline__ __m256h __DEFAULT_FN_ATTRS256
329 _mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) {
330  return (__m256h)__builtin_ia32_selectps_256(
331  (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W);
332 }
333 
334 static __inline__ __m256h __DEFAULT_FN_ATTRS256
335 _mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) {
336  return (__m256h)__builtin_ia32_selectps_256(
337  (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)_mm256_setzero_ps());
338 }
339 
340 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A) {
341  return (__m128h)_mm_xor_ps((__m128)__A, _mm_set1_ps(-0.0f));
342 }
343 
344 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_conj_pch(__m128h __W,
345  __mmask8 __U,
346  __m128h __A) {
347  return (__m128h)__builtin_ia32_selectps_128(
348  (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W);
349 }
350 
351 static __inline__ __m128h __DEFAULT_FN_ATTRS128
352 _mm_maskz_conj_pch(__mmask8 __U, __m128h __A) {
353  return (__m128h)__builtin_ia32_selectps_128(
354  (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)_mm_setzero_ps());
355 }
356 
/* Compares a and b under predicate p through __builtin_ia32_cmpph256_mask
 * with every mask bit set; yields a __mmask16. */
#define _mm256_cmp_ph_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpph256_mask((__v16hf)(__m256h)(a), \
                                           (__v16hf)(__m256h)(b), \
                                           (int)(p), \
                                           (__mmask16)-1))
360 
/* Compares a and b under predicate p through __builtin_ia32_cmpph256_mask,
 * passing m as the input mask; yields a __mmask16. */
#define _mm256_mask_cmp_ph_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpph256_mask((__v16hf)(__m256h)(a), \
                                           (__v16hf)(__m256h)(b), \
                                           (int)(p), \
                                           (__mmask16)(m)))
364 
/* Compares a and b under predicate p through __builtin_ia32_cmpph128_mask
 * with every mask bit set; yields a __mmask8. */
#define _mm_cmp_ph_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpph128_mask((__v8hf)(__m128h)(a), \
                                          (__v8hf)(__m128h)(b), \
                                          (int)(p), \
                                          (__mmask8)-1))
368 
/* Compares a and b under predicate p through __builtin_ia32_cmpph128_mask,
 * passing m as the input mask; yields a __mmask8. */
#define _mm_mask_cmp_ph_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpph128_mask((__v8hf)(__m128h)(a), \
                                          (__v8hf)(__m128h)(b), \
                                          (int)(p), \
                                          (__mmask8)(m)))
372 
373 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) {
374  return (__m256h)__builtin_ia32_rcpph256_mask(
375  (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
376 }
377 
378 static __inline__ __m256h __DEFAULT_FN_ATTRS256
379 _mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
380  return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W,
381  (__mmask16)__U);
382 }
383 
384 static __inline__ __m256h __DEFAULT_FN_ATTRS256
385 _mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) {
386  return (__m256h)__builtin_ia32_rcpph256_mask(
387  (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
388 }
389 
390 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) {
391  return (__m128h)__builtin_ia32_rcpph128_mask(
392  (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
393 }
394 
395 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W,
396  __mmask8 __U,
397  __m128h __A) {
398  return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W,
399  (__mmask8)__U);
400 }
401 
402 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U,
403  __m128h __A) {
404  return (__m128h)__builtin_ia32_rcpph128_mask(
405  (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
406 }
407 
408 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) {
409  return (__m256h)__builtin_ia32_rsqrtph256_mask(
410  (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
411 }
412 
413 static __inline__ __m256h __DEFAULT_FN_ATTRS256
414 _mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
415  return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W,
416  (__mmask16)__U);
417 }
418 
419 static __inline__ __m256h __DEFAULT_FN_ATTRS256
420 _mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) {
421  return (__m256h)__builtin_ia32_rsqrtph256_mask(
422  (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
423 }
424 
425 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) {
426  return (__m128h)__builtin_ia32_rsqrtph128_mask(
427  (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
428 }
429 
430 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W,
431  __mmask8 __U,
432  __m128h __A) {
433  return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W,
434  (__mmask8)__U);
435 }
436 
437 static __inline__ __m128h __DEFAULT_FN_ATTRS128
438 _mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) {
439  return (__m128h)__builtin_ia32_rsqrtph128_mask(
440  (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
441 }
442 
443 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) {
444  return (__m128h)__builtin_ia32_getexpph128_mask(
445  (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
446 }
447 
448 static __inline__ __m128h __DEFAULT_FN_ATTRS128
449 _mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) {
450  return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W,
451  (__mmask8)__U);
452 }
453 
454 static __inline__ __m128h __DEFAULT_FN_ATTRS128
455 _mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) {
456  return (__m128h)__builtin_ia32_getexpph128_mask(
457  (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
458 }
459 
460 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) {
461  return (__m256h)__builtin_ia32_getexpph256_mask(
462  (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
463 }
464 
465 static __inline__ __m256h __DEFAULT_FN_ATTRS256
466 _mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
467  return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W,
468  (__mmask16)__U);
469 }
470 
471 static __inline__ __m256h __DEFAULT_FN_ATTRS256
473  return (__m256h)__builtin_ia32_getexpph256_mask(
474  (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
475 }
476 
/* Calls __builtin_ia32_getmantph128_mask on A with the immediate
 * ((C) << 2) | (B), a zero vector as the third operand and every mask bit
 * set. */
#define _mm_getmant_ph(A, B, C) \
  ((__m128h)__builtin_ia32_getmantph128_mask((__v8hf)(__m128h)(A), \
                                             (int)(((C) << 2) | (B)), \
                                             (__v8hf)_mm_setzero_ph(), \
                                             (__mmask8)-1))
481 
/* Calls __builtin_ia32_getmantph128_mask on A with the immediate
 * ((C) << 2) | (B), W as the third operand and U as the mask. */
#define _mm_mask_getmant_ph(W, U, A, B, C) \
  ((__m128h)__builtin_ia32_getmantph128_mask((__v8hf)(__m128h)(A), \
                                             (int)(((C) << 2) | (B)), \
                                             (__v8hf)(__m128h)(W), \
                                             (__mmask8)(U)))
486 
/* Calls __builtin_ia32_getmantph128_mask on A with the immediate
 * ((C) << 2) | (B), a zero vector as the third operand and U as the mask. */
#define _mm_maskz_getmant_ph(U, A, B, C) \
  ((__m128h)__builtin_ia32_getmantph128_mask((__v8hf)(__m128h)(A), \
                                             (int)(((C) << 2) | (B)), \
                                             (__v8hf)_mm_setzero_ph(), \
                                             (__mmask8)(U)))
491 
/* Calls __builtin_ia32_getmantph256_mask on A with the immediate
 * ((C) << 2) | (B), a zero vector as the third operand and every mask bit
 * set. */
#define _mm256_getmant_ph(A, B, C) \
  ((__m256h)__builtin_ia32_getmantph256_mask((__v16hf)(__m256h)(A), \
                                             (int)(((C) << 2) | (B)), \
                                             (__v16hf)_mm256_setzero_ph(), \
                                             (__mmask16)-1))
496 
/* Calls __builtin_ia32_getmantph256_mask on A with the immediate
 * ((C) << 2) | (B), W as the third operand and U as the mask. */
#define _mm256_mask_getmant_ph(W, U, A, B, C) \
  ((__m256h)__builtin_ia32_getmantph256_mask((__v16hf)(__m256h)(A), \
                                             (int)(((C) << 2) | (B)), \
                                             (__v16hf)(__m256h)(W), \
                                             (__mmask16)(U)))
501 
/* Calls __builtin_ia32_getmantph256_mask on A with the immediate
 * ((C) << 2) | (B), a zero vector as the third operand and U as the mask. */
#define _mm256_maskz_getmant_ph(U, A, B, C) \
  ((__m256h)__builtin_ia32_getmantph256_mask((__v16hf)(__m256h)(A), \
                                             (int)(((C) << 2) | (B)), \
                                             (__v16hf)_mm256_setzero_ph(), \
                                             (__mmask16)(U)))
506 
507 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A,
508  __m128h __B) {
509  return (__m128h)__builtin_ia32_scalefph128_mask(
510  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
511 }
512 
513 static __inline__ __m128h __DEFAULT_FN_ATTRS128
514 _mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
515  return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B,
516  (__v8hf)__W, (__mmask8)__U);
517 }
518 
519 static __inline__ __m128h __DEFAULT_FN_ATTRS128
520 _mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) {
521  return (__m128h)__builtin_ia32_scalefph128_mask(
522  (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
523 }
524 
525 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A,
526  __m256h __B) {
527  return (__m256h)__builtin_ia32_scalefph256_mask(
528  (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
529 }
530 
531 static __inline__ __m256h __DEFAULT_FN_ATTRS256
532 _mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
533  return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B,
534  (__v16hf)__W, (__mmask16)__U);
535 }
536 
537 static __inline__ __m256h __DEFAULT_FN_ATTRS256
538 _mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) {
539  return (__m256h)__builtin_ia32_scalefph256_mask(
540  (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
541 }
542 
/* Calls __builtin_ia32_rndscaleph_128_mask on A with immediate imm, a zero
 * vector as the third operand and every mask bit set. */
#define _mm_roundscale_ph(A, imm) \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask((__v8hf)(__m128h)(A), \
                                               (int)(imm), \
                                               (__v8hf)_mm_setzero_ph(), \
                                               (__mmask8)-1))
547 
/* Calls __builtin_ia32_rndscaleph_128_mask on A with immediate imm, W as the
 * third operand and U as the mask. */
#define _mm_mask_roundscale_ph(W, U, A, imm) \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask((__v8hf)(__m128h)(A), \
                                               (int)(imm), \
                                               (__v8hf)(__m128h)(W), \
                                               (__mmask8)(U)))
551 
/* Calls __builtin_ia32_rndscaleph_128_mask on A with immediate imm, a zero
 * vector as the third operand and U as the mask. */
#define _mm_maskz_roundscale_ph(U, A, imm) \
  ((__m128h)__builtin_ia32_rndscaleph_128_mask((__v8hf)(__m128h)(A), \
                                               (int)(imm), \
                                               (__v8hf)_mm_setzero_ph(), \
                                               (__mmask8)(U)))
556 
/* Calls __builtin_ia32_rndscaleph_256_mask on A with immediate imm, a zero
 * vector as the third operand and every mask bit set. */
#define _mm256_roundscale_ph(A, imm) \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask((__v16hf)(__m256h)(A), \
                                               (int)(imm), \
                                               (__v16hf)_mm256_setzero_ph(), \
                                               (__mmask16)-1))
561 
/* Calls __builtin_ia32_rndscaleph_256_mask on A with immediate imm, W as the
 * third operand and U as the mask. */
#define _mm256_mask_roundscale_ph(W, U, A, imm) \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask((__v16hf)(__m256h)(A), \
                                               (int)(imm), \
                                               (__v16hf)(__m256h)(W), \
                                               (__mmask16)(U)))
566 
/* Calls __builtin_ia32_rndscaleph_256_mask on A with immediate imm, a zero
 * vector as the third operand and U as the mask. */
#define _mm256_maskz_roundscale_ph(U, A, imm) \
  ((__m256h)__builtin_ia32_rndscaleph_256_mask((__v16hf)(__m256h)(A), \
                                               (int)(imm), \
                                               (__v16hf)_mm256_setzero_ph(), \
                                               (__mmask16)(U)))
571 
/* Calls __builtin_ia32_reduceph128_mask on A with immediate imm, a zero
 * vector as the third operand and every mask bit set. */
#define _mm_reduce_ph(A, imm) \
  ((__m128h)__builtin_ia32_reduceph128_mask( \
      (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \
      (__mmask8)-1))
576 
/* Calls __builtin_ia32_reduceph128_mask on A with immediate imm, W as the
 * third operand and U as the mask. */
#define _mm_mask_reduce_ph(W, U, A, imm) \
  ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), \
                                            (int)(imm), \
                                            (__v8hf)(__m128h)(W), \
                                            (__mmask8)(U)))
580 
/* Calls __builtin_ia32_reduceph128_mask on A with immediate imm, a zero
 * vector as the third operand and U as the mask. */
#define _mm_maskz_reduce_ph(U, A, imm) \
  ((__m128h)__builtin_ia32_reduceph128_mask( \
      (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \
      (__mmask8)(U)))
585 
/* Calls __builtin_ia32_reduceph256_mask on A with immediate imm, a zero
 * vector as the third operand and every mask bit set. */
#define _mm256_reduce_ph(A, imm) \
  ((__m256h)__builtin_ia32_reduceph256_mask( \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \
      (__mmask16)-1))
590 
/* Calls __builtin_ia32_reduceph256_mask on A with immediate imm, W as the
 * third operand and U as the mask. */
#define _mm256_mask_reduce_ph(W, U, A, imm) \
  ((__m256h)__builtin_ia32_reduceph256_mask( \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \
      (__mmask16)(U)))
595 
/* Calls __builtin_ia32_reduceph256_mask on A with immediate imm, a zero
 * vector as the third operand and U as the mask. */
#define _mm256_maskz_reduce_ph(U, A, imm) \
  ((__m256h)__builtin_ia32_reduceph256_mask( \
      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \
      (__mmask16)(U)))
600 
601 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) {
602  return __builtin_ia32_sqrtph((__v8hf)__a);
603 }
604 
605 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W,
606  __mmask8 __U,
607  __m128h __A) {
608  return (__m128h)__builtin_ia32_selectph_128(
609  (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W);
610 }
611 
612 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U,
613  __m128h __A) {
614  return (__m128h)__builtin_ia32_selectph_128(
615  (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph());
616 }
617 
618 static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) {
619  return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a);
620 }
621 
622 static __inline__ __m256h __DEFAULT_FN_ATTRS256
623 _mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
624  return (__m256h)__builtin_ia32_selectph_256(
625  (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W);
626 }
627 
628 static __inline__ __m256h __DEFAULT_FN_ATTRS256
629 _mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) {
630  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
631  (__v16hf)_mm256_sqrt_ph(__A),
632  (__v16hf)_mm256_setzero_ph());
633 }
634 
/* Calls __builtin_ia32_fpclassph128_mask on A with immediate imm and U as
 * the input mask; yields a __mmask8. */
#define _mm_mask_fpclass_ph_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassph128_mask( \
      (__v8hf)(__m128h)(A), (int)(imm), (__mmask8)(U)))
638 
/* Calls __builtin_ia32_fpclassph128_mask on A with immediate imm and every
 * input mask bit set; yields a __mmask8. */
#define _mm_fpclass_ph_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassph128_mask( \
      (__v8hf)(__m128h)(A), (int)(imm), (__mmask8)-1))
642 
/* Calls __builtin_ia32_fpclassph256_mask on A with immediate imm and U as
 * the input mask; yields a __mmask16. */
#define _mm256_mask_fpclass_ph_mask(U, A, imm) \
  ((__mmask16)__builtin_ia32_fpclassph256_mask( \
      (__v16hf)(__m256h)(A), (int)(imm), (__mmask16)(U)))
646 
/* Calls __builtin_ia32_fpclassph256_mask on A with immediate imm and every
 * input mask bit set; yields a __mmask16. */
#define _mm256_fpclass_ph_mask(A, imm) \
  ((__mmask16)__builtin_ia32_fpclassph256_mask( \
      (__v16hf)(__m256h)(A), (int)(imm), (__mmask16)-1))
650 
651 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) {
652  return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
653  (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
654 }
655 
656 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W,
657  __mmask8 __U,
658  __m128d __A) {
659  return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
660  (__mmask8)__U);
661 }
662 
663 static __inline__ __m128h __DEFAULT_FN_ATTRS128
664 _mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
665  return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
666  (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
667 }
668 
669 static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) {
670  return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
671  (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
672 }
673 
674 static __inline__ __m128h __DEFAULT_FN_ATTRS256
675 _mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
676  return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
677  (__mmask8)__U);
678 }
679 
680 static __inline__ __m128h __DEFAULT_FN_ATTRS256
681 _mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
682  return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
683  (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
684 }
685 
686 static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) {
687  return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
688  (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1);
689 }
690 
691 static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W,
692  __mmask8 __U,
693  __m128h __A) {
694  return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
695  (__mmask8)__U);
696 }
697 
698 static __inline__ __m128d __DEFAULT_FN_ATTRS128
699 _mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
700  return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
701  (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U);
702 }
703 
704 static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) {
705  return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
706  (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1);
707 }
708 
709 static __inline__ __m256d __DEFAULT_FN_ATTRS256
710 _mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
711  return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
712  (__mmask8)__U);
713 }
714 
715 static __inline__ __m256d __DEFAULT_FN_ATTRS256
716 _mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
717  return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
718  (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U);
719 }
720 
721 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) {
722  return (__m128i)__builtin_ia32_vcvtph2w128_mask(
723  (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
724 }
725 
726 static __inline__ __m128i __DEFAULT_FN_ATTRS128
727 _mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
728  return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
729  (__mmask8)__U);
730 }
731 
732 static __inline__ __m128i __DEFAULT_FN_ATTRS128
733 _mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
734  return (__m128i)__builtin_ia32_vcvtph2w128_mask(
735  (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
736 }
737 
738 static __inline__ __m256i __DEFAULT_FN_ATTRS256
739 _mm256_cvtph_epi16(__m256h __A) {
740  return (__m256i)__builtin_ia32_vcvtph2w256_mask(
741  (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
742 }
743 
744 static __inline__ __m256i __DEFAULT_FN_ATTRS256
745 _mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
746  return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
747  (__mmask16)__U);
748 }
749 
750 static __inline__ __m256i __DEFAULT_FN_ATTRS256
752  return (__m256i)__builtin_ia32_vcvtph2w256_mask(
753  (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
754 }
755 
756 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) {
757  return (__m128i)__builtin_ia32_vcvttph2w128_mask(
758  (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
759 }
760 
761 static __inline__ __m128i __DEFAULT_FN_ATTRS128
762 _mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
763  return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
764  (__mmask8)__U);
765 }
766 
767 static __inline__ __m128i __DEFAULT_FN_ATTRS128
768 _mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
769  return (__m128i)__builtin_ia32_vcvttph2w128_mask(
770  (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
771 }
772 
773 static __inline__ __m256i __DEFAULT_FN_ATTRS256
774 _mm256_cvttph_epi16(__m256h __A) {
775  return (__m256i)__builtin_ia32_vcvttph2w256_mask(
776  (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
777 }
778 
779 static __inline__ __m256i __DEFAULT_FN_ATTRS256
780 _mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
781  return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
782  (__mmask16)__U);
783 }
784 
785 static __inline__ __m256i __DEFAULT_FN_ATTRS256
787  return (__m256i)__builtin_ia32_vcvttph2w256_mask(
788  (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
789 }
790 
791 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) {
792  return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
793 }
794 
795 static __inline__ __m128h __DEFAULT_FN_ATTRS128
796 _mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
797  return (__m128h)__builtin_ia32_selectph_128(
798  (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
799 }
800 
801 static __inline__ __m128h __DEFAULT_FN_ATTRS128
802 _mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
803  return (__m128h)__builtin_ia32_selectph_128(
804  (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
805 }
806 
807 static __inline__ __m256h __DEFAULT_FN_ATTRS256
808 _mm256_cvtepi16_ph(__m256i __A) {
809  return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
810 }
811 
812 static __inline__ __m256h __DEFAULT_FN_ATTRS256
813 _mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
814  return (__m256h)__builtin_ia32_selectph_256(
815  (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
816 }
817 
818 static __inline__ __m256h __DEFAULT_FN_ATTRS256
820  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
821  (__v16hf)_mm256_cvtepi16_ph(__A),
822  (__v16hf)_mm256_setzero_ph());
823 }
824 
825 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) {
826  return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
827  (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
828 }
829 
830 static __inline__ __m128i __DEFAULT_FN_ATTRS128
831 _mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
832  return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
833  (__mmask8)__U);
834 }
835 
836 static __inline__ __m128i __DEFAULT_FN_ATTRS128
837 _mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
838  return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
839  (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
840 }
841 
842 static __inline__ __m256i __DEFAULT_FN_ATTRS256
843 _mm256_cvtph_epu16(__m256h __A) {
844  return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
845  (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
846 }
847 
848 static __inline__ __m256i __DEFAULT_FN_ATTRS256
849 _mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
850  return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
851  (__mmask16)__U);
852 }
853 
854 static __inline__ __m256i __DEFAULT_FN_ATTRS256
856  return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
857  (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
858 }
859 
860 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) {
861  return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
862  (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
863 }
864 
865 static __inline__ __m128i __DEFAULT_FN_ATTRS128
866 _mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
867  return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
868  (__mmask8)__U);
869 }
870 
871 static __inline__ __m128i __DEFAULT_FN_ATTRS128
872 _mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
873  return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
874  (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
875 }
876 
877 static __inline__ __m256i __DEFAULT_FN_ATTRS256
878 _mm256_cvttph_epu16(__m256h __A) {
879  return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
880  (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
881 }
882 
883 static __inline__ __m256i __DEFAULT_FN_ATTRS256
884 _mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
885  return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
886  (__mmask16)__U);
887 }
888 
889 static __inline__ __m256i __DEFAULT_FN_ATTRS256
891  return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
892  (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
893 }
894 
895 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) {
896  return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
897 }
898 
899 static __inline__ __m128h __DEFAULT_FN_ATTRS128
900 _mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
901  return (__m128h)__builtin_ia32_selectph_128(
902  (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
903 }
904 
905 static __inline__ __m128h __DEFAULT_FN_ATTRS128
906 _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
907  return (__m128h)__builtin_ia32_selectph_128(
908  (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
909 }
910 
911 static __inline__ __m256h __DEFAULT_FN_ATTRS256
912 _mm256_cvtepu16_ph(__m256i __A) {
913  return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
914 }
915 
916 static __inline__ __m256h __DEFAULT_FN_ATTRS256
917 _mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
918  return (__m256h)__builtin_ia32_selectph_256(
919  (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
920 }
921 
922 static __inline__ __m256h __DEFAULT_FN_ATTRS256
924  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
925  (__v16hf)_mm256_cvtepu16_ph(__A),
926  (__v16hf)_mm256_setzero_ph());
927 }
928 
929 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) {
930  return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
931  (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
932 }
933 
934 static __inline__ __m128i __DEFAULT_FN_ATTRS128
935 _mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
936  return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
937  (__mmask8)__U);
938 }
939 
940 static __inline__ __m128i __DEFAULT_FN_ATTRS128
941 _mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
942  return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
943  (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
944 }
945 
946 static __inline__ __m256i __DEFAULT_FN_ATTRS256
947 _mm256_cvtph_epi32(__m128h __A) {
948  return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
949  (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
950 }
951 
952 static __inline__ __m256i __DEFAULT_FN_ATTRS256
953 _mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
954  return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
955  (__mmask8)__U);
956 }
957 
958 static __inline__ __m256i __DEFAULT_FN_ATTRS256
960  return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
961  (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
962 }
963 
964 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) {
965  return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
966  (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
967 }
968 
969 static __inline__ __m128i __DEFAULT_FN_ATTRS128
970 _mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
971  return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
972  (__mmask8)__U);
973 }
974 
975 static __inline__ __m128i __DEFAULT_FN_ATTRS128
976 _mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
977  return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
978  (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
979 }
980 
981 static __inline__ __m256i __DEFAULT_FN_ATTRS256
982 _mm256_cvtph_epu32(__m128h __A) {
983  return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
984  (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
985 }
986 
987 static __inline__ __m256i __DEFAULT_FN_ATTRS256
988 _mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
989  return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
990  (__mmask8)__U);
991 }
992 
993 static __inline__ __m256i __DEFAULT_FN_ATTRS256
995  return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
996  (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
997 }
998 
999 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) {
1000  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1001  (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1002 }
1003 
1004 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1005 _mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1006  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
1007  (__mmask8)__U);
1008 }
1009 
1010 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1011 _mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
1012  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1013  (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1014 }
1015 
1016 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1017 _mm256_cvtepi32_ph(__m256i __A) {
1018  return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
1019 }
1020 
1021 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1022 _mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1023  return (__m128h)__builtin_ia32_selectph_128(
1024  (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
1025 }
1026 
1027 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1029  return (__m128h)__builtin_ia32_selectph_128(
1030  (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
1031 }
1032 
1033 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) {
1034  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1035  (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1036 }
1037 
1038 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1039 _mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1040  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
1041  (__mmask8)__U);
1042 }
1043 
1044 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1045 _mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
1046  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1047  (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1048 }
1049 
1050 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1051 _mm256_cvtepu32_ph(__m256i __A) {
1052  return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
1053 }
1054 
1055 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1056 _mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1057  return (__m128h)__builtin_ia32_selectph_128(
1058  (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
1059 }
1060 
1061 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1063  return (__m128h)__builtin_ia32_selectph_128(
1064  (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
1065 }
1066 
1067 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) {
1068  return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1069  (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
1070 }
1071 
1072 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1073 _mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
1074  return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
1075  (__mmask8)__U);
1076 }
1077 
1078 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1080  return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1081  (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
1082 }
1083 
1084 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1085 _mm256_cvttph_epi32(__m128h __A) {
1086  return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1087  (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
1088 }
1089 
1090 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1091 _mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
1092  return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
1093  (__mmask8)__U);
1094 }
1095 
1096 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1098  return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1099  (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
1100 }
1101 
1102 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) {
1103  return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1104  (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
1105 }
1106 
1107 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1108 _mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
1109  return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
1110  (__mmask8)__U);
1111 }
1112 
1113 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1115  return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1116  (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
1117 }
1118 
1119 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1120 _mm256_cvttph_epu32(__m128h __A) {
1121  return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1122  (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
1123 }
1124 
1125 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1126 _mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
1127  return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
1128  (__mmask8)__U);
1129 }
1130 
1131 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1133  return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1134  (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
1135 }
1136 
1137 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) {
1138  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1139  (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1140 }
1141 
1142 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1143 _mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1144  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
1145  (__mmask8)__U);
1146 }
1147 
1148 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1149 _mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
1150  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1151  (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1152 }
1153 
1154 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1155 _mm256_cvtepi64_ph(__m256i __A) {
1156  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1157  (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1158 }
1159 
1160 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1161 _mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1162  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
1163  (__mmask8)__U);
1164 }
1165 
1166 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1168  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1169  (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1170 }
1171 
1172 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) {
1173  return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1174  (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1175 }
1176 
1177 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1178 _mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1179  return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
1180  (__mmask8)__U);
1181 }
1182 
1183 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1184 _mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1185  return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1186  (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1187 }
1188 
1189 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1190 _mm256_cvtph_epi64(__m128h __A) {
1191  return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1192  (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1193 }
1194 
1195 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1196 _mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1197  return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
1198  (__mmask8)__U);
1199 }
1200 
1201 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1203  return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1204  (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1205 }
1206 
1207 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) {
1208  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1209  (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1210 }
1211 
1212 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1213 _mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1214  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
1215  (__mmask8)__U);
1216 }
1217 
1218 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1219 _mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
1220  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1221  (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1222 }
1223 
1224 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1225 _mm256_cvtepu64_ph(__m256i __A) {
1226  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1227  (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1228 }
1229 
1230 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1231 _mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1232  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
1233  (__mmask8)__U);
1234 }
1235 
1236 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1238  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1239  (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1240 }
1241 
1242 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) {
1243  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1244  (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1245 }
1246 
1247 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1248 _mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1249  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1250  (__mmask8)__U);
1251 }
1252 
1253 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1254 _mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1255  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1256  (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1257 }
1258 
1259 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1260 _mm256_cvtph_epu64(__m128h __A) {
1261  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1262  (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1263 }
1264 
1265 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1266 _mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1267  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1268  (__mmask8)__U);
1269 }
1270 
1271 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1273  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1274  (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1275 }
1276 
1277 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) {
1278  return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1279  (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1280 }
1281 
1282 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1283 _mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1284  return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
1285  (__mmask8)__U);
1286 }
1287 
1288 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1290  return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1291  (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1292 }
1293 
1294 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1295 _mm256_cvttph_epi64(__m128h __A) {
1296  return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1297  (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1298 }
1299 
1300 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1301 _mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1302  return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
1303  (__mmask8)__U);
1304 }
1305 
1306 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1308  return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1309  (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1310 }
1311 
1312 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) {
1313  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1314  (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1315 }
1316 
1317 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1318 _mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1319  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1320  (__mmask8)__U);
1321 }
1322 
1323 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1325  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1326  (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1327 }
1328 
1329 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1330 _mm256_cvttph_epu64(__m128h __A) {
1331  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1332  (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1333 }
1334 
1335 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1336 _mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1337  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1338  (__mmask8)__U);
1339 }
1340 
1341 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1343  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1344  (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1345 }
1346 
1347 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) {
1348  return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1349  (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1);
1350 }
1351 
1352 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W,
1353  __mmask8 __U,
1354  __m128h __A) {
1355  return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
1356  (__mmask8)__U);
1357 }
1358 
1359 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1360 _mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1361  return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1362  (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
1363 }
1364 
1365 static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) {
1366  return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1367  (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1);
1368 }
1369 
1370 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1371 _mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
1372  return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
1373  (__mmask8)__U);
1374 }
1375 
1376 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1378  return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1379  (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U);
1380 }
1381 
1382 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) {
1383  return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1384  (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1385 }
1386 
1387 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W,
1388  __mmask8 __U,
1389  __m128 __A) {
1390  return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
1391  (__mmask8)__U);
1392 }
1393 
1394 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1395 _mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
1396  return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1397  (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1398 }
1399 
1400 static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) {
1401  return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1402  (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1403 }
1404 
1405 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1406 _mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
1407  return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
1408  (__mmask8)__U);
1409 }
1410 
1411 static __inline__ __m128h __DEFAULT_FN_ATTRS256
1413  return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1414  (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1415 }
1416 
1417 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A,
1418  __m128h __B,
1419  __m128h __C) {
1420  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
1421  (__v8hf)__C);
1422 }
1423 
1424 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A,
1425  __mmask8 __U,
1426  __m128h __B,
1427  __m128h __C) {
1428  return (__m128h)__builtin_ia32_selectph_128(
1429  (__mmask8)__U,
1430  __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1431  (__v8hf)__A);
1432 }
1433 
1434 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1435 _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1436  return (__m128h)__builtin_ia32_selectph_128(
1437  (__mmask8)__U,
1438  __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1439  (__v8hf)__C);
1440 }
1441 
1442 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1443 _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1444  return (__m128h)__builtin_ia32_selectph_128(
1445  (__mmask8)__U,
1446  __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1447  (__v8hf)_mm_setzero_ph());
1448 }
1449 
1450 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A,
1451  __m128h __B,
1452  __m128h __C) {
1453  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
1454  -(__v8hf)__C);
1455 }
1456 
1457 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A,
1458  __mmask8 __U,
1459  __m128h __B,
1460  __m128h __C) {
1461  return (__m128h)__builtin_ia32_selectph_128(
1462  (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1463  (__v8hf)__A);
1464 }
1465 
1466 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1467 _mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1468  return (__m128h)__builtin_ia32_selectph_128(
1469  (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1470  (__v8hf)_mm_setzero_ph());
1471 }
1472 
1473 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1474 _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1475  return (__m128h)__builtin_ia32_selectph_128(
1476  (__mmask8)__U,
1477  __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1478  (__v8hf)__C);
1479 }
1480 
1481 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1482 _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1483  return (__m128h)__builtin_ia32_selectph_128(
1484  (__mmask8)__U,
1485  __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1486  (__v8hf)_mm_setzero_ph());
1487 }
1488 
1489 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1490 _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1491  return (__m128h)__builtin_ia32_selectph_128(
1492  (__mmask8)__U,
1493  __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1494  (__v8hf)_mm_setzero_ph());
1495 }
1496 
1497 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A,
1498  __m256h __B,
1499  __m256h __C) {
1500  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
1501  (__v16hf)__C);
1502 }
1503 
1504 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1505 _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1506  return (__m256h)__builtin_ia32_selectph_256(
1507  (__mmask16)__U,
1508  __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1509  (__v16hf)__A);
1510 }
1511 
1512 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1513 _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1514  return (__m256h)__builtin_ia32_selectph_256(
1515  (__mmask16)__U,
1516  __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1517  (__v16hf)__C);
1518 }
1519 
1520 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1521 _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1522  return (__m256h)__builtin_ia32_selectph_256(
1523  (__mmask16)__U,
1524  __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1525  (__v16hf)_mm256_setzero_ph());
1526 }
1527 
1528 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A,
1529  __m256h __B,
1530  __m256h __C) {
1531  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
1532  -(__v16hf)__C);
1533 }
1534 
1535 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1536 _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1537  return (__m256h)__builtin_ia32_selectph_256(
1538  (__mmask16)__U,
1539  __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1540  (__v16hf)__A);
1541 }
1542 
1543 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1544 _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1545  return (__m256h)__builtin_ia32_selectph_256(
1546  (__mmask16)__U,
1547  __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1548  (__v16hf)_mm256_setzero_ph());
1549 }
1550 
1551 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1552 _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1553  return (__m256h)__builtin_ia32_selectph_256(
1554  (__mmask16)__U,
1555  __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1556  (__v16hf)__C);
1557 }
1558 
1559 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1560 _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1561  return (__m256h)__builtin_ia32_selectph_256(
1562  (__mmask16)__U,
1563  __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1564  (__v16hf)_mm256_setzero_ph());
1565 }
1566 
1567 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1568 _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1569  return (__m256h)__builtin_ia32_selectph_256(
1570  (__mmask16)__U,
1571  __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1572  (__v16hf)_mm256_setzero_ph());
1573 }
1574 
1575 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A,
1576  __m128h __B,
1577  __m128h __C) {
1578  return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1579  (__v8hf)__C);
1580 }
1581 
1582 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1583 _mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1584  return (__m128h)__builtin_ia32_selectph_128(
1585  (__mmask8)__U,
1586  __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1587  (__v8hf)__A);
1588 }
1589 
1590 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1591 _mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1592  return (__m128h)__builtin_ia32_selectph_128(
1593  (__mmask8)__U,
1594  __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1595  (__v8hf)__C);
1596 }
1597 
1598 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1599 _mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1600  return (__m128h)__builtin_ia32_selectph_128(
1601  (__mmask8)__U,
1602  __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1603  (__v8hf)_mm_setzero_ph());
1604 }
1605 
1606 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A,
1607  __m128h __B,
1608  __m128h __C) {
1609  return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1610  -(__v8hf)__C);
1611 }
1612 
1613 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1614 _mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1615  return (__m128h)__builtin_ia32_selectph_128(
1616  (__mmask8)__U,
1617  __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1618  (__v8hf)__A);
1619 }
1620 
1621 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1622 _mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1623  return (__m128h)__builtin_ia32_selectph_128(
1624  (__mmask8)__U,
1625  __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1626  (__v8hf)_mm_setzero_ph());
1627 }
1628 
1629 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1630 _mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
1631  return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1632  (__v16hf)__C);
1633 }
1634 
1635 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1636 _mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1637  return (__m256h)__builtin_ia32_selectph_256(
1638  (__mmask16)__U,
1639  __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1640  (__v16hf)__A);
1641 }
1642 
1643 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1644 _mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1645  return (__m256h)__builtin_ia32_selectph_256(
1646  (__mmask16)__U,
1647  __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1648  (__v16hf)__C);
1649 }
1650 
1651 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1652 _mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1653  return (__m256h)__builtin_ia32_selectph_256(
1654  (__mmask16)__U,
1655  __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1656  (__v16hf)_mm256_setzero_ph());
1657 }
1658 
1659 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1660 _mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
1661  return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1662  -(__v16hf)__C);
1663 }
1664 
1665 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1666 _mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1667  return (__m256h)__builtin_ia32_selectph_256(
1668  (__mmask16)__U,
1669  __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1670  (__v16hf)__A);
1671 }
1672 
1673 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1674 _mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1675  return (__m256h)__builtin_ia32_selectph_256(
1676  (__mmask16)__U,
1677  __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1678  (__v16hf)_mm256_setzero_ph());
1679 }
1680 
1681 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1682 _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1683  return (__m128h)__builtin_ia32_selectph_128(
1684  (__mmask8)__U,
1685  __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1686  (__v8hf)__C);
1687 }
1688 
1689 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1690 _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1691  return (__m256h)__builtin_ia32_selectph_256(
1692  (__mmask16)__U,
1693  __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1694  (__v16hf)__C);
1695 }
1696 
1697 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1698 _mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1699  return (__m128h)__builtin_ia32_selectph_128(
1700  (__mmask8)__U,
1701  __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1702  (__v8hf)__C);
1703 }
1704 
1705 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1706 _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1707  return (__m256h)__builtin_ia32_selectph_256(
1708  (__mmask16)__U,
1709  __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1710  (__v16hf)__C);
1711 }
1712 
1713 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A,
1714  __m128h __B,
1715  __m128h __C) {
1716  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
1717  (__v8hf)__C);
1718 }
1719 
1720 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1721 _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1722  return (__m128h)__builtin_ia32_selectph_128(
1723  (__mmask8)__U,
1724  __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
1725  (__v8hf)__A);
1726 }
1727 
1728 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A,
1729  __m256h __B,
1730  __m256h __C) {
1731  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
1732  (__v16hf)__C);
1733 }
1734 
1735 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1736 _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1737  return (__m256h)__builtin_ia32_selectph_256(
1738  (__mmask16)__U,
1739  __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
1740  (__v16hf)__A);
1741 }
1742 
1743 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A,
1744  __m128h __B,
1745  __m128h __C) {
1746  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
1747  -(__v8hf)__C);
1748 }
1749 
1750 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1751 _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1752  return (__m128h)__builtin_ia32_selectph_128(
1753  (__mmask8)__U,
1754  __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1755  (__v8hf)__A);
1756 }
1757 
1758 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1759 _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1760  return (__m128h)__builtin_ia32_selectph_128(
1761  (__mmask8)__U,
1762  __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1763  (__v8hf)__C);
1764 }
1765 
1766 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A,
1767  __m256h __B,
1768  __m256h __C) {
1769  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
1770  -(__v16hf)__C);
1771 }
1772 
1773 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1774 _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1775  return (__m256h)__builtin_ia32_selectph_256(
1776  (__mmask16)__U,
1777  __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1778  (__v16hf)__A);
1779 }
1780 
1781 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1782 _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1783  return (__m256h)__builtin_ia32_selectph_256(
1784  (__mmask16)__U,
1785  __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1786  (__v16hf)__C);
1787 }
1788 
1789 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A,
1790  __m128h __B) {
1791  return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1792  (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1793 }
1794 
1795 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1796 _mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1797  return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1798  (__v4sf)__W, (__mmask8)__U);
1799 }
1800 
1801 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1802 _mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1803  return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1804  (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1805 }
1806 
1807 static __inline__ __m256h __DEFAULT_FN_ATTRS128 _mm256_fcmul_pch(__m256h __A,
1808  __m256h __B) {
1809  return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1810  (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1811 }
1812 
1813 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1814 _mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1815  return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1816  (__v8sf)__W, (__mmask8)__U);
1817 }
1818 
1819 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1820 _mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1821  return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1822  (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1823 }
1824 
1825 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A,
1826  __m128h __B,
1827  __m128h __C) {
1828  return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1829  (__v4sf)__C, (__mmask8)-1);
1830 }
1831 
1832 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1833 _mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1834  return (__m128h)__builtin_ia32_selectps_128(
1835  __U,
1836  __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B,
1837  (__v4sf)__C, (__mmask8)__U),
1838  (__v4sf)__A);
1839 }
1840 
1841 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1842 _mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1843  return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1844  (__v4sf)__C, (__mmask8)__U);
1845 }
1846 
1847 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1848 _mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1849  return (__m128h)__builtin_ia32_vfcmaddcph128_maskz(
1850  (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U);
1851 }
1852 
1853 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A,
1854  __m256h __B,
1855  __m256h __C) {
1856  return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1857  (__v8sf)__C, (__mmask8)-1);
1858 }
1859 
1860 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1861 _mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1862  return (__m256h)__builtin_ia32_selectps_256(
1863  __U,
1864  __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1865  (__mmask8)__U),
1866  (__v8sf)__A);
1867 }
1868 
1869 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1870 _mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1871  return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1872  (__v8sf)__C, (__mmask8)__U);
1873 }
1874 
1875 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1876 _mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1877  return (__m256h)__builtin_ia32_vfcmaddcph256_maskz(
1878  (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U);
1879 }
1880 
1881 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A,
1882  __m128h __B) {
1883  return (__m128h)__builtin_ia32_vfmulcph128_mask(
1884  (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1885 }
1886 
1887 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_pch(__m128h __W,
1888  __mmask8 __U,
1889  __m128h __A,
1890  __m128h __B) {
1891  return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1892  (__v4sf)__W, (__mmask8)__U);
1893 }
1894 
1895 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1896 _mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1897  return (__m128h)__builtin_ia32_vfmulcph128_mask(
1898  (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1899 }
1900 
1901 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A,
1902  __m256h __B) {
1903  return (__m256h)__builtin_ia32_vfmulcph256_mask(
1904  (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1905 }
1906 
1907 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1908 _mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1909  return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1910  (__v8sf)__W, (__mmask8)__U);
1911 }
1912 
1913 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1914 _mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1915  return (__m256h)__builtin_ia32_vfmulcph256_mask(
1916  (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1917 }
1918 
1919 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A,
1920  __m128h __B,
1921  __m128h __C) {
1922  return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1923  (__v4sf)__C, (__mmask8)-1);
1924 }
1925 
1926 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1927 _mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1928  return (__m128h)__builtin_ia32_selectps_128(
1929  __U,
1930  __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
1931  (__mmask8)__U),
1932  (__v4sf)__A);
1933 }
1934 
1935 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1936 _mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1937  return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1938  (__v4sf)__C, (__mmask8)__U);
1939 }
1940 
1941 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1942 _mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1943  return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B,
1944  (__v4sf)__C, (__mmask8)__U);
1945 }
1946 
1947 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A,
1948  __m256h __B,
1949  __m256h __C) {
1950  return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1951  (__v8sf)__C, (__mmask8)-1);
1952 }
1953 
1954 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1955 _mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1956  return (__m256h)__builtin_ia32_selectps_256(
1957  __U,
1958  __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1959  (__mmask8)__U),
1960  (__v8sf)__A);
1961 }
1962 
1963 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1964 _mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1965  return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1966  (__v8sf)__C, (__mmask8)__U);
1967 }
1968 
1969 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1970 _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1971  return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B,
1972  (__v8sf)__C, (__mmask8)__U);
1973 }
1974 
1975 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
1976  __m128h __A,
1977  __m128h __W) {
1978  return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
1979  (__v8hf)__A);
1980 }
1981 
1982 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1983 _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
1984  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
1985  (__v16hf)__A);
1986 }
1987 
1988 static __inline__ __m128h __DEFAULT_FN_ATTRS128
1989 _mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
1990  return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1991  (__v8hi)__B);
1992 }
1993 
1994 static __inline__ __m256h __DEFAULT_FN_ATTRS256
1995 _mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
1996  return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1997  (__v16hi)__B);
1998 }
1999 
2000 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2001 _mm_permutexvar_ph(__m128i __A, __m128h __B) {
2002  return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
2003 }
2004 
2005 static __inline__ __m256h __DEFAULT_FN_ATTRS256
2006 _mm256_permutexvar_ph(__m256i __A, __m256h __B) {
2007  return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
2008 }
2009 
2010 static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2011 _mm256_reduce_add_ph(__m256h __W) {
2012  return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W);
2013 }
2014 
2015 static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2016 _mm256_reduce_mul_ph(__m256h __W) {
2017  return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W);
2018 }
2019 
2020 static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2021 _mm256_reduce_max_ph(__m256h __V) {
2022  return __builtin_ia32_reduce_fmax_ph256(__V);
2023 }
2024 
2025 static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2026 _mm256_reduce_min_ph(__m256h __V) {
2027  return __builtin_ia32_reduce_fmin_ph256(__V);
2028 }
2029 
2030 static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2031 _mm_reduce_add_ph(__m128h __W) {
2032  return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W);
2033 }
2034 
2035 static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2036 _mm_reduce_mul_ph(__m128h __W) {
2037  return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W);
2038 }
2039 
2040 static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2041 _mm_reduce_max_ph(__m128h __V) {
2042  return __builtin_ia32_reduce_fmax_ph128(__V);
2043 }
2044 
2045 static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2046 _mm_reduce_min_ph(__m128h __V) {
2047  return __builtin_ia32_reduce_fmin_ph128(__V);
2048 }
2049 
2050 #undef __DEFAULT_FN_ATTRS128
2051 #undef __DEFAULT_FN_ATTRS256
2052 
2053 #endif
_mm_maskz_fmadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1443
_mm_maskz_cvttph_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:768
_mm256_cvtxph_ps
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A)
Definition: avx512vlfp16intrin.h:1365
_mm256_maskz_cvttph_epi16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:786
_mm_min_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:251
_mm_mask_cvttph_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1073
_mm256_cvtph_epu32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epu32(__m128h __A)
Definition: avx512vlfp16intrin.h:982
_mm_setzero_ph
static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_setzero_ph(void)
Definition: avx512fp16intrin.h:42
_mm256_cvtph_epu64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epu64(__m128h __A)
Definition: avx512vlfp16intrin.h:1260
_mm256_cvtph_epu16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epu16(__m256h __A)
Definition: avx512vlfp16intrin.h:843
_mm256_cvtsh_h
static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a)
Definition: avx512vlfp16intrin.h:31
_mm256_undefined_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_undefined_ph(void)
Definition: avx512fp16intrin.h:51
_mm256_mask_fmul_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:1908
_mm_undefined_pd
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
Definition: emmintrin.h:1803
_mm_maskz_cvtepu64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:1219
_mm_mask_cvtph_epi64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1178
_mm256_mask_cvtph_epu16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:849
_mm_mask_conj_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_conj_pch(__m128h __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:344
_mm_mask_scalef_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:514
_mm256_mask_cvtph_epu64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1266
_mm256_maskz_cvtph_epi32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:959
_mm256_maskz_cvtph_pd
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:716
_mm256_mask_cvtepu64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:1231
_mm256_cvttph_epu32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epu32(__m128h __A)
Definition: avx512vlfp16intrin.h:1120
_mm_set1_ph
static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h)
Definition: avx512vlfp16intrin.h:39
_mm256_maskz_cvtph_epi16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:751
_mm256_mask_cvtepi64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:1161
_mm256_mask_fnmsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1774
_mm_maskz_cvttph_epu32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1114
_mm256_mask_rsqrt_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:414
_mm256_mask_fmadd_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1955
_mm_mask_cvtepu64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:1213
_mm256_cvtepi32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_ph(__m256i __A)
Definition: avx512vlfp16intrin.h:1017
_mm_cvtph_epu32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A)
Definition: avx512vlfp16intrin.h:964
_mm_maskz_cvtph_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:941
_mm_mask_rcp_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:395
_mm256_maskz_cvtepu16_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:923
_mm256_maskz_fmsubadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1674
_mm_mask_fmadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1424
_mm256_mask_fcmul_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:1814
_mm_abs_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A)
Definition: avx512vlfp16intrin.h:320
_mm256_mask_cvtepi16_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:813
_mm256_maskz_fnmsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1568
_mm256_fcmadd_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1853
__DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS256
Definition: avx512vlfp16intrin.h:18
_mm256_mask_cvtph_epi16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:745
_mm_maskz_cvtph_epu64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1254
_mm256_mask_conj_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:329
_mm_fmul_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:1881
_mm256_permutex2var_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B)
Definition: avx512vlfp16intrin.h:1995
_mm_mask_min_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:256
_mm_xor_ps
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_xor_ps(__m128 __a, __m128 __b)
Performs a bitwise exclusive OR of two 128-bit vectors of [4 x float].
Definition: xmmintrin.h:467
_mm256_reduce_add_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_add_ph(__m256h __W)
Definition: avx512vlfp16intrin.h:2011
_mm256_cvtepi16_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_ph(__m256i __A)
Definition: avx512vlfp16intrin.h:808
_mm_mask_cvttph_epu64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1318
_mm_cvtph_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A)
Definition: avx512vlfp16intrin.h:721
_mm256_undefined_pd
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void)
Create a 256-bit vector of [4 x double] with undefined values.
Definition: avxintrin.h:3568
_mm256_mask_cvtxps_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A)
Definition: avx512vlfp16intrin.h:1406
_mm256_setzero_ps
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition: avxintrin.h:4268
_mm256_mask3_fcmadd_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U)
Definition: avx512vlfp16intrin.h:1870
_mm_mask_cvtxps_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m128 __A)
Definition: avx512vlfp16intrin.h:1387
_mm_cvttph_epu16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A)
Definition: avx512vlfp16intrin.h:860
_mm_mask_sub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:141
_mm256_maskz_cvtpd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A)
Definition: avx512vlfp16intrin.h:681
_mm_cvtsh_h
static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a)
Definition: avx512vlfp16intrin.h:27
_mm_mask3_fmsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
Definition: avx512vlfp16intrin.h:1682
_mm_mask_sqrt_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:605
_mm256_mask_rcp_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:379
_mm_and_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi32(__m128i __a, __m128i __b)
Definition: avx512vlintrin.h:471
_mm256_maskz_fmsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1544
__a
static __inline__ void int __a
Definition: emmintrin.h:4189
_mm_cvtxph_ps
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A)
Definition: avx512vlfp16intrin.h:1347
_mm256_maskz_max_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:287
_mm256_cvtxps_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A)
Definition: avx512vlfp16intrin.h:1400
_mm256_mask_cvtph_pd
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:710
_mm256_setzero_pd
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition: avxintrin.h:4254
_mm_sub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:136
_mm256_maskz_fmul_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:1914
_mm_maskz_cvtph_epi64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1184
_mm_maskz_rsqrt_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:438
_mm_maskz_getexp_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ph(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:455
_mm256_maskz_rcp_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:385
_mm_maskz_scalef_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:520
_mm_maskz_max_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:308
_mm_fmadd_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1919
_mm256_rcp_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A)
Definition: avx512vlfp16intrin.h:373
_mm_cvtepi16_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A)
Definition: avx512vlfp16intrin.h:791
_mm_cvtepu16_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A)
Definition: avx512vlfp16intrin.h:895
_mm_set1_ps
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set1_ps(float __w)
Constructs a 128-bit floating-point vector of [4 x float], with each of the four single-precision flo...
Definition: xmmintrin.h:1818
_mm_mask3_fmaddsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
Definition: avx512vlfp16intrin.h:1591
_mm256_undefined_si256
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition: avxintrin.h:3594
_mm_add_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:99
_mm_maskz_cvtepi16_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:802
_mm256_maskz_cvtph_epu32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:994
_mm_mask_fmsubadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1614
_mm_fmsubadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1606
_mm256_maskz_cvtepu32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:1062
_mm_undefined_si128
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition: emmintrin.h:3591
_mm_mask_cvtph_epu32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:970
_mm256_set1_ps
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set1_ps(float __w)
Constructs a 256-bit floating-point vector of [8 x float], with each of the eight single-precision fl...
Definition: avxintrin.h:4163
_mm_mask3_fmadd_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
Definition: avx512vlfp16intrin.h:1936
_mm_mask_cvttph_epu32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1108
_mm256_maskz_cvtxph_ps
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1377
_mm256_maskz_fnmadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1560
_mm_maskz_min_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:265
_mm256_mask_div_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:199
_mm_maskz_cvttph_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1079
_mm256_maskz_conj_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_conj_pch(__mmask8 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:335
_mm_maskz_fmadd_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1942
_mm256_mask_cvttph_epu16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:884
_mm256_cvtph_pd
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A)
Definition: avx512vlfp16intrin.h:704
_mm256_maskz_fmaddsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1652
_mm256_reduce_mul_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_mul_ph(__m256h __W)
Definition: avx512vlfp16intrin.h:2016
_mm256_cvtepi64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ph(__m256i __A)
Definition: avx512vlfp16intrin.h:1155
_mm256_mask_cvtepu16_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:917
_mm256_cvtepu16_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_ph(__m256i __A)
Definition: avx512vlfp16intrin.h:912
_mm256_maskz_cvttph_epu32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1132
_mm256_mask_sub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:125
_mm256_mask_cvtpd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A)
Definition: avx512vlfp16intrin.h:675
_mm256_maskz_rsqrt_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:420
_mm_fmadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1417
_mm_maskz_cvtxph_ps
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1360
_mm256_mul_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:156
_mm_fmaddsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1575
_mm_div_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:210
_mm256_mask_cvttph_epi64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1301
_mm_reduce_add_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_add_ph(__m128h __W)
Definition: avx512vlfp16intrin.h:2031
_mm_mask_cvtph_pd
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:691
_mm256_cvttph_epu16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epu16(__m256h __A)
Definition: avx512vlfp16intrin.h:878
_mm256_cvtepu64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ph(__m256i __A)
Definition: avx512vlfp16intrin.h:1225
_mm256_mask_cvttph_epu32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1126
_mm256_getexp_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A)
Definition: avx512vlfp16intrin.h:460
_mm_rcp_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A)
Definition: avx512vlfp16intrin.h:390
_mm256_sub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:119
_mm256_mask_cvtepi32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:1022
_mm_maskz_fcmul_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:1802
_mm_set_ph
static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8)
Definition: avx512vlfp16intrin.h:49
_mm256_add_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:82
_mm256_min_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:230
_mm_mask_fmaddsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1583
_mm_mask_cvttph_epu16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:866
_mm_mask_cvtepu16_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:900
_mm256_set1_epi32
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements set to the specified 32-bit integral value.
Definition: avxintrin.h:4182
_mm256_mask_fmaddsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1636
_mm_cvtph_pd
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A)
Definition: avx512vlfp16intrin.h:686
_mm256_set1_pch
static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_pch(_Float16 _Complex h)
Definition: avx512vlfp16intrin.h:55
_mm256_maskz_fcmadd_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1876
_mm_maskz_fmsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1467
_mm_fmsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1450
_mm_maskz_cvttph_epu16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:872
_mm256_fnmsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1766
_mm256_cvttph_epi16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epi16(__m256h __A)
Definition: avx512vlfp16intrin.h:774
_mm_maskz_cvtepi64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:1149
_mm256_maskz_fmadd_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1970
_mm256_permutexvar_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_permutexvar_ph(__m256i __A, __m256h __B)
Definition: avx512vlfp16intrin.h:2006
_mm256_mask_fcmadd_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1861
_mm256_mask_getexp_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:466
_mm256_fmsubadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1660
_mm256_fmsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1528
_mm_cvtph_epu16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A)
Definition: avx512vlfp16intrin.h:825
_mm256_mask_cvttph_epu64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1336
_mm_fcmul_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:1789
_mm256_sqrt_ph
static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a)
Definition: avx512vlfp16intrin.h:618
_mm_cvtepi32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A)
Definition: avx512vlfp16intrin.h:999
_mm256_reduce_min_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_min_ph(__m256h __V)
Definition: avx512vlfp16intrin.h:2026
_mm_reduce_max_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_max_ph(__m128h __V)
Definition: avx512vlfp16intrin.h:2041
_mm256_maskz_cvtepi32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:1028
_mm_mask3_fmsubadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
Definition: avx512vlfp16intrin.h:1698
_mm_mask_cvtph_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:727
_mm256_mask_cvtph_epi32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:953
_mm_cvtpd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A)
Definition: avx512vlfp16intrin.h:651
_mm256_fmadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1497
_mm_conj_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A)
Definition: avx512vlfp16intrin.h:340
_mm_maskz_cvtxps_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A)
Definition: avx512vlfp16intrin.h:1395
_mm_sqrt_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a)
Definition: avx512vlfp16intrin.h:601
_mm_max_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:294
_mm256_fmadd_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1947
_mm256_mask_sqrt_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:623
_mm256_mask3_fmadd_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U)
Definition: avx512vlfp16intrin.h:1964
_mm_maskz_cvtepu32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:1045
_mm256_cvttph_epi32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epi32(__m128h __A)
Definition: avx512vlfp16intrin.h:1085
_mm_mask_fmadd_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1927
_mm256_fmaddsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1630
_mm256_cvtph_epi32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epi32(__m128h __A)
Definition: avx512vlfp16intrin.h:947
_mm_mask3_fcmadd_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
Definition: avx512vlfp16intrin.h:1842
_mm256_mask_fmsubadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1666
_mm_cvttph_epi64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A)
Definition: avx512vlfp16intrin.h:1277
_mm256_mask_cvtph_epi64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1196
_mm_maskz_cvtph_epu32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:976
_mm256_maskz_sqrt_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:629
_mm256_maskz_cvtepu64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:1237
_mm_maskz_cvtph_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:733
_mm_cvttph_epu64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A)
Definition: avx512vlfp16intrin.h:1312
_mm_cvtepu32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A)
Definition: avx512vlfp16intrin.h:1033
_mm_set1_pch
static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_pch(_Float16 _Complex h)
Definition: avx512vlfp16intrin.h:60
_mm256_maskz_fcmul_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:1820
_mm_getexp_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A)
Definition: avx512vlfp16intrin.h:443
_mm256_mask_add_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:88
_mm256_mask3_fmadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
Definition: avx512vlfp16intrin.h:1513
_mm_maskz_mul_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:186
_mm_maskz_sqrt_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:612
_mm_maskz_add_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:112
_mm256_maskz_cvtepi16_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:819
_mm_undefined_ps
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
Definition: xmmintrin.h:1780
_mm256_mask3_fnmadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
Definition: avx512vlfp16intrin.h:1552
_mm_mask_mul_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:178
_mm_setzero_si128
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition: emmintrin.h:3981
_mm256_fmul_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:1901
_mm256_mask_fmsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1536
_mm256_mask_cvtxph_ps
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1371
_mm256_mask_cvtepu32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:1056
_mm_maskz_rcp_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:402
_mm_rsqrt_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A)
Definition: avx512vlfp16intrin.h:425
_mm256_maskz_sub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:131
_mm_mask_cvtph_epu16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:831
_mm256_maskz_div_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:205
_mm_mask_cvtepi32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:1005
_mm_mask_cvtepi16_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:796
_mm256_mask3_fmaddsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
Definition: avx512vlfp16intrin.h:1644
_mm256_mask3_fmsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
Definition: avx512vlfp16intrin.h:1690
_mm_cvtph_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A)
Definition: avx512vlfp16intrin.h:929
_mm_mask_cvtepi64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:1143
_mm_reduce_min_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_min_ph(__m128h __V)
Definition: avx512vlfp16intrin.h:2046
_mm_mul_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:173
_mm_cvttph_epu32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A)
Definition: avx512vlfp16intrin.h:1102
_mm256_mask_cvttph_epi16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:780
_mm_cvtph_epi64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A)
Definition: avx512vlfp16intrin.h:1172
_mm256_mask_fnmadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1736
_mm_permutexvar_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_permutexvar_ph(__m128i __A, __m128h __B)
Definition: avx512vlfp16intrin.h:2001
_mm256_maskz_add_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:94
_mm_maskz_cvttph_epi64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1289
_mm_mask_blend_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W)
Definition: avx512vlfp16intrin.h:1975
_mm256_maskz_mul_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:168
_Float16
__device__ _Float16
Definition: __clang_hip_libdevice_declares.h:295
_mm256_mask_scalef_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:532
_mm_maskz_cvtph_epu16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:837
_mm256_maskz_scalef_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:538
_mm_mask3_fnmadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
Definition: avx512vlfp16intrin.h:1474
_mm_maskz_fnmsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1490
_mm_maskz_cvtpd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A)
Definition: avx512vlfp16intrin.h:664
_mm256_set_ph
static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12, _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16)
Definition: avx512vlfp16intrin.h:65
_mm_mask_fmul_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:1887
_mm_maskz_fmsubadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1622
_mm256_mask_mul_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:162
_mm256_max_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:273
_mm_mask_cvttph_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:762
_mm_maskz_fcmadd_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1848
_mm256_cvttph_epi64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epi64(__m128h __A)
Definition: avx512vlfp16intrin.h:1295
__mmask16
unsigned short __mmask16
Definition: avx512fintrin.h:38
_mm256_maskz_cvtph_epu16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:855
_mm_undefined_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_undefined_ph(void)
Definition: avx512fp16intrin.h:61
_mm_maskz_conj_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_conj_pch(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:352
_mm256_cvtph_epi64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epi64(__m128h __A)
Definition: avx512vlfp16intrin.h:1190
_mm_maskz_div_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:223
_mm_mask_fmsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1457
_mm256_maskz_cvttph_epu64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1342
_mm_mask_cvtpd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m128d __A)
Definition: avx512vlfp16intrin.h:656
_mm_maskz_cvttph_epu64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1324
_mm_set1_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition: emmintrin.h:3808
_mm_scalef_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:507
_mm256_fnmadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1728
_mm256_and_epi32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi32(__m256i __a, __m256i __b)
Definition: avx512vlintrin.h:451
_mm256_maskz_cvtph_epu64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1272
_mm256_setzero_ph
static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_setzero_ph(void)
Definition: avx512fp16intrin.h:46
_mm256_maskz_cvtepi64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A)
Definition: avx512vlfp16intrin.h:1167
_mm_reduce_mul_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_mul_ph(__m128h __W)
Definition: avx512vlfp16intrin.h:2036
_mm256_mask_cvttph_epi32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1091
_mm256_reduce_max_ph
static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_max_ph(__m256h __V)
Definition: avx512vlfp16intrin.h:2021
_mm256_fcmul_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS128 _mm256_fcmul_pch(__m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:1807
_mm_mask3_fnmsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
Definition: avx512vlfp16intrin.h:1759
_mm256_mask_fmadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1505
_mm_cvtxps_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A)
Definition: avx512vlfp16intrin.h:1382
_mm_permutex2var_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B)
Definition: avx512vlfp16intrin.h:1989
_mm_setzero_ps
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition: xmmintrin.h:1907
_mm256_maskz_getexp_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:472
_mm_mask_getexp_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:449
_mm_cvtph_epu64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A)
Definition: avx512vlfp16intrin.h:1242
_mm256_maskz_fmadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C)
Definition: avx512vlfp16intrin.h:1521
_mm_mask_cvtph_epu64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1248
_mm256_cvtph_epi16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epi16(__m256h __A)
Definition: avx512vlfp16intrin.h:739
_mm_mask_cvtxph_ps
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1352
_mm_maskz_cvtepu16_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:906
_mm256_cvtpd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A)
Definition: avx512vlfp16intrin.h:669
_mm256_mask_min_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:236
_mm256_mask_cvtph_epu32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:988
_mm_mask_fnmsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1751
_mm_maskz_fmaddsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1599
_mm256_scalef_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:525
_mm_cvtepu64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A)
Definition: avx512vlfp16intrin.h:1207
_mm_set_sh
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h)
Definition: avx512vlfp16intrin.h:35
_mm_fnmsub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1743
_mm256_maskz_cvtph_epi64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1202
_mm256_mask3_fmsubadd_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
Definition: avx512vlfp16intrin.h:1706
_mm_mask_rsqrt_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:430
_mm256_mask3_fnmsub_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U)
Definition: avx512vlfp16intrin.h:1782
_mm256_rsqrt_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A)
Definition: avx512vlfp16intrin.h:408
_mm_mask_fcmadd_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1833
_mm256_mask_blend_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W)
Definition: avx512vlfp16intrin.h:1983
_mm_fnmadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1713
_mm_cvttph_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A)
Definition: avx512vlfp16intrin.h:756
_mm_maskz_cvtph_pd
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:699
_mm_mask_cvttph_epi64
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1283
_mm256_mask_max_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:279
_mm256_cvtepu32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ph(__m256i __A)
Definition: avx512vlfp16intrin.h:1051
_mm256_setzero_si256
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition: avxintrin.h:4281
_mm256_div_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:193
_mm256_maskz_cvtxps_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A)
Definition: avx512vlfp16intrin.h:1412
_mm_mask_add_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:104
_mm_mask_div_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:215
__mmask8
unsigned char __mmask8
Definition: avx512fintrin.h:37
_mm_mask_cvtph_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:935
_mm_maskz_cvtepi32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:1011
_mm_maskz_fmul_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:1896
_mm256_maskz_cvttph_epu16
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A)
Definition: avx512vlfp16intrin.h:890
_mm_mask3_fmadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U)
Definition: avx512vlfp16intrin.h:1435
_mm_cvttph_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A)
Definition: avx512vlfp16intrin.h:1067
_mm256_cvttph_epu64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epu64(__m128h __A)
Definition: avx512vlfp16intrin.h:1330
_mm_setzero_pd
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition: emmintrin.h:1915
_mm256_undefined_ps
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition: avxintrin.h:3581
_mm_mask_cvtepu32_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A)
Definition: avx512vlfp16intrin.h:1039
_mm_maskz_sub_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:149
_mm256_conj_pch
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A)
Definition: avx512vlfp16intrin.h:324
__DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128
Definition: avx512vlfp16intrin.h:22
_mm256_maskz_cvttph_epi32
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1097
_mm_cvtepi64_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A)
Definition: avx512vlfp16intrin.h:1137
_mm256_maskz_cvttph_epi64
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A)
Definition: avx512vlfp16intrin.h:1307
_mm256_maskz_min_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B)
Definition: avx512vlfp16intrin.h:244
_mm_maskz_fnmadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1482
_mm256_xor_ps
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_xor_ps(__m256 __a, __m256 __b)
Performs a bitwise XOR of two 256-bit vectors of [8 x float].
Definition: avxintrin.h:652
_mm256_abs_ph
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A)
Definition: avx512vlfp16intrin.h:316
_mm_mask_fnmadd_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1721
_mm_mask_max_ph
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:299
_mm_fcmadd_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C)
Definition: avx512vlfp16intrin.h:1825
_mm_mask_fcmul_pch
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
Definition: avx512vlfp16intrin.h:1796
_mm256_set1_ph
static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h)
Definition: avx512vlfp16intrin.h:43