/*===----------- avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2_512bf16intrin.h> directly; include <immintrin.h> instead."
#endif

#ifdef __SSE2__

#ifndef __AVX10_2_512BF16INTRIN_H
#define __AVX10_2_512BF16INTRIN_H

/* 512-bit vector of 32 __bf16 elements with single-byte alignment, used by the
   unaligned load and store intrinsics below. */
typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
                 __min_vector_width__(512)))

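/* Constructors: zero, undefined, broadcast (set1), and per-element set/setr
   initializers for 512-bit bf16 vectors. */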
static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
  return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_undefined_pbh(void) {
  return (__m512bh)__builtin_ia32_undef512();
}

static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set1_pbh(__bf16 bf) {
  return (__m512bh)(__v32bf){bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
                             bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
                             bf, bf, bf, bf, bf, bf, bf, bf, bf, bf};
}

static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh(
    __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6,
    __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
    __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, __bf16 bf17,
    __bf16 bf18, __bf16 bf19, __bf16 bf20, __bf16 bf21, __bf16 bf22,
    __bf16 bf23, __bf16 bf24, __bf16 bf25, __bf16 bf26, __bf16 bf27,
    __bf16 bf28, __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
  return (__m512bh)(__v32bf){bf32, bf31, bf30, bf29, bf28, bf27, bf26, bf25,
                             bf24, bf23, bf22, bf21, bf20, bf19, bf18, bf17,
                             bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9,
                             bf8,  bf7,  bf6,  bf5,  bf4,  bf3,  bf2,  bf1};
}

#define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \
                        bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19, \
                        bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28, \
                        bf29, bf30, bf31, bf32) \
  _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26), \
                 (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19), \
                 (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12), \
                 (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), \
                 (bf3), (bf2), (bf1))

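/* Casts: reinterpret the bits of a 512-bit bf16 vector as other 512-bit vector
   types (and back) without converting element values. The 128/256-to-512 casts
   leave the upper elements undefined; the zext variants zero them instead. */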
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_castbf16_ps(__m512bh __a) {
  return (__m512)__a;
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_castbf16_pd(__m512bh __a) {
  return (__m512d)__a;
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_castbf16_si512(__m512bh __a) {
  return (__m512i)__a;
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_castps_pbh(__m512 __a) {
  return (__m512bh)__a;
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_castpd_pbh(__m512d __a) {
  return (__m512bh)__a;
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_castsi512_pbh(__m512i __a) {
  return (__m512bh)__a;
}

static __inline__ __m128bh __DEFAULT_FN_ATTRS512
_mm512_castbf16512_pbh128(__m512bh __a) {
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_castbf16512_pbh256(__m512bh __a) {
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                                 12, 13, 14, 15);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_castbf16128_pbh512(__m128bh __a) {
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, -1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_castbf16256_pbh512(__m256bh __a) {
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                                 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_zextbf16128_pbh512(__m128bh __a) {
  return __builtin_shufflevector(
      __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
      13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_zextbf16256_pbh512(__m256bh __a) {
  return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3,
                                 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
                                 29, 30, 31);
}

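/* Absolute value: clears the sign bit of every bf16 element with a bitwise AND
   against 0x7FFF, reinterpreting the vector as 32-bit integers. */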
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_abs_pbh(__m512bh __A) {
  return (__m512bh)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF),
                                    (__m512i)__A);
}

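/* Loads and stores. _mm512_load_pbh/_mm512_store_pbh require 64-byte aligned
   memory; the loadu/storeu variants accept arbitrary alignment through the
   byte-aligned __m512bh_u type. Illustrative sketch (not part of this header),
   assuming 32-element buffers:

     void scale_buf(const __bf16 *in, __bf16 *out, __bf16 factor) {
       __m512bh v = _mm512_loadu_pbh(in);
       _mm512_storeu_pbh(out, _mm512_mul_pbh(v, _mm512_set1_pbh(factor)));
     }
*/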
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_load_pbh(void const *__p) {
  return *(const __m512bh *)__p;
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_loadu_pbh(void const *__p) {
  struct __loadu_pbh {
    __m512bh_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_pbh *)__p)->__v;
}

static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_pbh(void *__P,
                                                              __m512bh __A) {
  *(__m512bh *)__P = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_pbh(void *__P,
                                                               __m512bh __A) {
  struct __storeu_pbh {
    __m512bh_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pbh *)__P)->__v = __A;
}

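/* Mask blend and permutes: blend picks each element from __W where the mask
   bit is set and from __A otherwise; permutex2var selects elements from the
   pair (__A, __B) using the 16-bit indices in __I; permutexvar reorders the
   elements of __B using the indices in __A. Both reuse the word-granular
   permute builtins. */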
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
  return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)__W,
                                                (__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
  return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
                                                  (__v32hi)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}

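/* Element-wise arithmetic (+, -, *, /). Each operation has a merge-masking
   form (_mask_*, writing through from __W where the mask bit is clear) and a
   zero-masking form (_maskz_*). Illustrative sketch (not part of this header):

     // Keep a + b only where the mask bit is set, else keep w.
     __m512bh masked_sum(__m512bh w, __mmask32 m, __m512bh a, __m512bh b) {
       return _mm512_mask_add_pbh(w, m, a, b);
     }
*/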
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_add_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)((__v32bf)__A + (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_add_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_add_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sub_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)((__v32bf)__A - (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_sub_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mul_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)((__v32bf)__A * (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_mul_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_div_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)((__v32bf)__A / (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_div_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_div_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

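/* Element-wise maximum and minimum via the vmaxbf16512/vminbf16512 builtins,
   with merge- and zero-masking variants built from the select helper. */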
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)__builtin_ia32_vmaxbf16512((__v32bf)__A, (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A,
                                                                __m512bh __B) {
  return (__m512bh)__builtin_ia32_vminbf16512((__v32bf)__A, (__v32bf)__B);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B),
      (__v32bf)_mm512_setzero_pbh());
}

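/* Compares and classification. _mm512_cmp_pbh_mask takes an immediate
   predicate and returns a 32-bit result mask; fpclass tests each element
   against an immediate category bitmask. Illustrative sketch (not part of this
   header), assuming the _CMP_* predicates from <immintrin.h>:

     __mmask32 less_than(__m512bh a, __m512bh b) {
       return _mm512_cmp_pbh_mask(a, b, _CMP_LT_OQ);
     }
*/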
#define _mm512_cmp_pbh_mask(__A, __B, __P) \
  ((__mmask32)__builtin_ia32_vcmpbf16512_mask((__v32bf)(__m512bh)(__A), \
                                              (__v32bf)(__m512bh)(__B), \
                                              (int)(__P), (__mmask32)-1))

#define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P) \
  ((__mmask32)__builtin_ia32_vcmpbf16512_mask((__v32bf)(__m512bh)(__A), \
                                              (__v32bf)(__m512bh)(__B), \
                                              (int)(__P), (__mmask32)(__U)))

#define _mm512_mask_fpclass_pbh_mask(__U, __A, imm) \
  ((__mmask32)__builtin_ia32_vfpclassbf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)(__U)))

#define _mm512_fpclass_pbh_mask(__A, imm) \
  ((__mmask32)__builtin_ia32_vfpclassbf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)-1))

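/* Scale and approximation helpers: scalef multiplies each element of __A by a
   power of two taken from the corresponding element of __B (as in AVX-512
   VSCALEF); getexp extracts the exponent; rcp and rsqrt return approximate
   reciprocal and reciprocal square root. Each has mask/maskz variants. */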
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_scalef_pbh(__m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_vscalefbf16512_mask(
      (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(),
      (__mmask32)-1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh(
    __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_vscalefbf16512_mask(
      (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
  return (__m512bh)__builtin_ia32_vscalefbf16512_mask(
      (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_setzero_pbh(),
      (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) {
  return (__m512bh)__builtin_ia32_vrcpbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vrcpbf16512_mask((__v32bf)__A, (__v32bf)__W,
                                                   (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vrcpbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_getexp_pbh(__m512bh __A) {
  return (__m512bh)__builtin_ia32_vgetexpbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vgetexpbf16512_mask(
      (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vgetexpbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_rsqrt_pbh(__m512bh __A) {
  return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask((__v32bf)__A, (__v32bf)__W,
                                                     (__mmask32)__U);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask(
      (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
}

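/* Immediate-operand macros following the usual AVX-512 VREDUCE, VRNDSCALE and
   VGETMANT semantics: reduce returns the fractional remainder left after
   rounding at the precision selected by imm, roundscale rounds each element to
   the scaled integral value selected by imm, and getmant normalizes the
   mantissa according to the interval control __B and sign control __C. */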
#define _mm512_reduce_pbh(__A, imm) \
  ((__m512bh)__builtin_ia32_vreducebf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \
      (__mmask32)-1))

#define _mm512_mask_reduce_pbh(__W, __U, __A, imm) \
  ((__m512bh)__builtin_ia32_vreducebf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \
      (__mmask32)(__U)))

#define _mm512_maskz_reduce_pbh(__U, __A, imm) \
  ((__m512bh)__builtin_ia32_vreducebf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
      (__mmask32)(__U)))

#define _mm512_roundscale_pbh(__A, imm) \
  ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
      (__mmask32)-1))

#define _mm512_mask_roundscale_pbh(__W, __U, __A, imm) \
  ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \
      (__mmask32)(__U)))

#define _mm512_maskz_roundscale_pbh(__U, __A, imm) \
  ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
      (__mmask32)(__U)))

#define _mm512_getmant_pbh(__A, __B, __C) \
  ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
      (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1))

#define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C) \
  ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
      (__v32bf)(__m512bh)(__W), (__mmask32)(__U)))

#define _mm512_maskz_getmant_pbh(__U, __A, __B, __C) \
  ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \
      (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
      (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U)))

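/* Square root, plus merge- and zero-masked forms built from the unmasked
   builtin and the select helper. */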
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) {
  return (__m512bh)__builtin_ia32_vsqrtbf16512((__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U, (__v32bf)_mm512_sqrt_pbh(__A), (__v32bf)__W);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
  return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U,
                                                (__v32bf)_mm512_sqrt_pbh(__A),
                                                (__v32bf)_mm512_setzero_pbh());
}

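/* Fused multiply-add family: fmadd (A*B + C), fmsub (A*B - C), fnmadd
   (-(A*B) + C) and fnmsub (-(A*B) - C), each with mask (merge into __A),
   mask3 (merge into __C) and maskz (zero) variants. Illustrative sketch (not
   part of this header):

     // d = a*b + c where the mask bit is set; elsewhere keep a.
     __m512bh fma_masked(__m512bh a, __mmask32 m, __m512bh b, __m512bh c) {
       return _mm512_mask_fmadd_pbh(a, m, b, c);
     }
*/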
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
                                                 (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_fmadd_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pbh(
    __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh(
    __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
                                                 -(__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_fmsub_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pbh(
    __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh(
    __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
                                                 (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh(
    __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pbh(
    __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh(
    __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)_mm512_setzero_pbh());
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
                                                 -(__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh(
    __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pbh(
    __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)__C);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh(
    __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
  return (__m512bh)__builtin_ia32_selectpbf_512(
      (__mmask32)__U,
      _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
      (__v32bf)_mm512_setzero_pbh());
}

#undef __DEFAULT_FN_ATTRS512

#endif
#endif