clang 22.0.0git
avx10_2_512bf16intrin.h
Go to the documentation of this file.
1/*===----------- avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error \
11 "Never use <avx10_2_512bf16intrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifdef __SSE2__
15
16#ifndef __AVX10_2_512BF16INTRIN_H
17#define __AVX10_2_512BF16INTRIN_H
18
19/* Define the default attributes for the functions in this file. */
20typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));
21
/* Attribute set applied to the intrinsics defined in this file: forced
 * inlining, no debug info, the "avx10.2" target feature and a minimum vector
 * width of 512 bits. */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                 __min_vector_width__(512)))
26
/* Same attribute set as __DEFAULT_FN_ATTRS512; `constexpr` is appended only
 * when compiling as C++11 or newer. */
#if !defined(__cplusplus) || (__cplusplus < 201103L)
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#else
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#endif
32
33static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
34 return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
35}
36
37static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_undefined_pbh(void) {
38 return (__m512bh)__builtin_ia32_undef512();
39}
40
41static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set1_pbh(__bf16 bf) {
42 return (__m512bh)(__v32bf){bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
43 bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
44 bf, bf, bf, bf, bf, bf, bf, bf, bf, bf};
45}
46
47static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh(
48 __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6,
49 __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
50 __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, __bf16 bf17,
51 __bf16 bf18, __bf16 bf19, __bf16 bf20, __bf16 bf21, __bf16 bf22,
52 __bf16 bf23, __bf16 bf24, __bf16 bf25, __bf16 bf26, __bf16 bf27,
53 __bf16 bf28, __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
54 return (__m512bh)(__v32bf){bf32, bf31, bf30, bf29, bf28, bf27, bf26, bf25,
55 bf24, bf23, bf22, bf21, bf20, bf19, bf18, bf17,
56 bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9,
57 bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1};
58}
59
/* Reverse-order variant of _mm512_set_pbh: the arguments are forwarded in
 * reversed order, so bf1 ends up in lane 0 and bf32 in lane 31. */
#define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10,     \
                        bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19,  \
                        bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28,  \
                        bf29, bf30, bf31, bf32)                                \
  _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26),       \
                 (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19),       \
                 (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12),       \
                 (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4),     \
                 (bf3), (bf2), (bf1))
69
70static __inline__ __m512 __DEFAULT_FN_ATTRS512
71_mm512_castbf16_ps(__m512bh __a) {
72 return (__m512)__a;
73}
74
75static __inline__ __m512d __DEFAULT_FN_ATTRS512
76_mm512_castbf16_pd(__m512bh __a) {
77 return (__m512d)__a;
78}
79
80static __inline__ __m512i __DEFAULT_FN_ATTRS512
81_mm512_castbf16_si512(__m512bh __a) {
82 return (__m512i)__a;
83}
84
85static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_castps_pbh(__m512 __a) {
86 return (__m512bh)__a;
87}
88
89static __inline__ __m512bh __DEFAULT_FN_ATTRS512
90_mm512_castpd_pbh(__m512d __a) {
91 return (__m512bh)__a;
92}
93
94static __inline__ __m512bh __DEFAULT_FN_ATTRS512
95_mm512_castsi512_pbh(__m512i __a) {
96 return (__m512bh)__a;
97}
98
99static __inline__ __m128bh __DEFAULT_FN_ATTRS512
100_mm512_castbf16512_pbh128(__m512bh __a) {
101 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
102}
103
104static __inline__ __m256bh __DEFAULT_FN_ATTRS512
105_mm512_castbf16512_pbh256(__m512bh __a) {
106 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
107 12, 13, 14, 15);
108}
109
110static __inline__ __m512bh __DEFAULT_FN_ATTRS512
111_mm512_castbf16128_pbh512(__m128bh __a) {
112 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
113 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
114 -1, -1, -1, -1, -1, -1, -1, -1, -1);
115}
116
117static __inline__ __m512bh __DEFAULT_FN_ATTRS512
118_mm512_castbf16256_pbh512(__m256bh __a) {
119 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
120 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
121 -1, -1, -1, -1, -1, -1, -1, -1);
122}
123
124static __inline__ __m512bh __DEFAULT_FN_ATTRS512
125_mm512_zextbf16128_pbh512(__m128bh __a) {
126 return __builtin_shufflevector(
127 __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
128 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
129}
130
131static __inline__ __m512bh __DEFAULT_FN_ATTRS512
132_mm512_zextbf16256_pbh512(__m256bh __a) {
133 return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3,
134 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
135 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
136 29, 30, 31);
137}
138
139static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_abs_pbh(__m512bh __A) {
140 return (__m512bh)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF),
141 (__m512i)__A);
142}
143
144static __inline__ __m512bh __DEFAULT_FN_ATTRS512
145_mm512_load_pbh(void const *__p) {
146 return *(const __m512bh *)__p;
147}
148
149static __inline__ __m512bh __DEFAULT_FN_ATTRS512
150_mm512_loadu_pbh(void const *__p) {
151 struct __loadu_pbh {
152 __m512bh_u __v;
153 } __attribute__((__packed__, __may_alias__));
154 return ((const struct __loadu_pbh *)__p)->__v;
155}
156
157static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_pbh(void *__P,
158 __m512bh __A) {
159 *(__m512bh *)__P = __A;
160}
161
162static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_pbh(void *__P,
163 __m512bh __A) {
164 struct __storeu_pbh {
165 __m512bh_u __v;
166 } __attribute__((__packed__, __may_alias__));
167 ((struct __storeu_pbh *)__P)->__v = __A;
168}
169
170static __inline__ __m512bh __DEFAULT_FN_ATTRS512
171_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
172 return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)__W,
173 (__v32bf)__A);
174}
175
176static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
177_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
178 return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
179 (__v32hi)__B);
180}
181
182static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
183_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
184 return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
185}
186
187static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_add_pbh(__m512bh __A,
188 __m512bh __B) {
189 return (__m512bh)((__v32bf)__A + (__v32bf)__B);
190}
191
192static __inline__ __m512bh __DEFAULT_FN_ATTRS512
193_mm512_mask_add_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
194 return (__m512bh)__builtin_ia32_selectpbf_512(
195 (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B), (__v32bf)__W);
196}
197
198static __inline__ __m512bh __DEFAULT_FN_ATTRS512
199_mm512_maskz_add_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
200 return (__m512bh)__builtin_ia32_selectpbf_512(
201 (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B),
202 (__v32bf)_mm512_setzero_pbh());
203}
204
205static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sub_pbh(__m512bh __A,
206 __m512bh __B) {
207 return (__m512bh)((__v32bf)__A - (__v32bf)__B);
208}
209
210static __inline__ __m512bh __DEFAULT_FN_ATTRS512
211_mm512_mask_sub_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
212 return (__m512bh)__builtin_ia32_selectpbf_512(
213 (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B), (__v32bf)__W);
214}
215
216static __inline__ __m512bh __DEFAULT_FN_ATTRS512
217_mm512_maskz_sub_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
218 return (__m512bh)__builtin_ia32_selectpbf_512(
219 (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B),
220 (__v32bf)_mm512_setzero_pbh());
221}
222
223static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mul_pbh(__m512bh __A,
224 __m512bh __B) {
225 return (__m512bh)((__v32bf)__A * (__v32bf)__B);
226}
227
228static __inline__ __m512bh __DEFAULT_FN_ATTRS512
229_mm512_mask_mul_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
230 return (__m512bh)__builtin_ia32_selectpbf_512(
231 (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B), (__v32bf)__W);
232}
233
234static __inline__ __m512bh __DEFAULT_FN_ATTRS512
235_mm512_maskz_mul_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
236 return (__m512bh)__builtin_ia32_selectpbf_512(
237 (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B),
238 (__v32bf)_mm512_setzero_pbh());
239}
240
241static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_div_pbh(__m512bh __A,
242 __m512bh __B) {
243 return (__m512bh)((__v32bf)__A / (__v32bf)__B);
244}
245
246static __inline__ __m512bh __DEFAULT_FN_ATTRS512
247_mm512_mask_div_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
248 return (__m512bh)__builtin_ia32_selectpbf_512(
249 (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B), (__v32bf)__W);
250}
251
252static __inline__ __m512bh __DEFAULT_FN_ATTRS512
253_mm512_maskz_div_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
254 return (__m512bh)__builtin_ia32_selectpbf_512(
255 (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B),
256 (__v32bf)_mm512_setzero_pbh());
257}
258
259static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A,
260 __m512bh __B) {
261 return (__m512bh)__builtin_ia32_vmaxbf16512((__v32bf)__A, (__v32bf)__B);
262}
263
264static __inline__ __m512bh __DEFAULT_FN_ATTRS512
265_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
266 return (__m512bh)__builtin_ia32_selectpbf_512(
267 (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), (__v32bf)__W);
268}
269
270static __inline__ __m512bh __DEFAULT_FN_ATTRS512
271_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
272 return (__m512bh)__builtin_ia32_selectpbf_512(
273 (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B),
274 (__v32bf)_mm512_setzero_pbh());
275}
276
277static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A,
278 __m512bh __B) {
279 return (__m512bh)__builtin_ia32_vminbf16512((__v32bf)__A, (__v32bf)__B);
280}
281
282static __inline__ __m512bh __DEFAULT_FN_ATTRS512
283_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
284 return (__m512bh)__builtin_ia32_selectpbf_512(
285 (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), (__v32bf)__W);
286}
287
288static __inline__ __m512bh __DEFAULT_FN_ATTRS512
289_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
290 return (__m512bh)__builtin_ia32_selectpbf_512(
291 (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B),
292 (__v32bf)_mm512_setzero_pbh());
293}
294
/* Compares __A with __B using predicate immediate __P and returns the result
 * as a __mmask32. An all-ones mask is passed to the builtin, i.e. every lane
 * takes part. */
#define _mm512_cmp_pbh_mask(__A, __B, __P)                                     \
  ((__mmask32)__builtin_ia32_vcmpbf16512_mask(                                 \
      (__v32bf)(__m512bh)(__A), (__v32bf)(__m512bh)(__B), (int)(__P),          \
      (__mmask32)(-1)))
299
/* Masked form of _mm512_cmp_pbh_mask: same builtin and predicate immediate
 * __P, with __U passed as the builtin's mask operand. */
#define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P)                           \
  ((__mmask32)__builtin_ia32_vcmpbf16512_mask(                                 \
      (__v32bf)(__m512bh)(__A), (__v32bf)(__m512bh)(__B), (int)(__P),          \
      (__mmask32)(__U)))
304
/* Forwards __A and the class-selection immediate to
 * __builtin_ia32_vfpclassbf16512_mask with __U as the mask operand; yields a
 * __mmask32. */
#define _mm512_mask_fpclass_pbh_mask(__U, __A, __imm)                          \
  ((__mmask32)__builtin_ia32_vfpclassbf16512_mask((__v32bf)(__m512bh)(__A),    \
                                                  (int)(__imm),                \
                                                  (__mmask32)(__U)))
308
/* Unmasked form of the fpclass test: same builtin, with an all-ones mask
 * operand; yields a __mmask32. */
#define _mm512_fpclass_pbh_mask(__A, __imm)                                    \
  ((__mmask32)__builtin_ia32_vfpclassbf16512_mask((__v32bf)(__m512bh)(__A),    \
                                                  (int)(__imm),                \
                                                  (__mmask32)(-1)))
312
313static __inline__ __m512bh __DEFAULT_FN_ATTRS512
314_mm512_scalef_pbh(__m512bh __A, __m512bh __B) {
315 return (__m512bh)__builtin_ia32_vscalefbf16512_mask(
316 (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(),
317 (__mmask32)-1);
318}
319
320static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh(
321 __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
322 return (__m512bh)__builtin_ia32_vscalefbf16512_mask(
323 (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U);
324}
325
326static __inline__ __m512bh __DEFAULT_FN_ATTRS512
327_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
328 return (__m512bh)__builtin_ia32_vscalefbf16512_mask(
329 (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_setzero_pbh(),
330 (__mmask32)__U);
331}
332
333static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) {
334 return (__m512bh)__builtin_ia32_vrcpbf16512_mask(
335 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
336}
337
338static __inline__ __m512bh __DEFAULT_FN_ATTRS512
339_mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
340 return (__m512bh)__builtin_ia32_vrcpbf16512_mask((__v32bf)__A, (__v32bf)__W,
341 (__mmask32)__U);
342}
343
344static __inline__ __m512bh __DEFAULT_FN_ATTRS512
345_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) {
346 return (__m512bh)__builtin_ia32_vrcpbf16512_mask(
347 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
348}
349
350static __inline__ __m512bh __DEFAULT_FN_ATTRS512
351_mm512_getexp_pbh(__m512bh __A) {
352 return (__m512bh)__builtin_ia32_vgetexpbf16512_mask(
353 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
354}
355
356static __inline__ __m512bh __DEFAULT_FN_ATTRS512
357_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
358 return (__m512bh)__builtin_ia32_vgetexpbf16512_mask(
359 (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
360}
361
362static __inline__ __m512bh __DEFAULT_FN_ATTRS512
363_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) {
364 return (__m512bh)__builtin_ia32_vgetexpbf16512_mask(
365 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
366}
367
368static __inline__ __m512bh __DEFAULT_FN_ATTRS512
369_mm512_rsqrt_pbh(__m512bh __A) {
370 return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask(
371 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
372}
373
374static __inline__ __m512bh __DEFAULT_FN_ATTRS512
375_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
376 return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask((__v32bf)__A, (__v32bf)__W,
377 (__mmask32)__U);
378}
379
380static __inline__ __m512bh __DEFAULT_FN_ATTRS512
381_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
382 return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask(
383 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
384}
385
/* Forwards __A and the immediate to __builtin_ia32_vreducebf16512_mask with
 * an all-ones mask and an unspecified pass-through operand. */
#define _mm512_reduce_pbh(__A, __imm)                                          \
  ((__m512bh)__builtin_ia32_vreducebf16512_mask(                               \
      (__v32bf)(__m512bh)(__A), (int)(__imm),                                  \
      (__v32bf)_mm512_undefined_pbh(), (__mmask32)(-1)))
390
/* Merge-masked reduce: forwards to __builtin_ia32_vreducebf16512_mask with
 * __W as the pass-through operand and __U as the mask. */
#define _mm512_mask_reduce_pbh(__W, __U, __A, __imm)                           \
  ((__m512bh)__builtin_ia32_vreducebf16512_mask(                               \
      (__v32bf)(__m512bh)(__A), (int)(__imm),                                  \
      (__v32bf)(__m512bh)(__W), (__mmask32)(__U)))
395
/* Zero-masked reduce: forwards to __builtin_ia32_vreducebf16512_mask with
 * _mm512_setzero_pbh() as the pass-through operand and __U as the mask. */
#define _mm512_maskz_reduce_pbh(__U, __A, __imm)                               \
  ((__m512bh)__builtin_ia32_vreducebf16512_mask(                               \
      (__v32bf)(__m512bh)(__A), (int)(__imm),                                  \
      (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U)))
400
/* Forwards __A and the immediate to __builtin_ia32_vrndscalebf16_mask with an
 * all-ones mask. The pass-through operand here is _mm512_setzero_pbh(). */
#define _mm512_roundscale_pbh(__A, __imm)                                      \
  ((__m512bh)__builtin_ia32_vrndscalebf16_mask(                                \
      (__v32bf)(__m512bh)(__A), (int)(__imm),                                  \
      (__v32bf)_mm512_setzero_pbh(), (__mmask32)(-1)))
405
/* Merge-masked roundscale: forwards to __builtin_ia32_vrndscalebf16_mask with
 * __W as the pass-through operand and __U as the mask. */
#define _mm512_mask_roundscale_pbh(__W, __U, __A, __imm)                       \
  ((__m512bh)__builtin_ia32_vrndscalebf16_mask(                                \
      (__v32bf)(__m512bh)(__A), (int)(__imm),                                  \
      (__v32bf)(__m512bh)(__W), (__mmask32)(__U)))
410
/* Zero-masked roundscale: forwards to __builtin_ia32_vrndscalebf16_mask with
 * _mm512_setzero_pbh() as the pass-through operand and __U as the mask. */
#define _mm512_maskz_roundscale_pbh(__U, __A, __imm)                           \
  ((__m512bh)__builtin_ia32_vrndscalebf16_mask(                                \
      (__v32bf)(__m512bh)(__A), (int)(__imm),                                  \
      (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U)))
415
/* Forwards __A to __builtin_ia32_vgetmantbf16512_mask with an all-ones mask
 * and an unspecified pass-through operand. The builtin's immediate is built
 * as (__C << 2) | __B. */
#define _mm512_getmant_pbh(__A, __B, __C)                                      \
  ((__m512bh)__builtin_ia32_vgetmantbf16512_mask(                              \
      (__v32bf)(__m512bh)(__A),                                                \
      (int)(((__C) << 2) | (__B)),                                             \
      (__v32bf)_mm512_undefined_pbh(),                                         \
      (__mmask32)(-1)))
420
/* Merge-masked getmant: forwards to __builtin_ia32_vgetmantbf16512_mask with
 * immediate (__C << 2) | __B, __W as the pass-through operand and __U as the
 * mask. */
#define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C)                       \
  ((__m512bh)__builtin_ia32_vgetmantbf16512_mask(                              \
      (__v32bf)(__m512bh)(__A),                                                \
      (int)(((__C) << 2) | (__B)),                                             \
      (__v32bf)(__m512bh)(__W),                                                \
      (__mmask32)(__U)))
425
/* Zero-masked getmant: forwards to __builtin_ia32_vgetmantbf16512_mask with
 * immediate (__C << 2) | __B, _mm512_setzero_pbh() as the pass-through
 * operand and __U as the mask. */
#define _mm512_maskz_getmant_pbh(__U, __A, __B, __C)                           \
  ((__m512bh)__builtin_ia32_vgetmantbf16512_mask(                              \
      (__v32bf)(__m512bh)(__A),                                                \
      (int)(((__C) << 2) | (__B)),                                             \
      (__v32bf)_mm512_setzero_pbh(),                                           \
      (__mmask32)(__U)))
430
431static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) {
432 return (__m512bh)__builtin_ia32_vsqrtbf16512((__v32bf)__A);
433}
434
435static __inline__ __m512bh __DEFAULT_FN_ATTRS512
436_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
437 return (__m512bh)__builtin_ia32_selectpbf_512(
438 (__mmask32)__U, (__v32bf)_mm512_sqrt_pbh(__A), (__v32bf)__W);
439}
440
441static __inline__ __m512bh __DEFAULT_FN_ATTRS512
442_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
443 return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U,
444 (__v32bf)_mm512_sqrt_pbh(__A),
445 (__v32bf)_mm512_setzero_pbh());
446}
447
448static __inline__ __m512bh __DEFAULT_FN_ATTRS512
449_mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
450 return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B,
451 (__v32bf)__C);
452}
453
454static __inline__ __m512bh __DEFAULT_FN_ATTRS512
455_mm512_mask_fmadd_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
456 return (__m512bh)__builtin_ia32_selectpbf_512(
457 (__mmask32)__U,
458 _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A);
459}
460
461static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pbh(
462 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
463 return (__m512bh)__builtin_ia32_selectpbf_512(
464 (__mmask32)__U,
465 _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C);
466}
467
468static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh(
469 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
470 return (__m512bh)__builtin_ia32_selectpbf_512(
471 (__mmask32)__U,
472 _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
473 (__v32bf)_mm512_setzero_pbh());
474}
475
476static __inline__ __m512bh __DEFAULT_FN_ATTRS512
477_mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
478 return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B,
479 -(__v32bf)__C);
480}
481
482static __inline__ __m512bh __DEFAULT_FN_ATTRS512
483_mm512_mask_fmsub_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
484 return (__m512bh)__builtin_ia32_selectpbf_512(
485 (__mmask32)__U,
486 _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A);
487}
488
489static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pbh(
490 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
491 return (__m512bh)__builtin_ia32_selectpbf_512(
492 (__mmask32)__U,
493 _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C);
494}
495
496static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh(
497 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
498 return (__m512bh)__builtin_ia32_selectpbf_512(
499 (__mmask32)__U,
500 _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
501 (__v32bf)_mm512_setzero_pbh());
502}
503
504static __inline__ __m512bh __DEFAULT_FN_ATTRS512
505_mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
506 return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B,
507 (__v32bf)__C);
508}
509
510static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh(
511 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
512 return (__m512bh)__builtin_ia32_selectpbf_512(
513 (__mmask32)__U,
514 _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
515 (__v32bf)__A);
516}
517
518static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pbh(
519 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
520 return (__m512bh)__builtin_ia32_selectpbf_512(
521 (__mmask32)__U,
522 _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
523 (__v32bf)__C);
524}
525
526static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh(
527 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
528 return (__m512bh)__builtin_ia32_selectpbf_512(
529 (__mmask32)__U,
530 _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
531 (__v32bf)_mm512_setzero_pbh());
532}
533
534static __inline__ __m512bh __DEFAULT_FN_ATTRS512
535_mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
536 return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B,
537 -(__v32bf)__C);
538}
539
540static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh(
541 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
542 return (__m512bh)__builtin_ia32_selectpbf_512(
543 (__mmask32)__U,
544 _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
545 (__v32bf)__A);
546}
547
548static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pbh(
549 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
550 return (__m512bh)__builtin_ia32_selectpbf_512(
551 (__mmask32)__U,
552 _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
553 (__v32bf)__C);
554}
555
556static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh(
557 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
558 return (__m512bh)__builtin_ia32_selectpbf_512(
559 (__mmask32)__U,
560 _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
561 (__v32bf)_mm512_setzero_pbh());
562}
563
564#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
565#undef __DEFAULT_FN_ATTRS512
566
567#endif
568#endif
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
static __inline__ uint32_t volatile uint32_t * __p
Definition arm_acle.h:57
return __v
Definition arm_acle.h:88
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
unsigned int __mmask32
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline__ void int __a
Definition emmintrin.h:4077
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25