clang 20.0.0git
avx10_2convertintrin.h
Go to the documentation of this file.
1/*===--------------- avx10_2convertintrin.h - AVX10_2CONVERT ---------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error \
11 "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead."
12#endif // __IMMINTRIN_H
13
14#ifdef __SSE2__
15
16#ifndef __AVX10_2CONVERTINTRIN_H
17#define __AVX10_2CONVERTINTRIN_H
18
19/* Define the default attributes for the functions in this file. */
20#define __DEFAULT_FN_ATTRS128 \
21 __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
22 __min_vector_width__(128)))
23#define __DEFAULT_FN_ATTRS256 \
24 __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
25 __min_vector_width__(256)))
26
// Convert the 4+4 packed floats in __A and __B into one vector of 8 FP16
// elements (vcvt2ps2phx). All-ones mask: no lanes are masked off.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A,
                                                               __m128 __B) {
  return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
      (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1));
}
32
// Merge-masking variant of _mm_cvtx2ps_ph: result lanes whose __U bit is
// clear are taken from __W.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
      (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U);
}
38
// Zero-masking variant of _mm_cvtx2ps_ph: result lanes whose __U bit is
// clear are zeroed.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
      (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}
44
45static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A,
46 __m256 __B) {
47 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
48 (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1),
50}
51
52static __inline__ __m256h __DEFAULT_FN_ATTRS256
53_mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) {
54 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
55 (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U,
57}
58
59static __inline__ __m256h __DEFAULT_FN_ATTRS256
60_mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) {
61 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
62 (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
64}
65
// Explicit-rounding version of _mm256_cvtx2ps_ph: R is an _MM_FROUND_*
// constant forwarded to the builtin. Passthrough is undefined (all-ones mask).
#define _mm256_cvtx_round2ps_ph(A, B, R)                                       \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)_mm256_undefined_ph(),                \
      (__mmask16)(-1), (const int)(R)))
70
// Explicit-rounding, merge-masking version: lanes with the __U bit clear come
// from W.
#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R)                            \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)(W), (__mmask16)(U), (const int)(R)))
74
// Explicit-rounding, zero-masking version: lanes with the U bit clear are
// zeroed.
#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R)                              \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()),                \
      (__mmask16)(U), (const int)(R)))
79
// Convert FP16 elements in __B to 8-bit bf8 using per-element bias bytes from
// __A (vcvtbiasph2bf8). No masking; non-result bytes are unspecified
// (undefined passthrough).
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_bf8(__m128i __A,
                                                                  __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
}
85
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtbiasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
}
91
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask8)__U);
}
98
// 256-bit source variant: 16 FP16 elements of __B with bias bytes from __A
// produce a 128-bit vector of 16 bf8 bytes. No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask16)-1);
}
105
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_bf8(
    __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
}
111
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask16)__U);
}
118
// Saturating counterpart of _mm_cvtbiasph_bf8 (vcvtbiasph2bf8s builtin).
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtbiassph_bf8(__m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
}
124
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
}
130
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask8)__U);
}
137
// 256-bit source, saturating (vcvtbiasph2bf8s). No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask16)-1);
}
144
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_bf8(
    __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
}
150
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask16)__U);
}
157
// Convert FP16 elements in __B to 8-bit hf8 using per-element bias bytes from
// __A (vcvtbiasph2hf8). No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_hf8(__m128i __A,
                                                                  __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
}
163
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtbiasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
}
169
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask8)__U);
}
176
// 256-bit source variant of the hf8 bias convert. No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask16)-1);
}
183
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_hf8(
    __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
}
189
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask16)__U);
}
196
// Saturating counterpart of _mm_cvtbiasph_hf8 (vcvtbiasph2hf8s builtin).
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtbiassph_hf8(__m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
}
202
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
}
208
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
      (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask8)__U);
}
215
// 256-bit source, saturating (vcvtbiasph2hf8s). No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
      (__mmask16)-1);
}
222
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_hf8(
    __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
}
228
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) {
  return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
      (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
      (__mmask16)__U);
}
235
// Convert the 8+8 FP16 elements of __A and __B into 16 packed bf8 bytes
// (vcvt2ph2bf8). Unmasked builtin; masking is layered on via selectb below.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_bf8(__m128h __A,
                                                               __m128h __B) {
  return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), (__v8hf)(__B));
}
240
// Merge-masking: selectb_128 keeps converted bytes where __U is set, __W
// elsewhere. 16 output bytes, hence the 16-bit mask.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvt2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)__W);
}
246
// Zero-masking: converted bytes where __U is set, zero elsewhere.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B),
      (__v16qi)(__m128i)_mm_setzero_si128());
}
253
// Convert the 16+16 FP16 elements of __A and __B into 32 packed bf8 bytes
// (vcvt2ph2bf8). Unmasked.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_bf8(__m256h __A,
                                                                  __m256h __B) {
  return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A),
                                                 (__v16hf)(__B));
}
259
260static __inline__ __m256i __DEFAULT_FN_ATTRS256
261_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
262 return (__m256i)__builtin_ia32_selectb_256(
263 (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W);
264}
265
266static __inline__ __m256i __DEFAULT_FN_ATTRS256
267_mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) {
268 return (__m256i)__builtin_ia32_selectb_256(
269 (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B),
270 (__v32qi)(__m256i)_mm256_setzero_si256());
271}
272
// Saturating counterpart of _mm_cvt2ph_bf8 (vcvt2ph2bf8s builtin).
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_bf8(__m128h __A,
                                                                __m128h __B) {
  return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), (__v8hf)(__B));
}
277
// Merge-masking: converted bytes where __U is set, __W elsewhere.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)__W);
}
283
// Zero-masking: converted bytes where __U is set, zero elsewhere.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B),
      (__v16qi)(__m128i)_mm_setzero_si128());
}
290
// 256-bit saturating convert of 16+16 FP16 elements to 32 bf8 bytes.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvts2ph_bf8(__m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A),
                                                  (__v16hf)(__B));
}
296
297static __inline__ __m256i __DEFAULT_FN_ATTRS256
298_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
299 return (__m256i)__builtin_ia32_selectb_256(
300 (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W);
301}
302
303static __inline__ __m256i __DEFAULT_FN_ATTRS256
304_mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) {
305 return (__m256i)__builtin_ia32_selectb_256(
306 (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B),
307 (__v32qi)(__m256i)_mm256_setzero_si256());
308}
309
// Convert the 8+8 FP16 elements of __A and __B into 16 packed hf8 bytes
// (vcvt2ph2hf8). Unmasked.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_hf8(__m128h __A,
                                                               __m128h __B) {
  return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), (__v8hf)(__B));
}
314
// Merge-masking: converted bytes where __U is set, __W elsewhere.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvt2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)__W);
}
320
// Zero-masking: converted bytes where __U is set, zero elsewhere.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B),
      (__v16qi)(__m128i)_mm_setzero_si128());
}
327
// Convert the 16+16 FP16 elements of __A and __B into 32 packed hf8 bytes
// (vcvt2ph2hf8). Unmasked.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_hf8(__m256h __A,
                                                                  __m256h __B) {
  return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A),
                                                 (__v16hf)(__B));
}
333
334static __inline__ __m256i __DEFAULT_FN_ATTRS256
335_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
336 return (__m256i)__builtin_ia32_selectb_256(
337 (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W);
338}
339
340static __inline__ __m256i __DEFAULT_FN_ATTRS256
341_mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) {
342 return (__m256i)__builtin_ia32_selectb_256(
343 (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B),
344 (__v32qi)(__m256i)_mm256_setzero_si256());
345}
346
// Saturating counterpart of _mm_cvt2ph_hf8 (vcvt2ph2hf8s builtin).
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_hf8(__m128h __A,
                                                                __m128h __B) {
  return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), (__v8hf)(__B));
}
351
// Merge-masking: converted bytes where __U is set, __W elsewhere.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)__W);
}
357
// Zero-masking: converted bytes where __U is set, zero elsewhere.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) {
  return (__m128i)__builtin_ia32_selectb_128(
      (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B),
      (__v16qi)(__m128i)_mm_setzero_si128());
}
364
// 256-bit saturating convert of 16+16 FP16 elements to 32 hf8 bytes.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvts2ph_hf8(__m256h __A, __m256h __B) {
  return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A),
                                                  (__v16hf)(__B));
}
370
371static __inline__ __m256i __DEFAULT_FN_ATTRS256
372_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
373 return (__m256i)__builtin_ia32_selectb_256(
374 (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W);
375}
376
377static __inline__ __m256i __DEFAULT_FN_ATTRS256
378_mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) {
379 return (__m256i)__builtin_ia32_selectb_256(
380 (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B),
381 (__v32qi)(__m256i)_mm256_setzero_si256());
382}
383
// Convert packed hf8 bytes in __A to 8 FP16 elements (vcvthf8_2ph).
// No masking.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvthf8(__m128i __A) {
  return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
      (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (__mmask8)-1);
}
388
// Merge-masking variant: result lanes with the __U bit clear come from __W.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvthf8(__m128h __W,
                                                                __mmask8 __U,
                                                                __m128i __A) {
  return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
      (__v16qi)__A, (__v8hf)(__m128h)__W, (__mmask8)__U);
}
395
// Zero-masking variant: result lanes with the __U bit clear are zeroed.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvthf8(__mmask8 __U,
                                                                 __m128i __A) {
  return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
      (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (__mmask8)__U);
}
401
// Convert 16 packed hf8 bytes in __A to 16 FP16 elements. No masking.
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvthf8(__m128i __A) {
  return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
      (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (__mmask16)-1);
}
406
// Merge-masking variant: result lanes with the __U bit clear come from __W.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_cvthf8(__m256h __W, __mmask16 __U, __m128i __A) {
  return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
      (__v16qi)__A, (__v16hf)(__m256h)__W, (__mmask16)__U);
}
412
// Zero-masking variant: result lanes with the __U bit clear are zeroed.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvthf8(__mmask16 __U, __m128i __A) {
  return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
      (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (__mmask16)__U);
}
418
// Convert 8 FP16 elements in __A to bf8 bytes (vcvtph2bf8). No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_bf8(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
}
423
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtph_bf8(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
}
429
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtph_bf8(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
}
435
// Convert 16 FP16 elements in __A to 16 bf8 bytes. No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_bf8(__m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
}
440
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtph_bf8(__m128i __W, __mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
}
446
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtph_bf8(__mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
}
452
// Saturating counterpart of _mm_cvtph_bf8 (vcvtph2bf8s builtin).
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_bf8(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
}
457
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtsph_bf8(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
}
463
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsph_bf8(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
}
469
// 256-bit saturating convert of 16 FP16 elements to 16 bf8 bytes. No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_bf8(__m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
}
474
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtsph_bf8(__m128i __W, __mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
}
480
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtsph_bf8(__mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
}
486
// Convert 8 FP16 elements in __A to hf8 bytes (vcvtph2hf8). No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_hf8(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
}
491
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtph_hf8(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
}
497
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtph_hf8(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
}
503
// Convert 16 FP16 elements in __A to 16 hf8 bytes. No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_hf8(__m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
}
508
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtph_hf8(__m128i __W, __mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
}
514
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtph_hf8(__mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
}
520
// Saturating counterpart of _mm_cvtph_hf8 (vcvtph2hf8s builtin).
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_hf8(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
}
525
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtsph_hf8(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
}
531
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsph_hf8(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
      (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
}
537
// 256-bit saturating convert of 16 FP16 elements to 16 hf8 bytes. No masking.
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_hf8(__m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
}
542
// Merge-masking variant: result bytes with the __U bit clear come from __W.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtsph_hf8(__m128i __W, __mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
}
548
// Zero-masking variant: result bytes with the __U bit clear are zeroed.
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtsph_hf8(__mmask16 __U, __m256h __A) {
  return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
      (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
}
554
// Expand bf8 bytes to FP16: widen each byte to 16 bits, then shift left by 8
// so the bf8 byte becomes the high byte of the FP16 lane (low byte zero).
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtbf8_ph(__m128i __A) {
  return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A), 8));
}
558
// Merge-masking variant: masking is applied at the shift step, so lanes with
// the __U bit clear keep the corresponding FP16 lane of __S.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvtbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) {
  return _mm_castsi128_ph(
      _mm_mask_slli_epi16((__m128i)__S, __U, _mm_cvtepi8_epi16(__A), 8));
}
564
// Zero-masking variant: masking is applied at the widening step, so lanes
// with the __U bit clear are zeroed.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtbf8_ph(__mmask8 __U, __m128i __A) {
  return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U, __A), 8));
}
569
// 256-bit variant: expand 16 bf8 bytes to 16 FP16 lanes (widen, shift left 8).
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtbf8_ph(__m128i __A) {
  return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A), 8));
}
573
// Merge-masking variant: lanes with the __U bit clear keep the corresponding
// FP16 lane of __S.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_cvtbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) {
  return _mm256_castsi256_ph(
      _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8));
}
579
580static __inline__ __m256h __DEFAULT_FN_ATTRS256
581_mm256_maskz_cvtbf8_ph(__mmask16 __U, __m128i __A) {
582 return _mm256_castsi256_ph(
584}
585
586#undef __DEFAULT_FN_ATTRS128
587#undef __DEFAULT_FN_ATTRS256
588
589#endif // __AVX10_2CONVERTINTRIN_H
590#endif // __SSE2__
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi16(__m128i __V)
Sign-extends bytes from the 128-bit integer vector in __V and returns the 16-bit values in the corres...
Definition: avx2intrin.h:1367
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a left by __count bits,...
Definition: avx2intrin.h:2138
unsigned int __mmask32
unsigned char __mmask8
Definition: avx512fintrin.h:41
unsigned short __mmask16
Definition: avx512fintrin.h:42
#define _MM_FROUND_CUR_DIRECTION
Definition: avx512fintrin.h:49
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition: avxintrin.h:4366
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition: emmintrin.h:3880
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition: emmintrin.h:3495
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2774
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V)
Sign-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
Definition: smmintrin.h:1227