clang 22.0.0git
avx10_2satcvtintrin.h
Go to the documentation of this file.
1/*===----------- avx10_2satcvtintrin.h - AVX10_2SATCVT intrinsics ----------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error \
11 "Never use <avx10_2satcvtintrin.h> directly; include <immintrin.h> instead."
12#endif // __IMMINTRIN_H
13
14#ifndef __AVX10_2SATCVTINTRIN_H
15#define __AVX10_2SATCVTINTRIN_H
16
17#define _mm_ipcvts_bf16_epi8(A) \
18 ((__m128i)__builtin_ia32_vcvtbf162ibs128((__v8bf)(__m128bh)(A)))
19
20#define _mm_mask_ipcvts_bf16_epi8(W, U, A) \
21 ((__m128i)__builtin_ia32_selectw_128( \
22 (__mmask8)(U), (__v8hi)_mm_ipcvts_bf16_epi8(A), (__v8hi)(__m128i)(W)))
23
24#define _mm_maskz_ipcvts_bf16_epi8(U, A) \
25 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
26 (__v8hi)_mm_ipcvts_bf16_epi8(A), \
27 (__v8hi)_mm_setzero_si128()))
28
29#define _mm256_ipcvts_bf16_epi8(A) \
30 ((__m256i)__builtin_ia32_vcvtbf162ibs256((__v16bf)(__m256bh)(A)))
31
32#define _mm256_mask_ipcvts_bf16_epi8(W, U, A) \
33 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
34 (__v16hi)_mm256_ipcvts_bf16_epi8(A), \
35 (__v16hi)(__m256i)(W)))
36
37#define _mm256_maskz_ipcvts_bf16_epi8(U, A) \
38 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
39 (__v16hi)_mm256_ipcvts_bf16_epi8(A), \
40 (__v16hi)_mm256_setzero_si256()))
41
42#define _mm_ipcvts_bf16_epu8(A) \
43 ((__m128i)__builtin_ia32_vcvtbf162iubs128((__v8bf)(__m128bh)(A)))
44
45#define _mm_mask_ipcvts_bf16_epu8(W, U, A) \
46 ((__m128i)__builtin_ia32_selectw_128( \
47 (__mmask8)(U), (__v8hi)_mm_ipcvts_bf16_epu8(A), (__v8hi)(__m128i)(W)))
48
49#define _mm_maskz_ipcvts_bf16_epu8(U, A) \
50 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
51 (__v8hi)_mm_ipcvts_bf16_epu8(A), \
52 (__v8hi)_mm_setzero_si128()))
53
54#define _mm256_ipcvts_bf16_epu8(A) \
55 ((__m256i)__builtin_ia32_vcvtbf162iubs256((__v16bf)(__m256bh)(A)))
56
57#define _mm256_mask_ipcvts_bf16_epu8(W, U, A) \
58 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
59 (__v16hi)_mm256_ipcvts_bf16_epu8(A), \
60 (__v16hi)(__m256i)(W)))
61
62#define _mm256_maskz_ipcvts_bf16_epu8(U, A) \
63 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
64 (__v16hi)_mm256_ipcvts_bf16_epu8(A), \
65 (__v16hi)_mm256_setzero_si256()))
66
67#define _mm_ipcvts_ph_epi8(A) \
68 ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
69 (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
70
71#define _mm_mask_ipcvts_ph_epi8(W, U, A) \
72 ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A), \
73 (__v8hu)(W), (__mmask8)(U)))
74
75#define _mm_maskz_ipcvts_ph_epi8(U, A) \
76 ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
77 (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
78
79#define _mm256_ipcvts_ph_epi8(A) \
80 ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
81 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))
82
83#define _mm256_mask_ipcvts_ph_epi8(W, U, A) \
84 ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
85 (__v16hu)(W), (__mmask16)(U)))
86
87#define _mm256_maskz_ipcvts_ph_epi8(U, A) \
88 ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
89 (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
90 (__mmask16)(U)))
91
92#define _mm_ipcvts_ph_epu8(A) \
93 ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
94 (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
95
96#define _mm_mask_ipcvts_ph_epu8(W, U, A) \
97 ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A), \
98 (__v8hu)(W), (__mmask8)(U)))
99
100#define _mm_maskz_ipcvts_ph_epu8(U, A) \
101 ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
102 (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
103
104#define _mm256_ipcvts_ph_epu8(A) \
105 ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
106 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))
107
108#define _mm256_mask_ipcvts_ph_epu8(W, U, A) \
109 ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A), \
110 (__v16hu)(W), (__mmask16)(U)))
111
112#define _mm256_maskz_ipcvts_ph_epu8(U, A) \
113 ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
114 (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
115 (__mmask16)(U)))
116
117#define _mm_ipcvts_ps_epi8(A) \
118 ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
119 (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
120
121#define _mm_mask_ipcvts_ps_epi8(W, U, A) \
122 ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A), \
123 (__v4su)(W), (__mmask8)(U)))
124
125#define _mm_maskz_ipcvts_ps_epi8(U, A) \
126 ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
127 (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
128
129#define _mm256_ipcvts_ps_epi8(A) \
130 ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
131 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1))
132
133#define _mm256_mask_ipcvts_ps_epi8(W, U, A) \
134 ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
135 (__v8su)(W), (__mmask8)(U)))
136
137#define _mm256_maskz_ipcvts_ps_epi8(U, A) \
138 ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
139 (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U)))
140
141#define _mm_ipcvts_ps_epu8(A) \
142 ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
143 (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
144
145#define _mm_mask_ipcvts_ps_epu8(W, U, A) \
146 ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A), \
147 (__v4su)(W), (__mmask8)(U)))
148
149#define _mm_maskz_ipcvts_ps_epu8(U, A) \
150 ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
151 (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
152
153#define _mm256_ipcvts_ps_epu8(A) \
154 ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
155 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1))
156
157#define _mm256_mask_ipcvts_ps_epu8(W, U, A) \
158 ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
159 (__v8su)(W), (__mmask8)(U)))
160
161#define _mm256_maskz_ipcvts_ps_epu8(U, A) \
162 ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
163 (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U)))
164
165#define _mm_ipcvtts_bf16_epi8(A) \
166 ((__m128i)__builtin_ia32_vcvttbf162ibs128((__v8bf)(__m128bh)(A)))
167
168#define _mm_mask_ipcvtts_bf16_epi8(W, U, A) \
169 ((__m128i)__builtin_ia32_selectw_128( \
170 (__mmask8)(U), (__v8hi)_mm_ipcvtts_bf16_epi8(A), (__v8hi)(__m128i)(W)))
171
172#define _mm_maskz_ipcvtts_bf16_epi8(U, A) \
173 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
174 (__v8hi)_mm_ipcvtts_bf16_epi8(A), \
175 (__v8hi)_mm_setzero_si128()))
176
177#define _mm256_ipcvtts_bf16_epi8(A) \
178 ((__m256i)__builtin_ia32_vcvttbf162ibs256((__v16bf)(__m256bh)(A)))
179
180#define _mm256_mask_ipcvtts_bf16_epi8(W, U, A) \
181 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
182 (__v16hi)_mm256_ipcvtts_bf16_epi8(A), \
183 (__v16hi)(__m256i)(W)))
184
185#define _mm256_maskz_ipcvtts_bf16_epi8(U, A) \
186 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
187 (__v16hi)_mm256_ipcvtts_bf16_epi8(A), \
188 (__v16hi)_mm256_setzero_si256()))
189
190#define _mm_ipcvtts_bf16_epu8(A) \
191 ((__m128i)__builtin_ia32_vcvttbf162iubs128((__v8bf)(__m128bh)(A)))
192
193#define _mm_mask_ipcvtts_bf16_epu8(W, U, A) \
194 ((__m128i)__builtin_ia32_selectw_128( \
195 (__mmask8)(U), (__v8hi)_mm_ipcvtts_bf16_epu8(A), (__v8hi)(__m128i)(W)))
196
197#define _mm_maskz_ipcvtts_bf16_epu8(U, A) \
198 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
199 (__v8hi)_mm_ipcvtts_bf16_epu8(A), \
200 (__v8hi)_mm_setzero_si128()))
201
202#define _mm256_ipcvtts_bf16_epu8(A) \
203 ((__m256i)__builtin_ia32_vcvttbf162iubs256((__v16bf)(__m256bh)(A)))
204
205#define _mm256_mask_ipcvtts_bf16_epu8(W, U, A) \
206 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
207 (__v16hi)_mm256_ipcvtts_bf16_epu8(A), \
208 (__v16hi)(__m256i)(W)))
209
210#define _mm256_maskz_ipcvtts_bf16_epu8(U, A) \
211 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
212 (__v16hi)_mm256_ipcvtts_bf16_epu8(A), \
213 (__v16hi)_mm256_setzero_si256()))
214
215#define _mm_ipcvtts_ph_epi8(A) \
216 ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
217 (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
218
219#define _mm_mask_ipcvtts_ph_epi8(W, U, A) \
220 ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A), \
221 (__v8hu)(W), (__mmask8)(U)))
222
223#define _mm_maskz_ipcvtts_ph_epi8(U, A) \
224 ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
225 (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
226
227#define _mm256_ipcvtts_ph_epi8(A) \
228 ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
229 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))
230
231#define _mm256_mask_ipcvtts_ph_epi8(W, U, A) \
232 ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A), \
233 (__v16hu)(W), (__mmask16)(U)))
234
235#define _mm256_maskz_ipcvtts_ph_epi8(U, A) \
236 ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
237 (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
238 (__mmask16)(U)))
239
240#define _mm_ipcvtts_ph_epu8(A) \
241 ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
242 (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
243
244#define _mm_mask_ipcvtts_ph_epu8(W, U, A) \
245 ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A), \
246 (__v8hu)(W), (__mmask8)(U)))
247
248#define _mm_maskz_ipcvtts_ph_epu8(U, A) \
249 ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
250 (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
251
252#define _mm256_ipcvtts_ph_epu8(A) \
253 ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
254 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))
255
256#define _mm256_mask_ipcvtts_ph_epu8(W, U, A) \
257 ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A), \
258 (__v16hu)(W), (__mmask16)(U)))
259
260#define _mm256_maskz_ipcvtts_ph_epu8(U, A) \
261 ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
262 (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
263 (__mmask16)(U)))
264
265#define _mm_ipcvtts_ps_epi8(A) \
266 ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
267 (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
268
269#define _mm_mask_ipcvtts_ps_epi8(W, U, A) \
270 ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A), \
271 (__v4su)(W), (__mmask8)(U)))
272
273#define _mm_maskz_ipcvtts_ps_epi8(U, A) \
274 ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
275 (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
276
277#define _mm256_ipcvtts_ps_epi8(A) \
278 ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
279 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1))
280
281#define _mm256_mask_ipcvtts_ps_epi8(W, U, A) \
282 ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
283 (__v8su)(W), (__mmask8)(U)))
284
285#define _mm256_maskz_ipcvtts_ps_epi8(U, A) \
286 ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
287 (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U)))
288
289#define _mm_ipcvtts_ps_epu8(A) \
290 ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
291 (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
292
293#define _mm_mask_ipcvtts_ps_epu8(W, U, A) \
294 ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A), \
295 (__v4su)(W), (__mmask8)(U)))
296
297#define _mm_maskz_ipcvtts_ps_epu8(U, A) \
298 ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
299 (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
300
301#define _mm256_ipcvtts_ps_epu8(A) \
302 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
303 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1))
304
305#define _mm256_mask_ipcvtts_ps_epu8(W, U, A) \
306 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask((__v8sf)(__m256)(A), \
307 (__v8su)(W), (__mmask8)(U)))
308
309#define _mm256_maskz_ipcvtts_ps_epu8(U, A) \
310 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
311 (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U)))
312#endif // __AVX10_2SATCVTINTRIN_H