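/*
 * avx10_2satcvtintrin.h
 *
 * This header is not meant to be included directly. The only part of the
 * inclusion check that survives in this copy is its diagnostic text:
 *
 *   "Never use <avx10_2satcvtintrin.h> directly; include <immintrin.h> instead."
 *
 * NOTE(review): the preprocessor lines that emitted this message are missing
 * from this copy and should be restored from the original header.
 */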
14#ifndef __AVX10_2SATCVTINTRIN_H
15#define __AVX10_2SATCVTINTRIN_H
/*
 * 128-bit _mm_*ipcvts_bf16_epi8 family, wrapping
 * __builtin_ia32_vcvtbf162ibs128.
 *
 *   _mm_ipcvts_bf16_epi8(A)
 *       Calls the builtin on A viewed as __v8bf; the result is typed __m128i.
 *   _mm_mask_ipcvts_bf16_epi8(W, U, A)
 *       Gives mask U, that result and W (both viewed as __v8hi) to
 *       __builtin_ia32_selectw_128.
 *   _mm_maskz_ipcvts_bf16_epi8(U, A)
 *       Same select, with _mm_setzero_si128() in place of W.
 *
 * NOTE(review): how a set bit of U chooses between the two select operands
 * is not visible in this header -- confirm against the builtin's definition.
 */
#define _mm_ipcvts_bf16_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtbf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvts_bf16_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvts_bf16_epi8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvts_bf16_epi8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvts_bf16_epi8(A), \
                                       (__v8hi)_mm_setzero_si128()))
/*
 * 256-bit _mm256_*ipcvts_bf16_epi8 family, wrapping
 * __builtin_ia32_vcvtbf162ibs256.
 *
 *   _mm256_ipcvts_bf16_epi8(A)
 *       Calls the builtin on A viewed as __v16bf; the result is typed __m256i.
 *   _mm256_mask_ipcvts_bf16_epi8(W, U, A)
 *       Gives mask U, that result and W (both viewed as __v16hi) to
 *       __builtin_ia32_selectw_256.
 *   _mm256_maskz_ipcvts_bf16_epi8(U, A)
 *       Same select, with _mm256_setzero_si256() in place of W.
 *
 * NOTE(review): how a set bit of U chooses between the two select operands
 * is not visible in this header -- confirm against the builtin's definition.
 */
#define _mm256_ipcvts_bf16_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtbf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvts_bf16_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvts_bf16_epi8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvts_bf16_epi8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvts_bf16_epi8(A), \
                                       (__v16hi)_mm256_setzero_si256()))
/*
 * 128-bit _mm_*ipcvts_bf16_epu8 family, wrapping
 * __builtin_ia32_vcvtbf162iubs128.
 *
 *   _mm_ipcvts_bf16_epu8(A)
 *       Calls the builtin on A viewed as __v8bf; the result is typed __m128i.
 *   _mm_mask_ipcvts_bf16_epu8(W, U, A)
 *       Gives mask U, that result and W (both viewed as __v8hi) to
 *       __builtin_ia32_selectw_128.
 *   _mm_maskz_ipcvts_bf16_epu8(U, A)
 *       Same select, with _mm_setzero_si128() in place of W.
 *
 * NOTE(review): how a set bit of U chooses between the two select operands
 * is not visible in this header -- confirm against the builtin's definition.
 */
#define _mm_ipcvts_bf16_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtbf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvts_bf16_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvts_bf16_epu8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvts_bf16_epu8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvts_bf16_epu8(A), \
                                       (__v8hi)_mm_setzero_si128()))
/*
 * 256-bit _mm256_*ipcvts_bf16_epu8 family, wrapping
 * __builtin_ia32_vcvtbf162iubs256.
 *
 *   _mm256_ipcvts_bf16_epu8(A)
 *       Calls the builtin on A viewed as __v16bf; the result is typed __m256i.
 *   _mm256_mask_ipcvts_bf16_epu8(W, U, A)
 *       Gives mask U, that result and W (both viewed as __v16hi) to
 *       __builtin_ia32_selectw_256.
 *   _mm256_maskz_ipcvts_bf16_epu8(U, A)
 *       Same select, with _mm256_setzero_si256() in place of W.
 *
 * NOTE(review): how a set bit of U chooses between the two select operands
 * is not visible in this header -- confirm against the builtin's definition.
 */
#define _mm256_ipcvts_bf16_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtbf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvts_bf16_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvts_bf16_epu8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvts_bf16_epu8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvts_bf16_epu8(A), \
                                       (__v16hi)_mm256_setzero_si256()))
/*
 * 128-bit _mm_*ipcvts_ph_epi8 family. Every form expands to one call of
 * __builtin_ia32_vcvtph2ibs128_mask(source, second vector operand, mask),
 * typed __m128i.
 *
 *   _mm_ipcvts_ph_epi8(A)
 *       A as __v8hf, _mm_setzero_si128() as __v8hu, mask (__mmask8)-1.
 *   _mm_mask_ipcvts_ph_epi8(W, U, A)
 *       A as __v8hf, W as __v8hu, mask U.
 *   _mm_maskz_ipcvts_ph_epi8(U, A)
 *       A as __v8hf, _mm_setzero_si128() as __v8hu, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm_ipcvts_ph_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvts_ph_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A), \
                                              (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvts_ph_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
/*
 * 256-bit _mm256_*ipcvts_ph_epi8 family. Every form expands to one call of
 * __builtin_ia32_vcvtph2ibs256_mask(source, second vector operand, mask),
 * typed __m256i.
 *
 *   _mm256_ipcvts_ph_epi8(A)
 *       A as __v16hf, _mm256_setzero_si256() as __v16hu, mask (__mmask16)-1.
 *   _mm256_mask_ipcvts_ph_epi8(W, U, A)
 *       A as __v16hf, W as __v16hu, mask U.
 *   _mm256_maskz_ipcvts_ph_epi8(U, A)
 *       A as __v16hf, _mm256_setzero_si256() as __v16hu, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm256_ipcvts_ph_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))

#define _mm256_mask_ipcvts_ph_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
                                              (__v16hu)(W), (__mmask16)(U)))

/* The mask argument and closing parentheses must terminate this macro; a
 * trailing continuation backslash here would splice the next #define into
 * the expansion. */
#define _mm256_maskz_ipcvts_ph_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U)))
/*
 * 128-bit _mm_*ipcvts_ph_epu8 family. Every form expands to one call of
 * __builtin_ia32_vcvtph2iubs128_mask(source, second vector operand, mask),
 * typed __m128i.
 *
 *   _mm_ipcvts_ph_epu8(A)
 *       A as __v8hf, _mm_setzero_si128() as __v8hu, mask (__mmask8)-1.
 *   _mm_mask_ipcvts_ph_epu8(W, U, A)
 *       A as __v8hf, W as __v8hu, mask U.
 *   _mm_maskz_ipcvts_ph_epu8(U, A)
 *       A as __v8hf, _mm_setzero_si128() as __v8hu, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm_ipcvts_ph_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvts_ph_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A), \
                                               (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvts_ph_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
/*
 * 256-bit _mm256_*ipcvts_ph_epu8 family. Every form expands to one call of
 * __builtin_ia32_vcvtph2iubs256_mask(source, second vector operand, mask),
 * typed __m256i.
 *
 *   _mm256_ipcvts_ph_epu8(A)
 *       A as __v16hf, _mm256_setzero_si256() as __v16hu, mask (__mmask16)-1.
 *   _mm256_mask_ipcvts_ph_epu8(W, U, A)
 *       A as __v16hf, W as __v16hu, mask U.
 *   _mm256_maskz_ipcvts_ph_epu8(U, A)
 *       A as __v16hf, _mm256_setzero_si256() as __v16hu, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm256_ipcvts_ph_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))

#define _mm256_mask_ipcvts_ph_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A), \
                                               (__v16hu)(W), (__mmask16)(U)))

/* The mask argument and closing parentheses must terminate this macro; a
 * trailing continuation backslash here would splice the next #define into
 * the expansion. */
#define _mm256_maskz_ipcvts_ph_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U)))
/*
 * 128-bit _mm_*ipcvts_ps_epi8 family. Every form expands to one call of
 * __builtin_ia32_vcvtps2ibs128_mask(source, second vector operand, mask),
 * typed __m128i.
 *
 *   _mm_ipcvts_ps_epi8(A)
 *       A as __v4sf, _mm_setzero_si128() as __v4su, mask (__mmask8)-1.
 *   _mm_mask_ipcvts_ps_epi8(W, U, A)
 *       A as __v4sf, W as __v4su, mask U.
 *   _mm_maskz_ipcvts_ps_epi8(U, A)
 *       A as __v4sf, _mm_setzero_si128() as __v4su, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm_ipcvts_ps_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvts_ps_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A), \
                                              (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvts_ps_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
/*
 * 256-bit _mm256_*ipcvts_ps_epi8 family. Every form expands to one call of
 * __builtin_ia32_vcvtps2ibs256_mask(source, second vector operand, mask),
 * typed __m256i.
 *
 *   _mm256_ipcvts_ps_epi8(A)
 *       A as __v8sf, _mm256_setzero_si256() as __v8su, mask (__mmask8)-1.
 *   _mm256_mask_ipcvts_ps_epi8(W, U, A)
 *       A as __v8sf, W as __v8su, mask U.
 *   _mm256_maskz_ipcvts_ps_epi8(U, A)
 *       A as __v8sf, _mm256_setzero_si256() as __v8su, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm256_ipcvts_ps_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1))

#define _mm256_mask_ipcvts_ps_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
                                              (__v8su)(W), (__mmask8)(U)))

#define _mm256_maskz_ipcvts_ps_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U)))
/*
 * 128-bit _mm_*ipcvts_ps_epu8 family. Every form expands to one call of
 * __builtin_ia32_vcvtps2iubs128_mask(source, second vector operand, mask),
 * typed __m128i.
 *
 *   _mm_ipcvts_ps_epu8(A)
 *       A as __v4sf, _mm_setzero_si128() as __v4su, mask (__mmask8)-1.
 *   _mm_mask_ipcvts_ps_epu8(W, U, A)
 *       A as __v4sf, W as __v4su, mask U.
 *   _mm_maskz_ipcvts_ps_epu8(U, A)
 *       A as __v4sf, _mm_setzero_si128() as __v4su, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm_ipcvts_ps_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvts_ps_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A), \
                                               (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvts_ps_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
/*
 * 256-bit _mm256_*ipcvts_ps_epu8 family. Every form expands to one call of
 * __builtin_ia32_vcvtps2iubs256_mask(source, second vector operand, mask),
 * typed __m256i.
 *
 *   _mm256_ipcvts_ps_epu8(A)
 *       A as __v8sf, _mm256_setzero_si256() as __v8su, mask (__mmask8)-1.
 *   _mm256_mask_ipcvts_ps_epu8(W, U, A)
 *       A as __v8sf, W as __v8su, mask U.
 *   _mm256_maskz_ipcvts_ps_epu8(U, A)
 *       A as __v8sf, _mm256_setzero_si256() as __v8su, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm256_ipcvts_ps_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1))

#define _mm256_mask_ipcvts_ps_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)(W), (__mmask8)(U)))

#define _mm256_maskz_ipcvts_ps_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U)))
/*
 * 128-bit _mm_*ipcvtts_bf16_epi8 family, wrapping
 * __builtin_ia32_vcvttbf162ibs128.
 *
 *   _mm_ipcvtts_bf16_epi8(A)
 *       Calls the builtin on A viewed as __v8bf; the result is typed __m128i.
 *   _mm_mask_ipcvtts_bf16_epi8(W, U, A)
 *       Gives mask U, that result and W (both viewed as __v8hi) to
 *       __builtin_ia32_selectw_128.
 *   _mm_maskz_ipcvtts_bf16_epi8(U, A)
 *       Same select, with _mm_setzero_si128() in place of W.
 *
 * NOTE(review): how a set bit of U chooses between the two select operands
 * is not visible in this header -- confirm against the builtin's definition.
 */
#define _mm_ipcvtts_bf16_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttbf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtts_bf16_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvtts_bf16_epi8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtts_bf16_epi8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvtts_bf16_epi8(A), \
                                       (__v8hi)_mm_setzero_si128()))
/*
 * 256-bit _mm256_*ipcvtts_bf16_epi8 family, wrapping
 * __builtin_ia32_vcvttbf162ibs256.
 *
 *   _mm256_ipcvtts_bf16_epi8(A)
 *       Calls the builtin on A viewed as __v16bf; the result is typed __m256i.
 *   _mm256_mask_ipcvtts_bf16_epi8(W, U, A)
 *       Gives mask U, that result and W (both viewed as __v16hi) to
 *       __builtin_ia32_selectw_256.
 *   _mm256_maskz_ipcvtts_bf16_epi8(U, A)
 *       Same select, with _mm256_setzero_si256() in place of W.
 *
 * NOTE(review): how a set bit of U chooses between the two select operands
 * is not visible in this header -- confirm against the builtin's definition.
 */
#define _mm256_ipcvtts_bf16_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttbf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtts_bf16_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtts_bf16_epi8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtts_bf16_epi8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtts_bf16_epi8(A), \
                                       (__v16hi)_mm256_setzero_si256()))
/*
 * 128-bit _mm_*ipcvtts_bf16_epu8 family, wrapping
 * __builtin_ia32_vcvttbf162iubs128.
 *
 *   _mm_ipcvtts_bf16_epu8(A)
 *       Calls the builtin on A viewed as __v8bf; the result is typed __m128i.
 *   _mm_mask_ipcvtts_bf16_epu8(W, U, A)
 *       Gives mask U, that result and W (both viewed as __v8hi) to
 *       __builtin_ia32_selectw_128.
 *   _mm_maskz_ipcvtts_bf16_epu8(U, A)
 *       Same select, with _mm_setzero_si128() in place of W.
 *
 * NOTE(review): how a set bit of U chooses between the two select operands
 * is not visible in this header -- confirm against the builtin's definition.
 */
#define _mm_ipcvtts_bf16_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttbf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtts_bf16_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvtts_bf16_epu8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtts_bf16_epu8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvtts_bf16_epu8(A), \
                                       (__v8hi)_mm_setzero_si128()))
/*
 * 256-bit _mm256_*ipcvtts_bf16_epu8 family, wrapping
 * __builtin_ia32_vcvttbf162iubs256.
 *
 *   _mm256_ipcvtts_bf16_epu8(A)
 *       Calls the builtin on A viewed as __v16bf; the result is typed __m256i.
 *   _mm256_mask_ipcvtts_bf16_epu8(W, U, A)
 *       Gives mask U, that result and W (both viewed as __v16hi) to
 *       __builtin_ia32_selectw_256.
 *   _mm256_maskz_ipcvtts_bf16_epu8(U, A)
 *       Same select, with _mm256_setzero_si256() in place of W.
 *
 * NOTE(review): how a set bit of U chooses between the two select operands
 * is not visible in this header -- confirm against the builtin's definition.
 */
#define _mm256_ipcvtts_bf16_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttbf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtts_bf16_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtts_bf16_epu8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtts_bf16_epu8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtts_bf16_epu8(A), \
                                       (__v16hi)_mm256_setzero_si256()))
/*
 * 128-bit _mm_*ipcvtts_ph_epi8 family. Every form expands to one call of
 * __builtin_ia32_vcvttph2ibs128_mask(source, second vector operand, mask),
 * typed __m128i.
 *
 *   _mm_ipcvtts_ph_epi8(A)
 *       A as __v8hf, _mm_setzero_si128() as __v8hu, mask (__mmask8)-1.
 *   _mm_mask_ipcvtts_ph_epi8(W, U, A)
 *       A as __v8hf, W as __v8hu, mask U.
 *   _mm_maskz_ipcvtts_ph_epi8(U, A)
 *       A as __v8hf, _mm_setzero_si128() as __v8hu, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm_ipcvtts_ph_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtts_ph_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A), \
                                               (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtts_ph_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
/*
 * 256-bit _mm256_*ipcvtts_ph_epi8 family. Every form expands to one call of
 * __builtin_ia32_vcvttph2ibs256_mask(source, second vector operand, mask),
 * typed __m256i.
 *
 *   _mm256_ipcvtts_ph_epi8(A)
 *       A as __v16hf, _mm256_setzero_si256() as __v16hu, mask (__mmask16)-1.
 *   _mm256_mask_ipcvtts_ph_epi8(W, U, A)
 *       A as __v16hf, W as __v16hu, mask U.
 *   _mm256_maskz_ipcvtts_ph_epi8(U, A)
 *       A as __v16hf, _mm256_setzero_si256() as __v16hu, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm256_ipcvtts_ph_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))

#define _mm256_mask_ipcvtts_ph_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A), \
                                               (__v16hu)(W), (__mmask16)(U)))

/* The mask argument and closing parentheses must terminate this macro; a
 * trailing continuation backslash here would splice the next #define into
 * the expansion. */
#define _mm256_maskz_ipcvtts_ph_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U)))
/*
 * 128-bit _mm_*ipcvtts_ph_epu8 family. Every form expands to one call of
 * __builtin_ia32_vcvttph2iubs128_mask(source, second vector operand, mask),
 * typed __m128i.
 *
 *   _mm_ipcvtts_ph_epu8(A)
 *       A as __v8hf, _mm_setzero_si128() as __v8hu, mask (__mmask8)-1.
 *   _mm_mask_ipcvtts_ph_epu8(W, U, A)
 *       A as __v8hf, W as __v8hu, mask U.
 *   _mm_maskz_ipcvtts_ph_epu8(U, A)
 *       A as __v8hf, _mm_setzero_si128() as __v8hu, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm_ipcvtts_ph_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtts_ph_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A), \
                                                (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtts_ph_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
/*
 * 256-bit _mm256_*ipcvtts_ph_epu8 family. Every form expands to one call of
 * __builtin_ia32_vcvttph2iubs256_mask(source, second vector operand, mask),
 * typed __m256i.
 *
 *   _mm256_ipcvtts_ph_epu8(A)
 *       A as __v16hf, _mm256_setzero_si256() as __v16hu, mask (__mmask16)-1.
 *   _mm256_mask_ipcvtts_ph_epu8(W, U, A)
 *       A as __v16hf, W as __v16hu, mask U.
 *   _mm256_maskz_ipcvtts_ph_epu8(U, A)
 *       A as __v16hf, _mm256_setzero_si256() as __v16hu, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm256_ipcvtts_ph_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))

#define _mm256_mask_ipcvtts_ph_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A), \
                                                (__v16hu)(W), (__mmask16)(U)))

/* The mask argument and closing parentheses must terminate this macro; a
 * trailing continuation backslash here would splice the next #define into
 * the expansion. */
#define _mm256_maskz_ipcvtts_ph_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U)))
/*
 * 128-bit _mm_*ipcvtts_ps_epi8 family. Every form expands to one call of
 * __builtin_ia32_vcvttps2ibs128_mask(source, second vector operand, mask),
 * typed __m128i.
 *
 *   _mm_ipcvtts_ps_epi8(A)
 *       A as __v4sf, _mm_setzero_si128() as __v4su, mask (__mmask8)-1.
 *   _mm_mask_ipcvtts_ps_epi8(W, U, A)
 *       A as __v4sf, W as __v4su, mask U.
 *   _mm_maskz_ipcvtts_ps_epi8(U, A)
 *       A as __v4sf, _mm_setzero_si128() as __v4su, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm_ipcvtts_ps_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtts_ps_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A), \
                                               (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtts_ps_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
/*
 * 256-bit _mm256_*ipcvtts_ps_epi8 family. Every form expands to one call of
 * __builtin_ia32_vcvttps2ibs256_mask(source, second vector operand, mask),
 * typed __m256i.
 *
 *   _mm256_ipcvtts_ps_epi8(A)
 *       A as __v8sf, _mm256_setzero_si256() as __v8su, mask (__mmask8)-1.
 *   _mm256_mask_ipcvtts_ps_epi8(W, U, A)
 *       A as __v8sf, W as __v8su, mask U.
 *   _mm256_maskz_ipcvtts_ps_epi8(U, A)
 *       A as __v8sf, _mm256_setzero_si256() as __v8su, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm256_ipcvtts_ps_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1))

#define _mm256_mask_ipcvtts_ps_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)(W), (__mmask8)(U)))

#define _mm256_maskz_ipcvtts_ps_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U)))
/*
 * 128-bit _mm_*ipcvtts_ps_epu8 family. Every form expands to one call of
 * __builtin_ia32_vcvttps2iubs128_mask(source, second vector operand, mask),
 * typed __m128i.
 *
 *   _mm_ipcvtts_ps_epu8(A)
 *       A as __v4sf, _mm_setzero_si128() as __v4su, mask (__mmask8)-1.
 *   _mm_mask_ipcvtts_ps_epu8(W, U, A)
 *       A as __v4sf, W as __v4su, mask U.
 *   _mm_maskz_ipcvtts_ps_epu8(U, A)
 *       A as __v4sf, _mm_setzero_si128() as __v4su, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm_ipcvtts_ps_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtts_ps_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A), \
                                                (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtts_ps_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
/*
 * 256-bit _mm256_*ipcvtts_ps_epu8 family. Every form expands to one call of
 * __builtin_ia32_vcvttps2iubs256_mask(source, second vector operand, mask),
 * typed __m256i.
 *
 *   _mm256_ipcvtts_ps_epu8(A)
 *       A as __v8sf, _mm256_setzero_si256() as __v8su, mask (__mmask8)-1.
 *   _mm256_mask_ipcvtts_ps_epu8(W, U, A)
 *       A as __v8sf, W as __v8su, mask U.
 *   _mm256_maskz_ipcvtts_ps_epu8(U, A)
 *       A as __v8sf, _mm256_setzero_si256() as __v8su, mask U.
 *
 * NOTE(review): the second operand is presumably the value kept in lanes
 * whose mask bit is clear (inferred from the mask/maskz naming) -- confirm.
 */
#define _mm256_ipcvtts_ps_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1))

#define _mm256_mask_ipcvtts_ps_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask((__v8sf)(__m256)(A), \
                                                (__v8su)(W), (__mmask8)(U)))

#define _mm256_maskz_ipcvtts_ps_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U)))