11#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
14#ifndef __AVX512VLBWINTRIN_H
15#define __AVX512VLBWINTRIN_H
/* Default attributes for the intrinsics in this file: always inlined,
   no debug info, require AVX512VL+AVX512BW, and advertise the minimum
   vector width (128 or 256 bits) so the backend can legalize correctly. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512bw"), __min_vector_width__(256)))
/* Under C++11 and later these wrappers may additionally be constexpr;
   otherwise the *_CONSTEXPR attribute sets are identical to the plain ones.
   (The #else/#endif were missing in the extracted text, leaving the #if
   unterminated and both macro pairs redefined.) */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif
/* Generic 128-bit byte compares: compare a and b element-wise with
   predicate p (a _MM_CMPINT_* value) and return the result as a 16-bit
   mask.  The unmasked forms pass an all-ones write mask; the _mask_
   forms AND the result with m.  epi8 = signed, epu8 = unsigned compare.
   (The final default-mask argument line of each macro was truncated in
   the extracted text and is restored here.) */
#define _mm_cmp_epi8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm_cmp_epu8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (int)(p), \
                                           (__mmask16)(m)))
/* Generic 256-bit byte compares: as the 128-bit forms, but over 32 bytes,
   producing a 32-bit mask.  (Truncated default-mask argument lines
   restored.) */
#define _mm256_cmp_epi8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
                                          (__v32qi)(__m256i)(b), (int)(p), \
                                          (__mmask32)-1))

#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
                                          (__v32qi)(__m256i)(b), (int)(p), \
                                          (__mmask32)(m)))

#define _mm256_cmp_epu8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
                                           (__v32qi)(__m256i)(b), (int)(p), \
                                           (__mmask32)-1))

#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
                                           (__v32qi)(__m256i)(b), (int)(p), \
                                           (__mmask32)(m)))
/* Generic 128-bit word (16-bit element) compares, producing an 8-bit mask.
   (Truncated default-mask argument lines restored.) */
#define _mm_cmp_epi16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
                                         (__v8hi)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
                                         (__v8hi)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
                                          (__v8hi)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
                                          (__v8hi)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
/* Generic 256-bit word (16-bit element) compares, producing a 16-bit mask.
   (Truncated default-mask argument lines restored.) */
#define _mm256_cmp_epi16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
                                          (__v16hi)(__m256i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
                                          (__v16hi)(__m256i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm256_cmp_epu16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
                                           (__v16hi)(__m256i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
                                           (__v16hi)(__m256i)(b), (int)(p), \
                                           (__mmask16)(m)))
/* 128-bit signed-byte comparisons with a fixed predicate, plus their
   write-masked variants; each is sugar over the generic compare macro. */
#define _mm_cmpeq_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
/* 256-bit signed-byte comparisons with a fixed predicate, plus their
   write-masked variants. */
#define _mm256_cmpeq_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
/* 128-bit unsigned-byte comparisons with a fixed predicate, plus their
   write-masked variants. */
#define _mm_cmpeq_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
/* 256-bit unsigned-byte comparisons with a fixed predicate, plus their
   write-masked variants. */
#define _mm256_cmpeq_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
/* 128-bit signed-word comparisons with a fixed predicate, plus their
   write-masked variants. */
#define _mm_cmpeq_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
/* 256-bit signed-word comparisons with a fixed predicate, plus their
   write-masked variants. */
#define _mm256_cmpeq_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
/* 128-bit unsigned-word comparisons with a fixed predicate, plus their
   write-masked variants. */
#define _mm_cmpeq_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
/* 256-bit unsigned-word comparisons with a fixed predicate, plus their
   write-masked variants. */
#define _mm256_cmpeq_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
317 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
324 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
331 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
338 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
345 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
352 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
359 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
366 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
373 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
380 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
387 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
394 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
401 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
408 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
415 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
422 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
429 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
436 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
443 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
450 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
457 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
464 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
471 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
478 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
485 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
492 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
499 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
506 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
513 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
520 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
527 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
534 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
541 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
549 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
557 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
565 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
573 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
581 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
589 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
597 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
605 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
613 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
621 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
629 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
637 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
645 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
653 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
661 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
669 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
677 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
685 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
693 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
701 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
709 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
717 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
725 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
733 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
741 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
749 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
757 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
765 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
773 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
781 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
789 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
796 return (__m128i)__builtin_ia32_selectb_128(
802 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
809 return (__m256i)__builtin_ia32_selectb_256(
815 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
822 return (__m128i)__builtin_ia32_selectw_128(
828 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
835 return (__m256i)__builtin_ia32_selectw_256(
841 return (__m256i)__builtin_ia32_selectw_256(
848 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
855 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
862 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
869 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
876 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
883 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
890 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
897 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
904 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
911 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
918 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
925 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
932 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
939 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
946 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
953 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
960 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
967 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
974 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
981 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
988 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
995 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1002 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1009 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1016 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1023 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1030 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1037 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1044 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1051 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1058 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1065 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1072 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1079 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1086 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1093 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1101 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1109 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1117 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1125 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1133 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1141 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1149 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1157 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1165 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1173 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1181 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1189 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1197 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1205 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1213 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1221 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1229 return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1237 return (__m128i)__builtin_ia32_selectw_128(__U,
1246 return (__m128i)__builtin_ia32_selectw_128(__U,
1255 return (__m128i)__builtin_ia32_selectw_128(__U,
1263 return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1271 return (__m256i)__builtin_ia32_selectw_256(__U,
1280 return (__m256i)__builtin_ia32_selectw_256(__U,
1289 return (__m256i)__builtin_ia32_selectw_256(__U,
1296 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1303 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1311 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1318 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1325 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1332 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1339 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
1346 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
1353 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1360 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1367 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1374 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1381 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1388 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1395 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1402 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1409 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1416 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1423 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1430 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1437 return (__m128i)__builtin_shufflevector(
1438 __builtin_convertvector((__v8hi)__A, __v8qi),
1439 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1445 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1452 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1460 __builtin_ia32_pmovwb128mem_mask ((__v16qi *)
__P, (__v8hi) __A, __M);
1467 __builtin_ia32_pmovswb128mem_mask ((__v16qi *)
__P, (__v8hi) __A, __M);
1473 __builtin_ia32_pmovuswb128mem_mask ((__v16qi *)
__P, (__v8hi) __A, __M);
1478 return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
1483 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1490 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1498 __builtin_ia32_pmovwb256mem_mask ((__v16qi *)
__P, (__v16hi) __A, __M);
1504 __builtin_ia32_pmovswb256mem_mask ((__v16qi *)
__P, (__v16hi) __A, __M);
1510 __builtin_ia32_pmovuswb256mem_mask ((__v16qi*)
__P, (__v16hi) __A, __M);
1515 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1522 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1529 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1536 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1543 return (__m128i)__builtin_ia32_selectw_128(
1549 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1556 return (__m256i)__builtin_ia32_selectw_256(
1562 return (__m256i)__builtin_ia32_selectw_256(
1569 return (__m128i)__builtin_ia32_selectw_128(
1575 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1582 return (__m256i)__builtin_ia32_selectw_256(
1588 return (__m256i)__builtin_ia32_selectw_256(
1595 return (__m128i)__builtin_ia32_selectb_128(
1601 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1608 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1615 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1622 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1629 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1636 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1643 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1650 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1657 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1664 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1671 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1678 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1685 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1692 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1699 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1707 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1715 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1723 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1731 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1740 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1748 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1756 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1764 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
/* Write-masked / zero-masked forms of shufflehi: perform the immediate
   high-word shuffle, then per element select the shuffled value where the
   corresponding mask bit is set, otherwise the value from W (mask form)
   or zero (maskz form). */
#define _mm_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
                                       (__v8hi)(__m128i)(W)))

#define _mm_maskz_shufflehi_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_shufflehi_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
                                       (__v16hi)_mm256_setzero_si256()))
/* Write-masked / zero-masked forms of shufflelo, analogous to the
   shufflehi forms.  (In the extracted text the (imm) argument line of the
   two 256-bit macros was dropped, leaving the shuffle call with a missing
   argument; restored here.) */
#define _mm_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
                                       (__v8hi)(__m128i)(W)))

#define _mm_maskz_shufflelo_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_shufflelo_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shufflelo_epi16((A), \
                                                                       (imm)), \
                                       (__v16hi)_mm256_setzero_si256()))
1815 return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
1821 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1829 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1837 return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
1843 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1851 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1859 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1867 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1875 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1883 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1891 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1899 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1907 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1914 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1922 return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
1928 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1936 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1944 return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
1950 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1958 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1966 return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
1972 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1980 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1988 return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
1994 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2002 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2010 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2018 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2026 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2034 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2041 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2048 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2056 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2063 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2071 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2079 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2087 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2095 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2102 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2109 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2116 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2123 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2130 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
2137 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
2144 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
2151 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
2158 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
2165 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
2172 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
2179 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
2187 return (__m128i) __builtin_ia32_selectb_128(__M,
2195 return (__m128i) __builtin_ia32_selectb_128(__M,
2203 return (__m256i) __builtin_ia32_selectb_256(__M,
2211 return (__m256i) __builtin_ia32_selectb_256(__M,
2219 struct __loadu_epi16 {
2222 return ((
const struct __loadu_epi16*)
__P)->__v;
2228 return (__m128i) __builtin_ia32_loaddquhi128_mask ((
const __v8hi *)
__P,
2236 return (__m128i) __builtin_ia32_loaddquhi128_mask ((
const __v8hi *)
__P,
2245 struct __loadu_epi16 {
2248 return ((
const struct __loadu_epi16*)
__P)->__v;
2254 return (__m256i) __builtin_ia32_loaddquhi256_mask ((
const __v16hi *)
__P,
2262 return (__m256i) __builtin_ia32_loaddquhi256_mask ((
const __v16hi *)
__P,
2271 struct __loadu_epi8 {
2274 return ((
const struct __loadu_epi8*)
__P)->__v;
2280 return (__m128i) __builtin_ia32_loaddquqi128_mask ((
const __v16qi *)
__P,
2288 return (__m128i) __builtin_ia32_loaddquqi128_mask ((
const __v16qi *)
__P,
2297 struct __loadu_epi8 {
2300 return ((
const struct __loadu_epi8*)
__P)->__v;
2306 return (__m256i) __builtin_ia32_loaddquqi256_mask ((
const __v32qi *)
__P,
2314 return (__m256i) __builtin_ia32_loaddquqi256_mask ((
const __v32qi *)
__P,
2323 struct __storeu_epi16 {
2326 ((
struct __storeu_epi16*)
__P)->
__v = __A;
2332 __builtin_ia32_storedquhi128_mask ((__v8hi *)
__P,
2340 struct __storeu_epi16 {
2343 ((
struct __storeu_epi16*)
__P)->
__v = __A;
2349 __builtin_ia32_storedquhi256_mask ((__v16hi *)
__P,
2357 struct __storeu_epi8 {
2360 ((
struct __storeu_epi8*)
__P)->
__v = __A;
2366 __builtin_ia32_storedquqi128_mask ((__v16qi *)
__P,
2374 struct __storeu_epi8 {
2377 ((
struct __storeu_epi8*)
__P)->
__v = __A;
2383 __builtin_ia32_storedquqi256_mask ((__v32qi *)
__P,
2494 return (
__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
2500 return (
__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
2506 return (
__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
2512 return (
__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
2518 return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
2524 return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
2530 return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
2536 return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
2542 return (__m128i)__builtin_ia32_selectb_128(__M,
2550 return (__m128i)__builtin_ia32_selectb_128(__M,
2558 return (__m256i)__builtin_ia32_selectb_256(__M,
2566 return (__m256i)__builtin_ia32_selectb_256(__M,
2574 return (__m128i)__builtin_ia32_selectw_128(__M,
2582 return (__m128i)__builtin_ia32_selectw_128(__M,
2590 return (__m256i)__builtin_ia32_selectw_256(__M,
2598 return (__m256i)__builtin_ia32_selectw_256(__M,
2606 return (__m256i) __builtin_ia32_selectw_256 (__M,
2614 return (__m256i) __builtin_ia32_selectw_256(__M,
2622 return (__m128i) __builtin_ia32_selectw_128(__M,
2630 return (__m128i) __builtin_ia32_selectw_128(__M,
2638 return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A);
2644 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
2653 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
2661 return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
2668 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
2677 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
/* Concatenate (A:B), shift the 256-bit pair right by N bytes, take the low
   128 bits, then merge into W under writemask U: result byte i comes from
   the shifted value when bit i of U is set, otherwise from W. */
#define _mm_mask_alignr_epi8(W, U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
                                 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
                                 (__v16qi)(__m128i)(W)))
/* Zero-masking variant of _mm_mask_alignr_epi8: result byte i comes from the
   byte-shifted concatenation of A and B when bit i of U is set, otherwise 0. */
#define _mm_maskz_alignr_epi8(U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
                                 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
                                 (__v16qi)_mm_setzero_si128()))
/* 256-bit merge-masking byte alignr: per 128-bit lane, concatenate A and B,
   shift right by N bytes, then select between the shifted result and W per
   writemask bit of U. */
#define _mm256_mask_alignr_epi8(W, U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
                              (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
                              (__v32qi)(__m256i)(W)))
/* 256-bit zero-masking byte alignr: bytes whose mask bit in U is clear are
   zeroed instead of taken from a pass-through operand. */
#define _mm256_maskz_alignr_epi8(U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
                              (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
                              (__v32qi)_mm256_setzero_si256()))
/* VDBPSADBW: double-block packed sum of absolute differences of unsigned
   bytes, with B's quadwords pre-shuffled per the imm control; produces
   eight 16-bit SAD results. */
#define _mm_dbsad_epu8(A, B, imm) \
  ((__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \
                                       (__v16qi)(__m128i)(B), (int)(imm)))
/* Merge-masking dbsad: 16-bit result elements whose bit in U is clear are
   taken from W instead of the computed SAD. */
#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
                                       (__v8hi)(__m128i)(W)))
/* Zero-masking dbsad: 16-bit result elements whose bit in U is clear are
   zeroed. */
#define _mm_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
                                       (__v8hi)_mm_setzero_si128()))
/* 256-bit VDBPSADBW: per-lane double-block packed SAD of unsigned bytes,
   with B's quadwords pre-shuffled per the imm control; yields sixteen
   16-bit SAD results. */
#define _mm256_dbsad_epu8(A, B, imm) \
  ((__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \
                                       (__v32qi)(__m256i)(B), (int)(imm)))
/* 256-bit merge-masking dbsad: word elements with a clear mask bit in U are
   passed through from W. */
#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                   (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                   (__v16hi)(__m256i)(W)))
/* 256-bit zero-masking dbsad: word elements with a clear mask bit in U are
   zeroed. */
#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                   (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                   (__v16hi)_mm256_setzero_si256()))
2732 return __builtin_reduce_add((__v8hi)__W);
2737 return __builtin_reduce_mul((__v8hi)__W);
2742 return __builtin_reduce_and((__v8hi)__W);
2747 return __builtin_reduce_or((__v8hi)__W);
2753 return __builtin_reduce_add((__v8hi)__W);
2759 return __builtin_reduce_mul((__v8hi)__W);
2765 return __builtin_reduce_and((__v8hi)__W);
2771 return __builtin_reduce_or((__v8hi)__W);
2776 return __builtin_reduce_max((__v8hi)__V);
2781 return __builtin_reduce_max((__v8hu)__V);
2786 return __builtin_reduce_min((__v8hi)__V);
2791 return __builtin_reduce_min((__v8hu)__V);
2797 return __builtin_reduce_max((__v8hi)__V);
2803 return __builtin_reduce_max((__v8hu)__V);
2809 return __builtin_reduce_min((__v8hi)__V);
2815 return __builtin_reduce_min((__v8hu)__V);
2820 return __builtin_reduce_add((__v16hi)__W);
2825 return __builtin_reduce_mul((__v16hi)__W);
2830 return __builtin_reduce_and((__v16hi)__W);
2835 return __builtin_reduce_or((__v16hi)__W);
2841 return __builtin_reduce_add((__v16hi)__W);
2847 return __builtin_reduce_mul((__v16hi)__W);
2853 return __builtin_reduce_and((__v16hi)__W);
2859 return __builtin_reduce_or((__v16hi)__W);
2864 return __builtin_reduce_max((__v16hi)__V);
2869 return __builtin_reduce_max((__v16hu)__V);
2874 return __builtin_reduce_min((__v16hi)__V);
2879 return __builtin_reduce_min((__v16hu)__V);
2885 return __builtin_reduce_max((__v16hi)__V);
2891 return __builtin_reduce_max((__v16hu)__V);
2897 return __builtin_reduce_min((__v16hi)__V);
2903 return __builtin_reduce_min((__v16hu)__V);
2908 return __builtin_reduce_add((__v16qs)__W);
2913 return __builtin_reduce_mul((__v16qs)__W);
2918 return __builtin_reduce_and((__v16qs)__W);
2923 return __builtin_reduce_or((__v16qs)__W);
2929 return __builtin_reduce_add((__v16qs)__W);
2935 return __builtin_reduce_mul((__v16qs)__W);
2941 return __builtin_reduce_and((__v16qs)__W);
2947 return __builtin_reduce_or((__v16qs)__W);
2952 return __builtin_reduce_max((__v16qs)__V);
2957 return __builtin_reduce_max((__v16qu)__V);
2962 return __builtin_reduce_min((__v16qs)__V);
2967 return __builtin_reduce_min((__v16qu)__V);
2973 return __builtin_reduce_max((__v16qs)__V);
2979 return __builtin_reduce_max((__v16qu)__V);
2985 return __builtin_reduce_min((__v16qs)__V);
2991 return __builtin_reduce_min((__v16qu)__V);
2996 return __builtin_reduce_add((__v32qs)__W);
3001 return __builtin_reduce_mul((__v32qs)__W);
3006 return __builtin_reduce_and((__v32qs)__W);
3011 return __builtin_reduce_or((__v32qs)__W);
3017 return __builtin_reduce_add((__v32qs)__W);
3023 return __builtin_reduce_mul((__v32qs)__W);
3029 return __builtin_reduce_and((__v32qs)__W);
3035 return __builtin_reduce_or((__v32qs)__W);
3040 return __builtin_reduce_max((__v32qs)__V);
3045 return __builtin_reduce_max((__v32qu)__V);
3050 return __builtin_reduce_min((__v32qs)__V);
3055 return __builtin_reduce_min((__v32qu)__V);
3061 return __builtin_reduce_max((__v32qs)__V);
3067 return __builtin_reduce_max((__v32qu)__V);
3073 return __builtin_reduce_min((__v32qs)__V);
3079 return __builtin_reduce_min((__v32qu)__V);
3082#undef __DEFAULT_FN_ATTRS128
3083#undef __DEFAULT_FN_ATTRS256
3084#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
3085#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi16(__m256i __a, __m128i __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by the number of bits giv...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epu16(__m256i __a, __m256i __b)
Compares the corresponding unsigned 16-bit integers in the two 256-bit vectors of [16 x i16] in __a a...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epi16(__m256i __a, __m256i __b)
Multiplies signed 16-bit integer elements of two 256-bit vectors of [16 x i16], and returns the upper...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_adds_epu8(__m256i __a, __m256i __b)
Adds 8-bit integers from corresponding bytes of two 256-bit integer vectors using unsigned saturation...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maddubs_epi16(__m256i __a, __m256i __b)
Multiplies each unsigned byte from the 256-bit integer vector in __a with the corresponding signed by...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epu16(__m256i __a, __m256i __b)
Multiplies unsigned 16-bit integer elements of two 256-bit vectors of [16 x i16], and returns the upp...
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastb_epi8(__m128i __X)
Broadcasts the low byte from the 128-bit integer vector in __X to all bytes of the 128-bit result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu8_epi16(__m128i __V)
Zero-extends bytes from the 128-bit integer vector in __V and returns the 16-bit values in the corres...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_subs_epu16(__m256i __a, __m256i __b)
Subtracts 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using unsi...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_packus_epi32(__m256i __V1, __m256i __V2)
Converts elements from two 256-bit vectors of [8 x i32] to 16-bit integers using unsigned saturation,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastb_epi8(__m128i __X)
Broadcasts the low byte from the 128-bit integer vector in __X to all bytes of the 256-bit result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors of [16 x i16] in __a and __...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mullo_epi16(__m256i __a, __m256i __b)
Multiplies signed 16-bit integer elements of two 256-bit vectors of [16 x i16], and returns the lower...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastw_epi16(__m128i __X)
Broadcasts the low element from the 128-bit vector of [8 x i16] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_adds_epi16(__m256i __a, __m256i __b)
Adds 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using signed sa...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epi8(__m256i __a, __m256i __b)
Compares the corresponding signed bytes in the two 256-bit integer vectors in __a and __b and returns...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi16(__m256i __a, __m128i __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a left by the number of bits spec...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srli_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epi16(__m256i __a, __m256i __b)
Compares the corresponding signed 16-bit integers in the two 256-bit vectors of [16 x i16] in __a and...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shuffle_epi8(__m256i __a, __m256i __b)
Shuffles 8-bit integers in the 256-bit integer vector __a according to control information in the 256...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi8_epi16(__m128i __V)
Sign-extends bytes from the 128-bit integer vector in __V and returns the 16-bit values in the corres...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
Unpacks and interleaves 8-bit integers from parts of the 256-bit integer vectors in __a and __b to fo...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastw_epi16(__m128i __X)
Broadcasts the low element from the 128-bit vector of [8 x i16] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srai_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi8(__m256i __a, __m256i __b)
Adds 8-bit integers from corresponding bytes of two 256-bit integer vectors and returns the lower 8 b...
#define __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epu16(__m256i __a, __m256i __b)
Compares the corresponding unsigned 16-bit integers in the two 256-bit vectors of [16 x i16] in __a a...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_subs_epu8(__m256i __a, __m256i __b)
Subtracts 8-bit integers from corresponding bytes of two 256-bit integer vectors using unsigned satur...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_subs_epi16(__m256i __a, __m256i __b)
Subtracts 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using sign...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi8(__m256i __a)
Computes the absolute value of each signed byte in the 256-bit integer vector __a and returns each va...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
Unpacks and interleaves 8-bit integers from parts of the 256-bit integer vectors in __a and __b to fo...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi16(__m256i __a, __m256i __b)
Subtracts 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16].
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_si256(__m256i __a, __m256i __b)
Computes the bitwise AND of the 256-bit integer vectors in __a and __b.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_packus_epi16(__m256i __a, __m256i __b)
Converts elements from two 256-bit vectors of [16 x i16] to 8-bit integers using unsigned saturation,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi16(__m256i __a)
Computes the absolute value of each signed 16-bit element in the 256-bit vector of [16 x i16] in __a ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epu8(__m256i __a, __m256i __b)
Compares the corresponding unsigned bytes in the two 256-bit integer vectors in __a and __b and retur...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi16(__m256i __a, __m256i __b)
Adds 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] and returns the...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epu8(__m256i __a, __m256i __b)
Compares the corresponding unsigned bytes in the two 256-bit integer vectors in __a and __b and retur...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi8(__m256i __a, __m256i __b)
Subtracts 8-bit integers from corresponding bytes of two 256-bit integer vectors.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epi16(__m256i __a, __m256i __b)
Compares the corresponding signed 16-bit integers in the two 256-bit vectors of [16 x i16] in __a and...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_avg_epu16(__m256i __a, __m256i __b)
Computes the averages of the corresponding unsigned 16-bit integers in the two 256-bit vectors of [16...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi16(__m256i __a, __m128i __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by the number of bits giv...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_adds_epu16(__m256i __a, __m256i __b)
Adds 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using unsigned ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_madd_epi16(__m256i __a, __m256i __b)
Multiplies corresponding 16-bit elements of two 256-bit vectors of [16 x i16], forming 32-bit interme...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epi8(__m256i __a, __m256i __b)
Compares the corresponding signed bytes in the two 256-bit integer vectors in __a and __b and returns...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors of [16 x i16] in __a and __...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_subs_epi8(__m256i __a, __m256i __b)
Subtracts 8-bit integers from corresponding bytes of two 256-bit integer vectors using signed saturat...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_packs_epi32(__m256i __a, __m256i __b)
Converts the elements of two 256-bit vectors of [8 x i32] to 16-bit integers using signed saturation,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
Multiplies signed 16-bit integer elements of two 256-bit vectors of [16 x i16], truncates the 32-bit ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_adds_epi8(__m256i __a, __m256i __b)
Adds 8-bit integers from corresponding bytes of two 256-bit integer vectors using signed saturation,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_slli_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a left by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_packs_epi16(__m256i __a, __m256i __b)
Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit integers using signed saturation,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_avg_epu8(__m256i __a, __m256i __b)
Computes the averages of the corresponding unsigned bytes in the two 256-bit integer vectors in __a a...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epu16(__m128i __V)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_mul_epi8(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epi8(__m256i __V)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi16(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_movepi8_mask(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi8(__m128i __W, __mmask16 __U, void const *__P)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi8(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
#define _mm256_cmpeq_epi8_mask(A, B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi16_mask(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
#define _mm_cmpneq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_and_epi16(__m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
#define _mm_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_and_epi8(__m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi16(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_mul_epi16(__mmask16 __M, __m256i __W)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A)
#define _mm_cmpneq_epi8_mask(A, B)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi16_mask(__m128i __A, __m128i __B)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epi16(__m256i __V)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_add_epi8(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi16(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi16(void *__P, __mmask16 __U, __m256i __A)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_mul_epi16(__m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V)
#define _mm256_cmpneq_epi8_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_and_epi16(__mmask8 __M, __m128i __W)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_test_epi8_mask(__m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epi16(__m128i __V)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epu16(__m256i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi16(__mmask8 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi8(__mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_or_epi16(__m128i __W)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi16_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi8(__m128i __O, __mmask16 __M, char __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_and_epi16(__m256i __W)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_movepi8_mask(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
#define _mm_cmpeq_epi8_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi16(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi8(__mmask32 __M, char __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_testn_epi8_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi8(__m256i __W, __mmask32 __U, void const *__P)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_mul_epi8(__m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_add_epi8(__m128i __W)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epu16(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi8(void *__P, __mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi16(__mmask16 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epi8(__m256i __V)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_add_epi16(__m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi16(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi8(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V)
#define _mm256_cmpeq_epi16_mask(A, B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi8(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi16(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epu8(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_movepi16_mask(__m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi16(__m256i __W, __mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_mul_epi16(__mmask8 __M, __m128i __W)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epu8(__m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi8(__mmask16 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi8(__mmask32 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testn_epi8_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
#define _mm_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi16(__m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_test_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi16(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi8(__mmask32 __A)
#define _mm_cmpeq_epi16_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm_mask_cmpeq_epi16_mask(k, A, B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epi16(__m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epi8(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi8(void *__P, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi16(__mmask16 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi16(__m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_or_epi8(__m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_epi8(__m128i __W, __mmask16 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi8(void *__P, __mmask16 __U, __m128i __A)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_add_epi16(__m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi16(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_or_epi16(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_add_epi16(__mmask16 __M, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srli_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epi16(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_testn_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi8(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi16(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi16(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
#define _mm256_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi16_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi16_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epu8(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_test_epi8_mask(__m128i __A, __m128i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epu8(__m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_epi8(__mmask16 __U, __m128i __A)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epu16(__m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi16(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi16(__mmask8 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_and_epi16(__mmask16 __M, __m256i __W)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_or_epi8(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi16_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi16(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_mul_epi16(__m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_epi16(__m128i __W, __mmask8 __U, __m128i __A)
#define _mm256_cmpneq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi16(__mmask16 __U, void const *__P)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi16(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_and_epi8(__m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi16(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epi8(__m128i __V)
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi16(short __w)
Constructs a 256-bit integer vector of [16 x i16], with each of the 16-bit integral vector elements set to the specified 16-bit integral value.
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi8(char __b)
Constructs a 256-bit integer vector of [32 x i8], with each of the 8-bit integral vector elements set to the specified 8-bit integral value.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 16-bit unsigned integer values in the input and returns the differences in the corresponding elements of a 128-bit result vector of [8 x i16].
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the smaller value from each comparison in the corresponding element of a 128-bit result vector.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_packs_epi32(__m128i __a, __m128i __b)
Converts, with saturation, 32-bit signed integers from both 128-bit integer vector operands into 16-bit signed integers and packs the results into the destination.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_avg_epu8(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving each result in the corresponding element of a 128-bit result vector of [16 x i8].
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the greater value from each comparison in the corresponding element of a 128-bit result vector.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-7) values from two 128-bit vectors of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_avg_epu16(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [8 x i16] vectors, saving each result in the corresponding element of a 128-bit result vector of [8 x i16].
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-3) values from each of the two 128-bit vectors of [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi8(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [16 x i8], saving the lower 8 bits of each ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the lower 16 bits of ea...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_madd_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two 128-bit signed [8 x i16] vectors, producing eight interm...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_packus_epi16(__m128i __a, __m128i __b)
Converts, with saturation, 16-bit signed integers from both 128-bit integer vector operands into 8-bi...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the smaller value fro...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 8-bit signed integer values in the input and returns the di...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_adds_epu8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_adds_epu16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Unpacks the high-order (index 8-15) values from two 128-bit vectors of [16 x i8] and interleaves them...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_adds_epi8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [16 x i8] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhi_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the upper 16 bits of ea...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_packs_epi16(__m128i __a, __m128i __b)
Converts, with saturation, 16-bit signed integers from both 128-bit integer vector operands into 8-bi...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Unpacks the high-order (index 4-7) values from two 128-bit vectors of [8 x i16] and interleaves them ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhi_epu16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two unsigned [8 x i16] vectors, saving the upper 16 bits of ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the greater value fro...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 16-bit signed integer values in the input and returns the d...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi16(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [8 x i16], saving the lower 16 bits of each...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_adds_epi16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [8 x i16] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 8-bit unsigned integer values in the input and returns the ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
[extraction-garbled entries: two `__inline unsigned int` intrinsic prototypes whose function names were lost; the surviving parameter fragments are `unsigned int *__P` (first prototype) and `unsigned int __Y` (second prototype) — verify against the original header]
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu8_epi16(__m128i __V)
Zero-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_packus_epi32(__m128i __V1, __m128i __V2)
Converts, with saturation, 32-bit signed integers from both 128-bit integer vector operands into 16-b...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi8_epi16(__m128i __V)
Sign-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...