clang 20.0.0git
avx512vlbwintrin.h
Go to the documentation of this file.
/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLBWINTRIN_H
15#define __AVX512VLBWINTRIN_H
16
/* Attribute bundles applied to the inline functions in this file. The 128 and
 * 256 variants differ only in their __min_vector_width__ argument. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, \
                 __nodebug__, \
                 __target__("avx512vl,avx512bw,no-evex512"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, \
                 __nodebug__, \
                 __target__("avx512vl,avx512bw,no-evex512"), \
                 __min_vector_width__(256)))
26
27/* Integer compare */
28
/* 128-bit byte compares yielding a __mmask16. P is forwarded to the builtin as
 * an int predicate. The unmasked forms pass an all-ones mask, the masked forms
 * pass M. The epu8 variants use the ucmpb builtin instead of cmpb. */
#define _mm_cmp_epi8_mask(A, B, P) \
  ((__mmask16)__builtin_ia32_cmpb128_mask( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(P), (__mmask16)-1))

#define _mm_mask_cmp_epi8_mask(M, A, B, P) \
  ((__mmask16)__builtin_ia32_cmpb128_mask( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(P), \
      (__mmask16)(M)))

#define _mm_cmp_epu8_mask(A, B, P) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(P), (__mmask16)-1))

#define _mm_mask_cmp_epu8_mask(M, A, B, P) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(P), \
      (__mmask16)(M)))
48
/* 256-bit byte compares yielding a __mmask32. P is forwarded to the builtin as
 * an int predicate. The unmasked forms pass an all-ones mask, the masked forms
 * pass M. The epu8 variants use the ucmpb builtin instead of cmpb. */
#define _mm256_cmp_epi8_mask(A, B, P) \
  ((__mmask32)__builtin_ia32_cmpb256_mask( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(P), (__mmask32)-1))

#define _mm256_mask_cmp_epi8_mask(M, A, B, P) \
  ((__mmask32)__builtin_ia32_cmpb256_mask( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(P), \
      (__mmask32)(M)))

#define _mm256_cmp_epu8_mask(A, B, P) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(P), (__mmask32)-1))

#define _mm256_mask_cmp_epu8_mask(M, A, B, P) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(P), \
      (__mmask32)(M)))
68
/* 128-bit 16-bit-element compares yielding a __mmask8. P is forwarded to the
 * builtin as an int predicate. The unmasked forms pass an all-ones mask, the
 * masked forms pass M. The epu16 variants use the ucmpw builtin. */
#define _mm_cmp_epi16_mask(A, B, P) \
  ((__mmask8)__builtin_ia32_cmpw128_mask( \
      (__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (int)(P), (__mmask8)-1))

#define _mm_mask_cmp_epi16_mask(M, A, B, P) \
  ((__mmask8)__builtin_ia32_cmpw128_mask( \
      (__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (int)(P), (__mmask8)(M)))

#define _mm_cmp_epu16_mask(A, B, P) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask( \
      (__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (int)(P), (__mmask8)-1))

#define _mm_mask_cmp_epu16_mask(M, A, B, P) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask( \
      (__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (int)(P), (__mmask8)(M)))
88
/* 256-bit 16-bit-element compares yielding a __mmask16. P is forwarded to the
 * builtin as an int predicate. The unmasked forms pass an all-ones mask, the
 * masked forms pass M. The epu16 variants use the ucmpw builtin. */
#define _mm256_cmp_epi16_mask(A, B, P) \
  ((__mmask16)__builtin_ia32_cmpw256_mask( \
      (__v16hi)(__m256i)(A), (__v16hi)(__m256i)(B), (int)(P), (__mmask16)-1))

#define _mm256_mask_cmp_epi16_mask(M, A, B, P) \
  ((__mmask16)__builtin_ia32_cmpw256_mask( \
      (__v16hi)(__m256i)(A), (__v16hi)(__m256i)(B), (int)(P), \
      (__mmask16)(M)))

#define _mm256_cmp_epu16_mask(A, B, P) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask( \
      (__v16hi)(__m256i)(A), (__v16hi)(__m256i)(B), (int)(P), (__mmask16)-1))

#define _mm256_mask_cmp_epu16_mask(M, A, B, P) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask( \
      (__v16hi)(__m256i)(A), (__v16hi)(__m256i)(B), (int)(P), \
      (__mmask16)(M)))
108
/* Fixed-predicate shorthands for _mm_cmp_epi8_mask / _mm_mask_cmp_epi8_mask:
 * each forwards its operands unchanged and supplies one _MM_CMPINT_* code. */
#define _mm_cmpeq_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi8_mask(a, b) \
  _mm_cmp_epi8_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi8_mask(m, a, b) \
  _mm_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_NE)
133
/* Fixed-predicate shorthands for _mm256_cmp_epi8_mask and
 * _mm256_mask_cmp_epi8_mask: each forwards its operands unchanged and supplies
 * one _MM_CMPINT_* code. */
#define _mm256_cmpeq_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi8_mask(a, b) \
  _mm256_cmp_epi8_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi8_mask(m, a, b) \
  _mm256_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_NE)
158
/* Fixed-predicate shorthands for _mm_cmp_epu8_mask / _mm_mask_cmp_epu8_mask:
 * each forwards its operands unchanged and supplies one _MM_CMPINT_* code. */
#define _mm_cmpeq_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu8_mask(a, b) \
  _mm_cmp_epu8_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu8_mask(m, a, b) \
  _mm_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_NE)
183
/* Fixed-predicate shorthands for _mm256_cmp_epu8_mask and
 * _mm256_mask_cmp_epu8_mask: each forwards its operands unchanged and supplies
 * one _MM_CMPINT_* code. */
#define _mm256_cmpeq_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu8_mask(a, b) \
  _mm256_cmp_epu8_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu8_mask(m, a, b) \
  _mm256_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_NE)
208
/* Fixed-predicate shorthands for _mm_cmp_epi16_mask / _mm_mask_cmp_epi16_mask:
 * each forwards its operands unchanged and supplies one _MM_CMPINT_* code. */
#define _mm_cmpeq_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi16_mask(a, b) \
  _mm_cmp_epi16_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi16_mask(m, a, b) \
  _mm_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_NE)
233
/* Fixed-predicate shorthands for _mm256_cmp_epi16_mask and
 * _mm256_mask_cmp_epi16_mask: each forwards its operands unchanged and
 * supplies one _MM_CMPINT_* code. */
#define _mm256_cmpeq_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi16_mask(a, b) \
  _mm256_cmp_epi16_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi16_mask(m, a, b) \
  _mm256_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_NE)
258
/* Fixed-predicate shorthands for _mm_cmp_epu16_mask / _mm_mask_cmp_epu16_mask:
 * each forwards its operands unchanged and supplies one _MM_CMPINT_* code. */
#define _mm_cmpeq_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu16_mask(a, b) \
  _mm_cmp_epu16_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu16_mask(m, a, b) \
  _mm_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_NE)
283
/* Fixed-predicate shorthands for _mm256_cmp_epu16_mask and
 * _mm256_mask_cmp_epu16_mask: each forwards its operands unchanged and
 * supplies one _MM_CMPINT_* code. */
#define _mm256_cmpeq_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu16_mask(a, b) \
  _mm256_cmp_epu16_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu16_mask(m, a, b) \
  _mm256_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_NE)
308
309static __inline__ __m256i __DEFAULT_FN_ATTRS256
310_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
311 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
312 (__v32qi)_mm256_add_epi8(__A, __B),
313 (__v32qi)__W);
314}
315
316static __inline__ __m256i __DEFAULT_FN_ATTRS256
317_mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
318 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
319 (__v32qi)_mm256_add_epi8(__A, __B),
320 (__v32qi)_mm256_setzero_si256());
321}
322
323static __inline__ __m256i __DEFAULT_FN_ATTRS256
324_mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
325 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
326 (__v16hi)_mm256_add_epi16(__A, __B),
327 (__v16hi)__W);
328}
329
330static __inline__ __m256i __DEFAULT_FN_ATTRS256
331_mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
332 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
333 (__v16hi)_mm256_add_epi16(__A, __B),
334 (__v16hi)_mm256_setzero_si256());
335}
336
337static __inline__ __m256i __DEFAULT_FN_ATTRS256
338_mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
339 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
340 (__v32qi)_mm256_sub_epi8(__A, __B),
341 (__v32qi)__W);
342}
343
344static __inline__ __m256i __DEFAULT_FN_ATTRS256
345_mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
346 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
347 (__v32qi)_mm256_sub_epi8(__A, __B),
348 (__v32qi)_mm256_setzero_si256());
349}
350
351static __inline__ __m256i __DEFAULT_FN_ATTRS256
352_mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
353 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
354 (__v16hi)_mm256_sub_epi16(__A, __B),
355 (__v16hi)__W);
356}
357
358static __inline__ __m256i __DEFAULT_FN_ATTRS256
359_mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
360 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
361 (__v16hi)_mm256_sub_epi16(__A, __B),
362 (__v16hi)_mm256_setzero_si256());
363}
364
365static __inline__ __m128i __DEFAULT_FN_ATTRS128
366_mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
367 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
368 (__v16qi)_mm_add_epi8(__A, __B),
369 (__v16qi)__W);
370}
371
372static __inline__ __m128i __DEFAULT_FN_ATTRS128
373_mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
374 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
375 (__v16qi)_mm_add_epi8(__A, __B),
376 (__v16qi)_mm_setzero_si128());
377}
378
379static __inline__ __m128i __DEFAULT_FN_ATTRS128
380_mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
381 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
382 (__v8hi)_mm_add_epi16(__A, __B),
383 (__v8hi)__W);
384}
385
386static __inline__ __m128i __DEFAULT_FN_ATTRS128
387_mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
388 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
389 (__v8hi)_mm_add_epi16(__A, __B),
390 (__v8hi)_mm_setzero_si128());
391}
392
393static __inline__ __m128i __DEFAULT_FN_ATTRS128
394_mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
395 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
396 (__v16qi)_mm_sub_epi8(__A, __B),
397 (__v16qi)__W);
398}
399
400static __inline__ __m128i __DEFAULT_FN_ATTRS128
401_mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
402 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
403 (__v16qi)_mm_sub_epi8(__A, __B),
404 (__v16qi)_mm_setzero_si128());
405}
406
407static __inline__ __m128i __DEFAULT_FN_ATTRS128
408_mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
409 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
410 (__v8hi)_mm_sub_epi16(__A, __B),
411 (__v8hi)__W);
412}
413
414static __inline__ __m128i __DEFAULT_FN_ATTRS128
415_mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
416 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
417 (__v8hi)_mm_sub_epi16(__A, __B),
418 (__v8hi)_mm_setzero_si128());
419}
420
421static __inline__ __m256i __DEFAULT_FN_ATTRS256
422_mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
423 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
424 (__v16hi)_mm256_mullo_epi16(__A, __B),
425 (__v16hi)__W);
426}
427
428static __inline__ __m256i __DEFAULT_FN_ATTRS256
429_mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
430 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
431 (__v16hi)_mm256_mullo_epi16(__A, __B),
432 (__v16hi)_mm256_setzero_si256());
433}
434
435static __inline__ __m128i __DEFAULT_FN_ATTRS128
436_mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
437 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
438 (__v8hi)_mm_mullo_epi16(__A, __B),
439 (__v8hi)__W);
440}
441
442static __inline__ __m128i __DEFAULT_FN_ATTRS128
443_mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
444 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
445 (__v8hi)_mm_mullo_epi16(__A, __B),
446 (__v8hi)_mm_setzero_si128());
447}
448
449static __inline__ __m128i __DEFAULT_FN_ATTRS128
450_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
451{
452 return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
453 (__v16qi) __W,
454 (__v16qi) __A);
455}
456
457static __inline__ __m256i __DEFAULT_FN_ATTRS256
458_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
459{
460 return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
461 (__v32qi) __W,
462 (__v32qi) __A);
463}
464
465static __inline__ __m128i __DEFAULT_FN_ATTRS128
466_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
467{
468 return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
469 (__v8hi) __W,
470 (__v8hi) __A);
471}
472
473static __inline__ __m256i __DEFAULT_FN_ATTRS256
474_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
475{
476 return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
477 (__v16hi) __W,
478 (__v16hi) __A);
479}
480
481static __inline__ __m128i __DEFAULT_FN_ATTRS128
482_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
483{
484 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
485 (__v16qi)_mm_abs_epi8(__A),
486 (__v16qi)__W);
487}
488
489static __inline__ __m128i __DEFAULT_FN_ATTRS128
491{
492 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
493 (__v16qi)_mm_abs_epi8(__A),
494 (__v16qi)_mm_setzero_si128());
495}
496
497static __inline__ __m256i __DEFAULT_FN_ATTRS256
498_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
499{
500 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
501 (__v32qi)_mm256_abs_epi8(__A),
502 (__v32qi)__W);
503}
504
505static __inline__ __m256i __DEFAULT_FN_ATTRS256
507{
508 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
509 (__v32qi)_mm256_abs_epi8(__A),
510 (__v32qi)_mm256_setzero_si256());
511}
512
513static __inline__ __m128i __DEFAULT_FN_ATTRS128
514_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
515{
516 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
517 (__v8hi)_mm_abs_epi16(__A),
518 (__v8hi)__W);
519}
520
521static __inline__ __m128i __DEFAULT_FN_ATTRS128
523{
524 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
525 (__v8hi)_mm_abs_epi16(__A),
526 (__v8hi)_mm_setzero_si128());
527}
528
529static __inline__ __m256i __DEFAULT_FN_ATTRS256
530_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
531{
532 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
533 (__v16hi)_mm256_abs_epi16(__A),
534 (__v16hi)__W);
535}
536
537static __inline__ __m256i __DEFAULT_FN_ATTRS256
539{
540 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
541 (__v16hi)_mm256_abs_epi16(__A),
542 (__v16hi)_mm256_setzero_si256());
543}
544
545static __inline__ __m128i __DEFAULT_FN_ATTRS128
546_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
547 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
548 (__v8hi)_mm_packs_epi32(__A, __B),
549 (__v8hi)_mm_setzero_si128());
550}
551
552static __inline__ __m128i __DEFAULT_FN_ATTRS128
553_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
554{
555 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
556 (__v8hi)_mm_packs_epi32(__A, __B),
557 (__v8hi)__W);
558}
559
560static __inline__ __m256i __DEFAULT_FN_ATTRS256
561_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
562{
563 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
564 (__v16hi)_mm256_packs_epi32(__A, __B),
565 (__v16hi)_mm256_setzero_si256());
566}
567
568static __inline__ __m256i __DEFAULT_FN_ATTRS256
569_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
570{
571 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
572 (__v16hi)_mm256_packs_epi32(__A, __B),
573 (__v16hi)__W);
574}
575
576static __inline__ __m128i __DEFAULT_FN_ATTRS128
577_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
578{
579 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
580 (__v16qi)_mm_packs_epi16(__A, __B),
581 (__v16qi)_mm_setzero_si128());
582}
583
584static __inline__ __m128i __DEFAULT_FN_ATTRS128
585_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
586{
587 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
588 (__v16qi)_mm_packs_epi16(__A, __B),
589 (__v16qi)__W);
590}
591
592static __inline__ __m256i __DEFAULT_FN_ATTRS256
593_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
594{
595 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
596 (__v32qi)_mm256_packs_epi16(__A, __B),
597 (__v32qi)_mm256_setzero_si256());
598}
599
600static __inline__ __m256i __DEFAULT_FN_ATTRS256
601_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
602{
603 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
604 (__v32qi)_mm256_packs_epi16(__A, __B),
605 (__v32qi)__W);
606}
607
608static __inline__ __m128i __DEFAULT_FN_ATTRS128
609_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
610{
611 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
612 (__v8hi)_mm_packus_epi32(__A, __B),
613 (__v8hi)_mm_setzero_si128());
614}
615
616static __inline__ __m128i __DEFAULT_FN_ATTRS128
617_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
618{
619 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
620 (__v8hi)_mm_packus_epi32(__A, __B),
621 (__v8hi)__W);
622}
623
624static __inline__ __m256i __DEFAULT_FN_ATTRS256
625_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
626{
627 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
628 (__v16hi)_mm256_packus_epi32(__A, __B),
629 (__v16hi)_mm256_setzero_si256());
630}
631
632static __inline__ __m256i __DEFAULT_FN_ATTRS256
633_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
634{
635 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
636 (__v16hi)_mm256_packus_epi32(__A, __B),
637 (__v16hi)__W);
638}
639
640static __inline__ __m128i __DEFAULT_FN_ATTRS128
641_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
642{
643 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
644 (__v16qi)_mm_packus_epi16(__A, __B),
645 (__v16qi)_mm_setzero_si128());
646}
647
648static __inline__ __m128i __DEFAULT_FN_ATTRS128
649_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
650{
651 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
652 (__v16qi)_mm_packus_epi16(__A, __B),
653 (__v16qi)__W);
654}
655
656static __inline__ __m256i __DEFAULT_FN_ATTRS256
657_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
658{
659 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
660 (__v32qi)_mm256_packus_epi16(__A, __B),
661 (__v32qi)_mm256_setzero_si256());
662}
663
664static __inline__ __m256i __DEFAULT_FN_ATTRS256
665_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
666{
667 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
668 (__v32qi)_mm256_packus_epi16(__A, __B),
669 (__v32qi)__W);
670}
671
672static __inline__ __m128i __DEFAULT_FN_ATTRS128
673_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
674{
675 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
676 (__v16qi)_mm_adds_epi8(__A, __B),
677 (__v16qi)__W);
678}
679
680static __inline__ __m128i __DEFAULT_FN_ATTRS128
681_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
682{
683 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
684 (__v16qi)_mm_adds_epi8(__A, __B),
685 (__v16qi)_mm_setzero_si128());
686}
687
688static __inline__ __m256i __DEFAULT_FN_ATTRS256
689_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
690{
691 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
692 (__v32qi)_mm256_adds_epi8(__A, __B),
693 (__v32qi)__W);
694}
695
696static __inline__ __m256i __DEFAULT_FN_ATTRS256
697_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
698{
699 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
700 (__v32qi)_mm256_adds_epi8(__A, __B),
701 (__v32qi)_mm256_setzero_si256());
702}
703
704static __inline__ __m128i __DEFAULT_FN_ATTRS128
705_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
706{
707 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
708 (__v8hi)_mm_adds_epi16(__A, __B),
709 (__v8hi)__W);
710}
711
712static __inline__ __m128i __DEFAULT_FN_ATTRS128
713_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
714{
715 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
716 (__v8hi)_mm_adds_epi16(__A, __B),
717 (__v8hi)_mm_setzero_si128());
718}
719
720static __inline__ __m256i __DEFAULT_FN_ATTRS256
721_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
722{
723 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
724 (__v16hi)_mm256_adds_epi16(__A, __B),
725 (__v16hi)__W);
726}
727
728static __inline__ __m256i __DEFAULT_FN_ATTRS256
729_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
730{
731 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
732 (__v16hi)_mm256_adds_epi16(__A, __B),
733 (__v16hi)_mm256_setzero_si256());
734}
735
736static __inline__ __m128i __DEFAULT_FN_ATTRS128
737_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
738{
739 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
740 (__v16qi)_mm_adds_epu8(__A, __B),
741 (__v16qi)__W);
742}
743
744static __inline__ __m128i __DEFAULT_FN_ATTRS128
745_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
746{
747 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
748 (__v16qi)_mm_adds_epu8(__A, __B),
749 (__v16qi)_mm_setzero_si128());
750}
751
752static __inline__ __m256i __DEFAULT_FN_ATTRS256
753_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
754{
755 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
756 (__v32qi)_mm256_adds_epu8(__A, __B),
757 (__v32qi)__W);
758}
759
760static __inline__ __m256i __DEFAULT_FN_ATTRS256
761_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
762{
763 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
764 (__v32qi)_mm256_adds_epu8(__A, __B),
765 (__v32qi)_mm256_setzero_si256());
766}
767
768static __inline__ __m128i __DEFAULT_FN_ATTRS128
769_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
770{
771 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
772 (__v8hi)_mm_adds_epu16(__A, __B),
773 (__v8hi)__W);
774}
775
776static __inline__ __m128i __DEFAULT_FN_ATTRS128
777_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
778{
779 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
780 (__v8hi)_mm_adds_epu16(__A, __B),
781 (__v8hi)_mm_setzero_si128());
782}
783
784static __inline__ __m256i __DEFAULT_FN_ATTRS256
785_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
786{
787 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
788 (__v16hi)_mm256_adds_epu16(__A, __B),
789 (__v16hi)__W);
790}
791
792static __inline__ __m256i __DEFAULT_FN_ATTRS256
793_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
794{
795 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
796 (__v16hi)_mm256_adds_epu16(__A, __B),
797 (__v16hi)_mm256_setzero_si256());
798}
799
800static __inline__ __m128i __DEFAULT_FN_ATTRS128
801_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
802{
803 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
804 (__v16qi)_mm_avg_epu8(__A, __B),
805 (__v16qi)__W);
806}
807
808static __inline__ __m128i __DEFAULT_FN_ATTRS128
809_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
810{
811 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
812 (__v16qi)_mm_avg_epu8(__A, __B),
813 (__v16qi)_mm_setzero_si128());
814}
815
816static __inline__ __m256i __DEFAULT_FN_ATTRS256
817_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
818{
819 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
820 (__v32qi)_mm256_avg_epu8(__A, __B),
821 (__v32qi)__W);
822}
823
824static __inline__ __m256i __DEFAULT_FN_ATTRS256
825_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
826{
827 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
828 (__v32qi)_mm256_avg_epu8(__A, __B),
829 (__v32qi)_mm256_setzero_si256());
830}
831
832static __inline__ __m128i __DEFAULT_FN_ATTRS128
833_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
834{
835 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
836 (__v8hi)_mm_avg_epu16(__A, __B),
837 (__v8hi)__W);
838}
839
840static __inline__ __m128i __DEFAULT_FN_ATTRS128
841_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
842{
843 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
844 (__v8hi)_mm_avg_epu16(__A, __B),
845 (__v8hi)_mm_setzero_si128());
846}
847
848static __inline__ __m256i __DEFAULT_FN_ATTRS256
849_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
850{
851 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
852 (__v16hi)_mm256_avg_epu16(__A, __B),
853 (__v16hi)__W);
854}
855
856static __inline__ __m256i __DEFAULT_FN_ATTRS256
857_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
858{
859 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
860 (__v16hi)_mm256_avg_epu16(__A, __B),
861 (__v16hi)_mm256_setzero_si256());
862}
863
864static __inline__ __m128i __DEFAULT_FN_ATTRS128
865_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
866{
867 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
868 (__v16qi)_mm_max_epi8(__A, __B),
869 (__v16qi)_mm_setzero_si128());
870}
871
872static __inline__ __m128i __DEFAULT_FN_ATTRS128
873_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
874{
875 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
876 (__v16qi)_mm_max_epi8(__A, __B),
877 (__v16qi)__W);
878}
879
880static __inline__ __m256i __DEFAULT_FN_ATTRS256
881_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
882{
883 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
884 (__v32qi)_mm256_max_epi8(__A, __B),
885 (__v32qi)_mm256_setzero_si256());
886}
887
888static __inline__ __m256i __DEFAULT_FN_ATTRS256
889_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
890{
891 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
892 (__v32qi)_mm256_max_epi8(__A, __B),
893 (__v32qi)__W);
894}
895
896static __inline__ __m128i __DEFAULT_FN_ATTRS128
897_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
898{
899 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
900 (__v8hi)_mm_max_epi16(__A, __B),
901 (__v8hi)_mm_setzero_si128());
902}
903
904static __inline__ __m128i __DEFAULT_FN_ATTRS128
905_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
906{
907 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
908 (__v8hi)_mm_max_epi16(__A, __B),
909 (__v8hi)__W);
910}
911
912static __inline__ __m256i __DEFAULT_FN_ATTRS256
913_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
914{
915 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
916 (__v16hi)_mm256_max_epi16(__A, __B),
917 (__v16hi)_mm256_setzero_si256());
918}
919
920static __inline__ __m256i __DEFAULT_FN_ATTRS256
921_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
922{
923 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
924 (__v16hi)_mm256_max_epi16(__A, __B),
925 (__v16hi)__W);
926}
927
928static __inline__ __m128i __DEFAULT_FN_ATTRS128
929_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
930{
931 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
932 (__v16qi)_mm_max_epu8(__A, __B),
933 (__v16qi)_mm_setzero_si128());
934}
935
936static __inline__ __m128i __DEFAULT_FN_ATTRS128
937_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
938{
939 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
940 (__v16qi)_mm_max_epu8(__A, __B),
941 (__v16qi)__W);
942}
943
944static __inline__ __m256i __DEFAULT_FN_ATTRS256
945_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
946{
947 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
948 (__v32qi)_mm256_max_epu8(__A, __B),
949 (__v32qi)_mm256_setzero_si256());
950}
951
952static __inline__ __m256i __DEFAULT_FN_ATTRS256
953_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
954{
955 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
956 (__v32qi)_mm256_max_epu8(__A, __B),
957 (__v32qi)__W);
958}
959
960static __inline__ __m128i __DEFAULT_FN_ATTRS128
961_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
962{
963 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
964 (__v8hi)_mm_max_epu16(__A, __B),
965 (__v8hi)_mm_setzero_si128());
966}
967
968static __inline__ __m128i __DEFAULT_FN_ATTRS128
969_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
970{
971 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
972 (__v8hi)_mm_max_epu16(__A, __B),
973 (__v8hi)__W);
974}
975
976static __inline__ __m256i __DEFAULT_FN_ATTRS256
977_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
978{
979 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
980 (__v16hi)_mm256_max_epu16(__A, __B),
981 (__v16hi)_mm256_setzero_si256());
982}
983
984static __inline__ __m256i __DEFAULT_FN_ATTRS256
985_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
986{
987 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
988 (__v16hi)_mm256_max_epu16(__A, __B),
989 (__v16hi)__W);
990}
991
992static __inline__ __m128i __DEFAULT_FN_ATTRS128
993_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
994{
995 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
996 (__v16qi)_mm_min_epi8(__A, __B),
997 (__v16qi)_mm_setzero_si128());
998}
999
1000static __inline__ __m128i __DEFAULT_FN_ATTRS128
1001_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
1002{
1003 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1004 (__v16qi)_mm_min_epi8(__A, __B),
1005 (__v16qi)__W);
1006}
1007
1008static __inline__ __m256i __DEFAULT_FN_ATTRS256
1009_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
1010{
1011 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1012 (__v32qi)_mm256_min_epi8(__A, __B),
1013 (__v32qi)_mm256_setzero_si256());
1014}
1015
1016static __inline__ __m256i __DEFAULT_FN_ATTRS256
1017_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
1018{
1019 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1020 (__v32qi)_mm256_min_epi8(__A, __B),
1021 (__v32qi)__W);
1022}
1023
1024static __inline__ __m128i __DEFAULT_FN_ATTRS128
1025_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
1026{
1027 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1028 (__v8hi)_mm_min_epi16(__A, __B),
1029 (__v8hi)_mm_setzero_si128());
1030}
1031
1032static __inline__ __m128i __DEFAULT_FN_ATTRS128
1033_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
1034{
1035 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1036 (__v8hi)_mm_min_epi16(__A, __B),
1037 (__v8hi)__W);
1038}
1039
1040static __inline__ __m256i __DEFAULT_FN_ATTRS256
1041_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
1042{
1043 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1044 (__v16hi)_mm256_min_epi16(__A, __B),
1045 (__v16hi)_mm256_setzero_si256());
1046}
1047
1048static __inline__ __m256i __DEFAULT_FN_ATTRS256
1049_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
1050{
1051 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1052 (__v16hi)_mm256_min_epi16(__A, __B),
1053 (__v16hi)__W);
1054}
1055
1056static __inline__ __m128i __DEFAULT_FN_ATTRS128
1057_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
1058{
1059 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1060 (__v16qi)_mm_min_epu8(__A, __B),
1061 (__v16qi)_mm_setzero_si128());
1062}
1063
1064static __inline__ __m128i __DEFAULT_FN_ATTRS128
1065_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
1066{
1067 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1068 (__v16qi)_mm_min_epu8(__A, __B),
1069 (__v16qi)__W);
1070}
1071
1072static __inline__ __m256i __DEFAULT_FN_ATTRS256
1073_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1074{
1075 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1076 (__v32qi)_mm256_min_epu8(__A, __B),
1077 (__v32qi)_mm256_setzero_si256());
1078}
1079
1080static __inline__ __m256i __DEFAULT_FN_ATTRS256
1081_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
1082{
1083 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1084 (__v32qi)_mm256_min_epu8(__A, __B),
1085 (__v32qi)__W);
1086}
1087
1088static __inline__ __m128i __DEFAULT_FN_ATTRS128
1089_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
1090{
1091 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1092 (__v8hi)_mm_min_epu16(__A, __B),
1093 (__v8hi)_mm_setzero_si128());
1094}
1095
1096static __inline__ __m128i __DEFAULT_FN_ATTRS128
1097_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
1098{
1099 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1100 (__v8hi)_mm_min_epu16(__A, __B),
1101 (__v8hi)__W);
1102}
1103
1104static __inline__ __m256i __DEFAULT_FN_ATTRS256
1105_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
1106{
1107 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1108 (__v16hi)_mm256_min_epu16(__A, __B),
1109 (__v16hi)_mm256_setzero_si256());
1110}
1111
1112static __inline__ __m256i __DEFAULT_FN_ATTRS256
1113_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
1114{
1115 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1116 (__v16hi)_mm256_min_epu16(__A, __B),
1117 (__v16hi)__W);
1118}
1119
1120static __inline__ __m128i __DEFAULT_FN_ATTRS128
1121_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1122{
1123 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1124 (__v16qi)_mm_shuffle_epi8(__A, __B),
1125 (__v16qi)__W);
1126}
1127
1128static __inline__ __m128i __DEFAULT_FN_ATTRS128
1129_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
1130{
1131 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1132 (__v16qi)_mm_shuffle_epi8(__A, __B),
1133 (__v16qi)_mm_setzero_si128());
1134}
1135
1136static __inline__ __m256i __DEFAULT_FN_ATTRS256
1137_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1138{
1139 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1140 (__v32qi)_mm256_shuffle_epi8(__A, __B),
1141 (__v32qi)__W);
1142}
1143
1144static __inline__ __m256i __DEFAULT_FN_ATTRS256
1145_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
1146{
1147 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1148 (__v32qi)_mm256_shuffle_epi8(__A, __B),
1149 (__v32qi)_mm256_setzero_si256());
1150}
1151
1152static __inline__ __m128i __DEFAULT_FN_ATTRS128
1153_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1154{
1155 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1156 (__v16qi)_mm_subs_epi8(__A, __B),
1157 (__v16qi)__W);
1158}
1159
1160static __inline__ __m128i __DEFAULT_FN_ATTRS128
1161_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
1162{
1163 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1164 (__v16qi)_mm_subs_epi8(__A, __B),
1165 (__v16qi)_mm_setzero_si128());
1166}
1167
1168static __inline__ __m256i __DEFAULT_FN_ATTRS256
1169_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1170{
1171 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1172 (__v32qi)_mm256_subs_epi8(__A, __B),
1173 (__v32qi)__W);
1174}
1175
1176static __inline__ __m256i __DEFAULT_FN_ATTRS256
1177_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
1178{
1179 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1180 (__v32qi)_mm256_subs_epi8(__A, __B),
1181 (__v32qi)_mm256_setzero_si256());
1182}
1183
1184static __inline__ __m128i __DEFAULT_FN_ATTRS128
1185_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1186{
1187 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1188 (__v8hi)_mm_subs_epi16(__A, __B),
1189 (__v8hi)__W);
1190}
1191
1192static __inline__ __m128i __DEFAULT_FN_ATTRS128
1193_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
1194{
1195 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1196 (__v8hi)_mm_subs_epi16(__A, __B),
1197 (__v8hi)_mm_setzero_si128());
1198}
1199
1200static __inline__ __m256i __DEFAULT_FN_ATTRS256
1201_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1202{
1203 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1204 (__v16hi)_mm256_subs_epi16(__A, __B),
1205 (__v16hi)__W);
1206}
1207
1208static __inline__ __m256i __DEFAULT_FN_ATTRS256
1209_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1210{
1211 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1212 (__v16hi)_mm256_subs_epi16(__A, __B),
1213 (__v16hi)_mm256_setzero_si256());
1214}
1215
1216static __inline__ __m128i __DEFAULT_FN_ATTRS128
1217_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1218{
1219 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1220 (__v16qi)_mm_subs_epu8(__A, __B),
1221 (__v16qi)__W);
1222}
1223
1224static __inline__ __m128i __DEFAULT_FN_ATTRS128
1225_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
1226{
1227 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1228 (__v16qi)_mm_subs_epu8(__A, __B),
1229 (__v16qi)_mm_setzero_si128());
1230}
1231
1232static __inline__ __m256i __DEFAULT_FN_ATTRS256
1233_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1234{
1235 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1236 (__v32qi)_mm256_subs_epu8(__A, __B),
1237 (__v32qi)__W);
1238}
1239
1240static __inline__ __m256i __DEFAULT_FN_ATTRS256
1241_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
1242{
1243 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1244 (__v32qi)_mm256_subs_epu8(__A, __B),
1245 (__v32qi)_mm256_setzero_si256());
1246}
1247
1248static __inline__ __m128i __DEFAULT_FN_ATTRS128
1249_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1250{
1251 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1252 (__v8hi)_mm_subs_epu16(__A, __B),
1253 (__v8hi)__W);
1254}
1255
1256static __inline__ __m128i __DEFAULT_FN_ATTRS128
1257_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
1258{
1259 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1260 (__v8hi)_mm_subs_epu16(__A, __B),
1261 (__v8hi)_mm_setzero_si128());
1262}
1263
1264static __inline__ __m256i __DEFAULT_FN_ATTRS256
1265_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,
1266 __m256i __B) {
1267 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1268 (__v16hi)_mm256_subs_epu16(__A, __B),
1269 (__v16hi)__W);
1270}
1271
1272static __inline__ __m256i __DEFAULT_FN_ATTRS256
1273_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
1274{
1275 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1276 (__v16hi)_mm256_subs_epu16(__A, __B),
1277 (__v16hi)_mm256_setzero_si256());
1278}
1279
1280static __inline__ __m128i __DEFAULT_FN_ATTRS128
1281_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
1282{
1283 return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1284 (__v8hi) __B);
1285}
1286
1287static __inline__ __m128i __DEFAULT_FN_ATTRS128
1288_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I,
1289 __m128i __B)
1290{
1291 return (__m128i)__builtin_ia32_selectw_128(__U,
1292 (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1293 (__v8hi)__A);
1294}
1295
1296static __inline__ __m128i __DEFAULT_FN_ATTRS128
1297_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,
1298 __m128i __B)
1299{
1300 return (__m128i)__builtin_ia32_selectw_128(__U,
1301 (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1302 (__v8hi)__I);
1303}
1304
1305static __inline__ __m128i __DEFAULT_FN_ATTRS128
1306_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
1307 __m128i __B)
1308{
1309 return (__m128i)__builtin_ia32_selectw_128(__U,
1310 (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1311 (__v8hi)_mm_setzero_si128());
1312}
1313
1314static __inline__ __m256i __DEFAULT_FN_ATTRS256
1315_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
1316{
1317 return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1318 (__v16hi)__B);
1319}
1320
1321static __inline__ __m256i __DEFAULT_FN_ATTRS256
1322_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I,
1323 __m256i __B)
1324{
1325 return (__m256i)__builtin_ia32_selectw_256(__U,
1326 (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1327 (__v16hi)__A);
1328}
1329
1330static __inline__ __m256i __DEFAULT_FN_ATTRS256
1331_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U,
1332 __m256i __B)
1333{
1334 return (__m256i)__builtin_ia32_selectw_256(__U,
1335 (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1336 (__v16hi)__I);
1337}
1338
1339static __inline__ __m256i __DEFAULT_FN_ATTRS256
1340_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
1341 __m256i __B)
1342{
1343 return (__m256i)__builtin_ia32_selectw_256(__U,
1344 (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1345 (__v16hi)_mm256_setzero_si256());
1346}
1347
1348static __inline__ __m128i __DEFAULT_FN_ATTRS128
1349_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
1350 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1351 (__v8hi)_mm_maddubs_epi16(__X, __Y),
1352 (__v8hi)__W);
1353}
1354
1355static __inline__ __m128i __DEFAULT_FN_ATTRS128
1356_mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
1357 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1358 (__v8hi)_mm_maddubs_epi16(__X, __Y),
1359 (__v8hi)_mm_setzero_si128());
1360}
1361
1362static __inline__ __m256i __DEFAULT_FN_ATTRS256
1363_mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X,
1364 __m256i __Y) {
1365 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1366 (__v16hi)_mm256_maddubs_epi16(__X, __Y),
1367 (__v16hi)__W);
1368}
1369
1370static __inline__ __m256i __DEFAULT_FN_ATTRS256
1371_mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
1372 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1373 (__v16hi)_mm256_maddubs_epi16(__X, __Y),
1374 (__v16hi)_mm256_setzero_si256());
1375}
1376
1377static __inline__ __m128i __DEFAULT_FN_ATTRS128
1378_mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1379 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1380 (__v4si)_mm_madd_epi16(__A, __B),
1381 (__v4si)__W);
1382}
1383
1384static __inline__ __m128i __DEFAULT_FN_ATTRS128
1385_mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1386 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1387 (__v4si)_mm_madd_epi16(__A, __B),
1388 (__v4si)_mm_setzero_si128());
1389}
1390
1391static __inline__ __m256i __DEFAULT_FN_ATTRS256
1392_mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
1393 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1394 (__v8si)_mm256_madd_epi16(__A, __B),
1395 (__v8si)__W);
1396}
1397
1398static __inline__ __m256i __DEFAULT_FN_ATTRS256
1399_mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) {
1400 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1401 (__v8si)_mm256_madd_epi16(__A, __B),
1402 (__v8si)_mm256_setzero_si256());
1403}
1404
1405static __inline__ __m128i __DEFAULT_FN_ATTRS128
1406_mm_cvtsepi16_epi8 (__m128i __A) {
1407 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1408 (__v16qi) _mm_setzero_si128(),
1409 (__mmask8) -1);
1410}
1411
1412static __inline__ __m128i __DEFAULT_FN_ATTRS128
1413_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1414 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1415 (__v16qi) __O,
1416 __M);
1417}
1418
1419static __inline__ __m128i __DEFAULT_FN_ATTRS128
1421 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1422 (__v16qi) _mm_setzero_si128(),
1423 __M);
1424}
1425
1426static __inline__ __m128i __DEFAULT_FN_ATTRS256
1428 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1429 (__v16qi) _mm_setzero_si128(),
1430 (__mmask16) -1);
1431}
1432
1433static __inline__ __m128i __DEFAULT_FN_ATTRS256
1434_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1435 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1436 (__v16qi) __O,
1437 __M);
1438}
1439
1440static __inline__ __m128i __DEFAULT_FN_ATTRS256
1442 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1443 (__v16qi) _mm_setzero_si128(),
1444 __M);
1445}
1446
1447static __inline__ __m128i __DEFAULT_FN_ATTRS128
1448_mm_cvtusepi16_epi8 (__m128i __A) {
1449 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1450 (__v16qi) _mm_setzero_si128(),
1451 (__mmask8) -1);
1452}
1453
1454static __inline__ __m128i __DEFAULT_FN_ATTRS128
1455_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1456 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1457 (__v16qi) __O,
1458 __M);
1459}
1460
1461static __inline__ __m128i __DEFAULT_FN_ATTRS128
1463 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1464 (__v16qi) _mm_setzero_si128(),
1465 __M);
1466}
1467
1468static __inline__ __m128i __DEFAULT_FN_ATTRS256
1470 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1471 (__v16qi) _mm_setzero_si128(),
1472 (__mmask16) -1);
1473}
1474
1475static __inline__ __m128i __DEFAULT_FN_ATTRS256
1476_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1477 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1478 (__v16qi) __O,
1479 __M);
1480}
1481
1482static __inline__ __m128i __DEFAULT_FN_ATTRS256
1484 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1485 (__v16qi) _mm_setzero_si128(),
1486 __M);
1487}
1488
1489static __inline__ __m128i __DEFAULT_FN_ATTRS128
1490_mm_cvtepi16_epi8 (__m128i __A) {
1491 return (__m128i)__builtin_shufflevector(
1492 __builtin_convertvector((__v8hi)__A, __v8qi),
1493 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1494 12, 13, 14, 15);
1495}
1496
1497static __inline__ __m128i __DEFAULT_FN_ATTRS128
1498_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1499 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1500 (__v16qi) __O,
1501 __M);
1502}
1503
1504static __inline__ __m128i __DEFAULT_FN_ATTRS128
1506 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1507 (__v16qi) _mm_setzero_si128(),
1508 __M);
1509}
1510
1511static __inline__ void __DEFAULT_FN_ATTRS128
1513{
1514 __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1515}
1516
1517
1518static __inline__ void __DEFAULT_FN_ATTRS128
1520{
1521 __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1522}
1523
1524static __inline__ void __DEFAULT_FN_ATTRS128
1526{
1527 __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1528}
1529
1530static __inline__ __m128i __DEFAULT_FN_ATTRS256
1532 return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
1533}
1534
1535static __inline__ __m128i __DEFAULT_FN_ATTRS256
1536_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1537 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1538 (__v16qi)_mm256_cvtepi16_epi8(__A),
1539 (__v16qi)__O);
1540}
1541
1542static __inline__ __m128i __DEFAULT_FN_ATTRS256
1544 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1545 (__v16qi)_mm256_cvtepi16_epi8(__A),
1546 (__v16qi)_mm_setzero_si128());
1547}
1548
1549static __inline__ void __DEFAULT_FN_ATTRS256
1551{
1552 __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1553}
1554
1555static __inline__ void __DEFAULT_FN_ATTRS256
1557{
1558 __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1559}
1560
1561static __inline__ void __DEFAULT_FN_ATTRS256
1563{
1564 __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M);
1565}
1566
1567static __inline__ __m128i __DEFAULT_FN_ATTRS128
1568_mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
1569 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1570 (__v8hi)_mm_mulhrs_epi16(__X, __Y),
1571 (__v8hi)__W);
1572}
1573
1574static __inline__ __m128i __DEFAULT_FN_ATTRS128
1575_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
1576 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1577 (__v8hi)_mm_mulhrs_epi16(__X, __Y),
1578 (__v8hi)_mm_setzero_si128());
1579}
1580
1581static __inline__ __m256i __DEFAULT_FN_ATTRS256
1582_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
1583 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1584 (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
1585 (__v16hi)__W);
1586}
1587
1588static __inline__ __m256i __DEFAULT_FN_ATTRS256
1589_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
1590 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1591 (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
1592 (__v16hi)_mm256_setzero_si256());
1593}
1594
1595static __inline__ __m128i __DEFAULT_FN_ATTRS128
1596_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1597 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1598 (__v8hi)_mm_mulhi_epu16(__A, __B),
1599 (__v8hi)__W);
1600}
1601
1602static __inline__ __m128i __DEFAULT_FN_ATTRS128
1603_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
1604 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1605 (__v8hi)_mm_mulhi_epu16(__A, __B),
1606 (__v8hi)_mm_setzero_si128());
1607}
1608
1609static __inline__ __m256i __DEFAULT_FN_ATTRS256
1610_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1611 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1612 (__v16hi)_mm256_mulhi_epu16(__A, __B),
1613 (__v16hi)__W);
1614}
1615
1616static __inline__ __m256i __DEFAULT_FN_ATTRS256
1617_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
1618 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1619 (__v16hi)_mm256_mulhi_epu16(__A, __B),
1620 (__v16hi)_mm256_setzero_si256());
1621}
1622
1623static __inline__ __m128i __DEFAULT_FN_ATTRS128
1624_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1625 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1626 (__v8hi)_mm_mulhi_epi16(__A, __B),
1627 (__v8hi)__W);
1628}
1629
1630static __inline__ __m128i __DEFAULT_FN_ATTRS128
1631_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1632 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1633 (__v8hi)_mm_mulhi_epi16(__A, __B),
1634 (__v8hi)_mm_setzero_si128());
1635}
1636
1637static __inline__ __m256i __DEFAULT_FN_ATTRS256
1638_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1639 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1640 (__v16hi)_mm256_mulhi_epi16(__A, __B),
1641 (__v16hi)__W);
1642}
1643
1644static __inline__ __m256i __DEFAULT_FN_ATTRS256
1645_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1646 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1647 (__v16hi)_mm256_mulhi_epi16(__A, __B),
1648 (__v16hi)_mm256_setzero_si256());
1649}
1650
1651static __inline__ __m128i __DEFAULT_FN_ATTRS128
1652_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
1653 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1654 (__v16qi)_mm_unpackhi_epi8(__A, __B),
1655 (__v16qi)__W);
1656}
1657
1658static __inline__ __m128i __DEFAULT_FN_ATTRS128
1659_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
1660 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1661 (__v16qi)_mm_unpackhi_epi8(__A, __B),
1662 (__v16qi)_mm_setzero_si128());
1663}
1664
1665static __inline__ __m256i __DEFAULT_FN_ATTRS256
1666_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
1667 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1668 (__v32qi)_mm256_unpackhi_epi8(__A, __B),
1669 (__v32qi)__W);
1670}
1671
1672static __inline__ __m256i __DEFAULT_FN_ATTRS256
1673_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
1674 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1675 (__v32qi)_mm256_unpackhi_epi8(__A, __B),
1676 (__v32qi)_mm256_setzero_si256());
1677}
1678
1679static __inline__ __m128i __DEFAULT_FN_ATTRS128
1680_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1681 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1682 (__v8hi)_mm_unpackhi_epi16(__A, __B),
1683 (__v8hi)__W);
1684}
1685
1686static __inline__ __m128i __DEFAULT_FN_ATTRS128
1687_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1688 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1689 (__v8hi)_mm_unpackhi_epi16(__A, __B),
1690 (__v8hi) _mm_setzero_si128());
1691}
1692
1693static __inline__ __m256i __DEFAULT_FN_ATTRS256
1694_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1695 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1696 (__v16hi)_mm256_unpackhi_epi16(__A, __B),
1697 (__v16hi)__W);
1698}
1699
1700static __inline__ __m256i __DEFAULT_FN_ATTRS256
1701_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1702 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1703 (__v16hi)_mm256_unpackhi_epi16(__A, __B),
1704 (__v16hi)_mm256_setzero_si256());
1705}
1706
1707static __inline__ __m128i __DEFAULT_FN_ATTRS128
1708_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
1709 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1710 (__v16qi)_mm_unpacklo_epi8(__A, __B),
1711 (__v16qi)__W);
1712}
1713
1714static __inline__ __m128i __DEFAULT_FN_ATTRS128
1715_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
1716 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1717 (__v16qi)_mm_unpacklo_epi8(__A, __B),
1718 (__v16qi)_mm_setzero_si128());
1719}
1720
1721static __inline__ __m256i __DEFAULT_FN_ATTRS256
1722_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
1723 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1724 (__v32qi)_mm256_unpacklo_epi8(__A, __B),
1725 (__v32qi)__W);
1726}
1727
1728static __inline__ __m256i __DEFAULT_FN_ATTRS256
1729_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
1730 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1731 (__v32qi)_mm256_unpacklo_epi8(__A, __B),
1732 (__v32qi)_mm256_setzero_si256());
1733}
1734
1735static __inline__ __m128i __DEFAULT_FN_ATTRS128
1736_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1737 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1738 (__v8hi)_mm_unpacklo_epi16(__A, __B),
1739 (__v8hi)__W);
1740}
1741
1742static __inline__ __m128i __DEFAULT_FN_ATTRS128
1743_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1744 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1745 (__v8hi)_mm_unpacklo_epi16(__A, __B),
1746 (__v8hi) _mm_setzero_si128());
1747}
1748
1749static __inline__ __m256i __DEFAULT_FN_ATTRS256
1750_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1751 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1752 (__v16hi)_mm256_unpacklo_epi16(__A, __B),
1753 (__v16hi)__W);
1754}
1755
1756static __inline__ __m256i __DEFAULT_FN_ATTRS256
1757_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1758 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1759 (__v16hi)_mm256_unpacklo_epi16(__A, __B),
1760 (__v16hi)_mm256_setzero_si256());
1761}
1762
1763static __inline__ __m128i __DEFAULT_FN_ATTRS128
1764_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
1765{
1766 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1767 (__v8hi)_mm_cvtepi8_epi16(__A),
1768 (__v8hi)__W);
1769}
1770
1771static __inline__ __m128i __DEFAULT_FN_ATTRS128
1773{
1774 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1775 (__v8hi)_mm_cvtepi8_epi16(__A),
1776 (__v8hi)_mm_setzero_si128());
1777}
1778
1779static __inline__ __m256i __DEFAULT_FN_ATTRS256
1780_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
1781{
1782 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1783 (__v16hi)_mm256_cvtepi8_epi16(__A),
1784 (__v16hi)__W);
1785}
1786
1787static __inline__ __m256i __DEFAULT_FN_ATTRS256
1789{
1790 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1791 (__v16hi)_mm256_cvtepi8_epi16(__A),
1792 (__v16hi)_mm256_setzero_si256());
1793}
1794
1795
1796static __inline__ __m128i __DEFAULT_FN_ATTRS128
1797_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
1798{
1799 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1800 (__v8hi)_mm_cvtepu8_epi16(__A),
1801 (__v8hi)__W);
1802}
1803
1804static __inline__ __m128i __DEFAULT_FN_ATTRS128
1806{
1807 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1808 (__v8hi)_mm_cvtepu8_epi16(__A),
1809 (__v8hi)_mm_setzero_si128());
1810}
1811
1812static __inline__ __m256i __DEFAULT_FN_ATTRS256
1813_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
1814{
1815 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1816 (__v16hi)_mm256_cvtepu8_epi16(__A),
1817 (__v16hi)__W);
1818}
1819
1820static __inline__ __m256i __DEFAULT_FN_ATTRS256
1822{
1823 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1824 (__v16hi)_mm256_cvtepu8_epi16(__A),
1825 (__v16hi)_mm256_setzero_si256());
1826}
1827
1828
/* Merge-masking _mm_shufflehi_epi16; a macro so that imm is forwarded
 * textually to the underlying shuffle. Unselected lanes come from W. */
#define _mm_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
      (__v8hi)(__m128i)(W)))
1833
/* Zero-masking _mm_shufflehi_epi16; unselected lanes are zeroed. */
#define _mm_maskz_shufflehi_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
1838
/* Merge-masking _mm256_shufflehi_epi16; unselected lanes come from W. */
#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
1843
/* Zero-masking _mm256_shufflehi_epi16; unselected lanes are zeroed. */
#define _mm256_maskz_shufflehi_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
1848
/* Merge-masking _mm_shufflelo_epi16; unselected lanes come from W. */
#define _mm_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)(__m128i)(W)))
1853
/* Zero-masking _mm_shufflelo_epi16; unselected lanes are zeroed. */
#define _mm_maskz_shufflelo_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
1858
/* Merge-masking _mm256_shufflelo_epi16; unselected lanes come from W. */
#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
1864
/* Zero-masking _mm256_shufflelo_epi16; unselected lanes are zeroed. */
#define _mm256_maskz_shufflelo_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
1870
1871static __inline__ __m256i __DEFAULT_FN_ATTRS256
1872_mm256_sllv_epi16(__m256i __A, __m256i __B)
1873{
1874 return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
1875}
1876
1877static __inline__ __m256i __DEFAULT_FN_ATTRS256
1878_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1879{
1880 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1881 (__v16hi)_mm256_sllv_epi16(__A, __B),
1882 (__v16hi)__W);
1883}
1884
1885static __inline__ __m256i __DEFAULT_FN_ATTRS256
1886_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1887{
1888 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1889 (__v16hi)_mm256_sllv_epi16(__A, __B),
1890 (__v16hi)_mm256_setzero_si256());
1891}
1892
1893static __inline__ __m128i __DEFAULT_FN_ATTRS128
1894_mm_sllv_epi16(__m128i __A, __m128i __B)
1895{
1896 return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
1897}
1898
1899static __inline__ __m128i __DEFAULT_FN_ATTRS128
1900_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1901{
1902 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1903 (__v8hi)_mm_sllv_epi16(__A, __B),
1904 (__v8hi)__W);
1905}
1906
1907static __inline__ __m128i __DEFAULT_FN_ATTRS128
1908_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
1909{
1910 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1911 (__v8hi)_mm_sllv_epi16(__A, __B),
1912 (__v8hi)_mm_setzero_si128());
1913}
1914
1915static __inline__ __m128i __DEFAULT_FN_ATTRS128
1916_mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1917{
1918 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1919 (__v8hi)_mm_sll_epi16(__A, __B),
1920 (__v8hi)__W);
1921}
1922
1923static __inline__ __m128i __DEFAULT_FN_ATTRS128
1924_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
1925{
1926 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1927 (__v8hi)_mm_sll_epi16(__A, __B),
1928 (__v8hi)_mm_setzero_si128());
1929}
1930
1931static __inline__ __m256i __DEFAULT_FN_ATTRS256
1932_mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
1933{
1934 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1935 (__v16hi)_mm256_sll_epi16(__A, __B),
1936 (__v16hi)__W);
1937}
1938
1939static __inline__ __m256i __DEFAULT_FN_ATTRS256
1940_mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
1941{
1942 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1943 (__v16hi)_mm256_sll_epi16(__A, __B),
1944 (__v16hi)_mm256_setzero_si256());
1945}
1946
1947static __inline__ __m128i __DEFAULT_FN_ATTRS128
1948_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
1949{
1950 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1951 (__v8hi)_mm_slli_epi16(__A, (int)__B),
1952 (__v8hi)__W);
1953}
1954
1955static __inline__ __m128i __DEFAULT_FN_ATTRS128
1956_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
1957{
1958 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1959 (__v8hi)_mm_slli_epi16(__A, (int)__B),
1960 (__v8hi)_mm_setzero_si128());
1961}
1962
1963static __inline__ __m256i __DEFAULT_FN_ATTRS256
1964_mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A,
1965 unsigned int __B)
1966{
1967 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1968 (__v16hi)_mm256_slli_epi16(__A, (int)__B),
1969 (__v16hi)__W);
1970}
1971
1972static __inline__ __m256i __DEFAULT_FN_ATTRS256
1973_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
1974{
1975 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1976 (__v16hi)_mm256_slli_epi16(__A, (int)__B),
1977 (__v16hi)_mm256_setzero_si256());
1978}
1979
1980static __inline__ __m256i __DEFAULT_FN_ATTRS256
1981_mm256_srlv_epi16(__m256i __A, __m256i __B)
1982{
1983 return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
1984}
1985
1986static __inline__ __m256i __DEFAULT_FN_ATTRS256
1987_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
1988{
1989 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1990 (__v16hi)_mm256_srlv_epi16(__A, __B),
1991 (__v16hi)__W);
1992}
1993
1994static __inline__ __m256i __DEFAULT_FN_ATTRS256
1995_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
1996{
1997 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1998 (__v16hi)_mm256_srlv_epi16(__A, __B),
1999 (__v16hi)_mm256_setzero_si256());
2000}
2001
2002static __inline__ __m128i __DEFAULT_FN_ATTRS128
2003_mm_srlv_epi16(__m128i __A, __m128i __B)
2004{
2005 return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
2006}
2007
2008static __inline__ __m128i __DEFAULT_FN_ATTRS128
2009_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2010{
2011 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2012 (__v8hi)_mm_srlv_epi16(__A, __B),
2013 (__v8hi)__W);
2014}
2015
2016static __inline__ __m128i __DEFAULT_FN_ATTRS128
2017_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2018{
2019 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2020 (__v8hi)_mm_srlv_epi16(__A, __B),
2021 (__v8hi)_mm_setzero_si128());
2022}
2023
2024static __inline__ __m256i __DEFAULT_FN_ATTRS256
2025_mm256_srav_epi16(__m256i __A, __m256i __B)
2026{
2027 return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
2028}
2029
2030static __inline__ __m256i __DEFAULT_FN_ATTRS256
2031_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
2032{
2033 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2034 (__v16hi)_mm256_srav_epi16(__A, __B),
2035 (__v16hi)__W);
2036}
2037
2038static __inline__ __m256i __DEFAULT_FN_ATTRS256
2039_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
2040{
2041 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2042 (__v16hi)_mm256_srav_epi16(__A, __B),
2043 (__v16hi)_mm256_setzero_si256());
2044}
2045
2046static __inline__ __m128i __DEFAULT_FN_ATTRS128
2047_mm_srav_epi16(__m128i __A, __m128i __B)
2048{
2049 return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
2050}
2051
2052static __inline__ __m128i __DEFAULT_FN_ATTRS128
2053_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2054{
2055 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2056 (__v8hi)_mm_srav_epi16(__A, __B),
2057 (__v8hi)__W);
2058}
2059
2060static __inline__ __m128i __DEFAULT_FN_ATTRS128
2061_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2062{
2063 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2064 (__v8hi)_mm_srav_epi16(__A, __B),
2065 (__v8hi)_mm_setzero_si128());
2066}
2067
2068static __inline__ __m128i __DEFAULT_FN_ATTRS128
2069_mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2070{
2071 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2072 (__v8hi)_mm_sra_epi16(__A, __B),
2073 (__v8hi)__W);
2074}
2075
2076static __inline__ __m128i __DEFAULT_FN_ATTRS128
2077_mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2078{
2079 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2080 (__v8hi)_mm_sra_epi16(__A, __B),
2081 (__v8hi)_mm_setzero_si128());
2082}
2083
2084static __inline__ __m256i __DEFAULT_FN_ATTRS256
2085_mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
2086{
2087 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2088 (__v16hi)_mm256_sra_epi16(__A, __B),
2089 (__v16hi)__W);
2090}
2091
2092static __inline__ __m256i __DEFAULT_FN_ATTRS256
2093_mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
2094{
2095 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2096 (__v16hi)_mm256_sra_epi16(__A, __B),
2097 (__v16hi)_mm256_setzero_si256());
2098}
2099
2100static __inline__ __m128i __DEFAULT_FN_ATTRS128
2101_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
2102{
2103 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2104 (__v8hi)_mm_srai_epi16(__A, (int)__B),
2105 (__v8hi)__W);
2106}
2107
2108static __inline__ __m128i __DEFAULT_FN_ATTRS128
2109_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
2110{
2111 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2112 (__v8hi)_mm_srai_epi16(__A, (int)__B),
2113 (__v8hi)_mm_setzero_si128());
2114}
2115
2116static __inline__ __m256i __DEFAULT_FN_ATTRS256
2117_mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A,
2118 unsigned int __B)
2119{
2120 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2121 (__v16hi)_mm256_srai_epi16(__A, (int)__B),
2122 (__v16hi)__W);
2123}
2124
2125static __inline__ __m256i __DEFAULT_FN_ATTRS256
2126_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
2127{
2128 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2129 (__v16hi)_mm256_srai_epi16(__A, (int)__B),
2130 (__v16hi)_mm256_setzero_si256());
2131}
2132
2133static __inline__ __m128i __DEFAULT_FN_ATTRS128
2134_mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2135{
2136 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2137 (__v8hi)_mm_srl_epi16(__A, __B),
2138 (__v8hi)__W);
2139}
2140
2141static __inline__ __m128i __DEFAULT_FN_ATTRS128
2142_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2143{
2144 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2145 (__v8hi)_mm_srl_epi16(__A, __B),
2146 (__v8hi)_mm_setzero_si128());
2147}
2148
2149static __inline__ __m256i __DEFAULT_FN_ATTRS256
2150_mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
2151{
2152 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2153 (__v16hi)_mm256_srl_epi16(__A, __B),
2154 (__v16hi)__W);
2155}
2156
2157static __inline__ __m256i __DEFAULT_FN_ATTRS256
2158_mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
2159{
2160 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2161 (__v16hi)_mm256_srl_epi16(__A, __B),
2162 (__v16hi)_mm256_setzero_si256());
2163}
2164
2165static __inline__ __m128i __DEFAULT_FN_ATTRS128
2166_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
2167{
2168 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2169 (__v8hi)_mm_srli_epi16(__A, __B),
2170 (__v8hi)__W);
2171}
2172
2173static __inline__ __m128i __DEFAULT_FN_ATTRS128
2174_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B)
2175{
2176 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2177 (__v8hi)_mm_srli_epi16(__A, __B),
2178 (__v8hi)_mm_setzero_si128());
2179}
2180
2181static __inline__ __m256i __DEFAULT_FN_ATTRS256
2182_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
2183{
2184 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2185 (__v16hi)_mm256_srli_epi16(__A, __B),
2186 (__v16hi)__W);
2187}
2188
2189static __inline__ __m256i __DEFAULT_FN_ATTRS256
2190_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
2191{
2192 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2193 (__v16hi)_mm256_srli_epi16(__A, __B),
2194 (__v16hi)_mm256_setzero_si256());
2195}
2196
2197static __inline__ __m128i __DEFAULT_FN_ATTRS128
2198_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
2199{
2200 return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
2201 (__v8hi) __A,
2202 (__v8hi) __W);
2203}
2204
2205static __inline__ __m128i __DEFAULT_FN_ATTRS128
2207{
2208 return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
2209 (__v8hi) __A,
2210 (__v8hi) _mm_setzero_si128 ());
2211}
2212
2213static __inline__ __m256i __DEFAULT_FN_ATTRS256
2214_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
2215{
2216 return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
2217 (__v16hi) __A,
2218 (__v16hi) __W);
2219}
2220
2221static __inline__ __m256i __DEFAULT_FN_ATTRS256
2223{
2224 return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
2225 (__v16hi) __A,
2226 (__v16hi) _mm256_setzero_si256 ());
2227}
2228
2229static __inline__ __m128i __DEFAULT_FN_ATTRS128
2230_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
2231{
2232 return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
2233 (__v16qi) __A,
2234 (__v16qi) __W);
2235}
2236
2237static __inline__ __m128i __DEFAULT_FN_ATTRS128
2239{
2240 return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
2241 (__v16qi) __A,
2242 (__v16qi) _mm_setzero_si128 ());
2243}
2244
2245static __inline__ __m256i __DEFAULT_FN_ATTRS256
2246_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
2247{
2248 return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
2249 (__v32qi) __A,
2250 (__v32qi) __W);
2251}
2252
2253static __inline__ __m256i __DEFAULT_FN_ATTRS256
2255{
2256 return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
2257 (__v32qi) __A,
2258 (__v32qi) _mm256_setzero_si256 ());
2259}
2260
2261
2262static __inline__ __m128i __DEFAULT_FN_ATTRS128
2263_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
2264{
2265 return (__m128i) __builtin_ia32_selectb_128(__M,
2266 (__v16qi) _mm_set1_epi8(__A),
2267 (__v16qi) __O);
2268}
2269
2270static __inline__ __m128i __DEFAULT_FN_ATTRS128
2272{
2273 return (__m128i) __builtin_ia32_selectb_128(__M,
2274 (__v16qi) _mm_set1_epi8(__A),
2275 (__v16qi) _mm_setzero_si128());
2276}
2277
2278static __inline__ __m256i __DEFAULT_FN_ATTRS256
2279_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
2280{
2281 return (__m256i) __builtin_ia32_selectb_256(__M,
2282 (__v32qi) _mm256_set1_epi8(__A),
2283 (__v32qi) __O);
2284}
2285
2286static __inline__ __m256i __DEFAULT_FN_ATTRS256
2288{
2289 return (__m256i) __builtin_ia32_selectb_256(__M,
2290 (__v32qi) _mm256_set1_epi8(__A),
2291 (__v32qi) _mm256_setzero_si256());
2292}
2293
2294static __inline __m128i __DEFAULT_FN_ATTRS128
2296{
2297 struct __loadu_epi16 {
2298 __m128i_u __v;
2299 } __attribute__((__packed__, __may_alias__));
2300 return ((const struct __loadu_epi16*)__P)->__v;
2301}
2302
2303static __inline__ __m128i __DEFAULT_FN_ATTRS128
2304_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
2305{
2306 return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
2307 (__v8hi) __W,
2308 (__mmask8) __U);
2309}
2310
2311static __inline__ __m128i __DEFAULT_FN_ATTRS128
2313{
2314 return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
2315 (__v8hi)
2317 (__mmask8) __U);
2318}
2319
2320static __inline __m256i __DEFAULT_FN_ATTRS256
2322{
2323 struct __loadu_epi16 {
2324 __m256i_u __v;
2325 } __attribute__((__packed__, __may_alias__));
2326 return ((const struct __loadu_epi16*)__P)->__v;
2327}
2328
2329static __inline__ __m256i __DEFAULT_FN_ATTRS256
2330_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
2331{
2332 return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
2333 (__v16hi) __W,
2334 (__mmask16) __U);
2335}
2336
2337static __inline__ __m256i __DEFAULT_FN_ATTRS256
2339{
2340 return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
2341 (__v16hi)
2343 (__mmask16) __U);
2344}
2345
2346static __inline __m128i __DEFAULT_FN_ATTRS128
2347_mm_loadu_epi8 (void const *__P)
2348{
2349 struct __loadu_epi8 {
2350 __m128i_u __v;
2351 } __attribute__((__packed__, __may_alias__));
2352 return ((const struct __loadu_epi8*)__P)->__v;
2353}
2354
2355static __inline__ __m128i __DEFAULT_FN_ATTRS128
2356_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
2357{
2358 return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
2359 (__v16qi) __W,
2360 (__mmask16) __U);
2361}
2362
2363static __inline__ __m128i __DEFAULT_FN_ATTRS128
2365{
2366 return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
2367 (__v16qi)
2369 (__mmask16) __U);
2370}
2371
2372static __inline __m256i __DEFAULT_FN_ATTRS256
2374{
2375 struct __loadu_epi8 {
2376 __m256i_u __v;
2377 } __attribute__((__packed__, __may_alias__));
2378 return ((const struct __loadu_epi8*)__P)->__v;
2379}
2380
2381static __inline__ __m256i __DEFAULT_FN_ATTRS256
2382_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
2383{
2384 return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
2385 (__v32qi) __W,
2386 (__mmask32) __U);
2387}
2388
2389static __inline__ __m256i __DEFAULT_FN_ATTRS256
2391{
2392 return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
2393 (__v32qi)
2395 (__mmask32) __U);
2396}
2397
2398static __inline void __DEFAULT_FN_ATTRS128
2399_mm_storeu_epi16 (void *__P, __m128i __A)
2400{
2401 struct __storeu_epi16 {
2402 __m128i_u __v;
2403 } __attribute__((__packed__, __may_alias__));
2404 ((struct __storeu_epi16*)__P)->__v = __A;
2405}
2406
2407static __inline__ void __DEFAULT_FN_ATTRS128
2408_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
2409{
2410 __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
2411 (__v8hi) __A,
2412 (__mmask8) __U);
2413}
2414
2415static __inline void __DEFAULT_FN_ATTRS256
2416_mm256_storeu_epi16 (void *__P, __m256i __A)
2417{
2418 struct __storeu_epi16 {
2419 __m256i_u __v;
2420 } __attribute__((__packed__, __may_alias__));
2421 ((struct __storeu_epi16*)__P)->__v = __A;
2422}
2423
2424static __inline__ void __DEFAULT_FN_ATTRS256
2425_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
2426{
2427 __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
2428 (__v16hi) __A,
2429 (__mmask16) __U);
2430}
2431
2432static __inline void __DEFAULT_FN_ATTRS128
2433_mm_storeu_epi8 (void *__P, __m128i __A)
2434{
2435 struct __storeu_epi8 {
2436 __m128i_u __v;
2437 } __attribute__((__packed__, __may_alias__));
2438 ((struct __storeu_epi8*)__P)->__v = __A;
2439}
2440
2441static __inline__ void __DEFAULT_FN_ATTRS128
2442_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
2443{
2444 __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
2445 (__v16qi) __A,
2446 (__mmask16) __U);
2447}
2448
2449static __inline void __DEFAULT_FN_ATTRS256
2450_mm256_storeu_epi8 (void *__P, __m256i __A)
2451{
2452 struct __storeu_epi8 {
2453 __m256i_u __v;
2454 } __attribute__((__packed__, __may_alias__));
2455 ((struct __storeu_epi8*)__P)->__v = __A;
2456}
2457
2458static __inline__ void __DEFAULT_FN_ATTRS256
2459_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
2460{
2461 __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
2462 (__v32qi) __A,
2463 (__mmask32) __U);
2464}
2465
2466static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2467_mm_test_epi8_mask (__m128i __A, __m128i __B)
2468{
2470}
2471
2472static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2473_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2474{
2475 return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2477}
2478
2479static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2480_mm256_test_epi8_mask (__m256i __A, __m256i __B)
2481{
2482 return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
2484}
2485
2486static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2487_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
2488{
2489 return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B),
2491}
2492
2493static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2494_mm_test_epi16_mask (__m128i __A, __m128i __B)
2495{
2497}
2498
2499static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2500_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
2501{
2502 return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B),
2504}
2505
2506static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2507_mm256_test_epi16_mask (__m256i __A, __m256i __B)
2508{
2509 return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B),
2511}
2512
2513static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2514_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
2515{
2516 return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B),
2518}
2519
2520static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2521_mm_testn_epi8_mask (__m128i __A, __m128i __B)
2522{
2524}
2525
2526static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2527_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2528{
2529 return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2531}
2532
2533static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2534_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
2535{
2536 return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B),
2538}
2539
2540static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2541_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
2542{
2543 return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B),
2545}
2546
2547static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2548_mm_testn_epi16_mask (__m128i __A, __m128i __B)
2549{
2551}
2552
2553static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2554_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
2555{
2557}
2558
2559static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2560_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
2561{
2562 return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B),
2564}
2565
2566static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2567_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
2568{
2569 return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B),
2571}
2572
2573static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2574_mm_movepi8_mask (__m128i __A)
2575{
2576 return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
2577}
2578
2579static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2581{
2582 return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
2583}
2584
2585static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2587{
2588 return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
2589}
2590
2591static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2593{
2594 return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
2595}
2596
2597static __inline__ __m128i __DEFAULT_FN_ATTRS128
2599{
2600 return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
2601}
2602
2603static __inline__ __m256i __DEFAULT_FN_ATTRS256
2605{
2606 return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
2607}
2608
2609static __inline__ __m128i __DEFAULT_FN_ATTRS128
2611{
2612 return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
2613}
2614
2615static __inline__ __m256i __DEFAULT_FN_ATTRS256
2617{
2618 return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
2619}
2620
2621static __inline__ __m128i __DEFAULT_FN_ATTRS128
2622_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
2623{
2624 return (__m128i)__builtin_ia32_selectb_128(__M,
2625 (__v16qi) _mm_broadcastb_epi8(__A),
2626 (__v16qi) __O);
2627}
2628
2629static __inline__ __m128i __DEFAULT_FN_ATTRS128
2631{
2632 return (__m128i)__builtin_ia32_selectb_128(__M,
2633 (__v16qi) _mm_broadcastb_epi8(__A),
2634 (__v16qi) _mm_setzero_si128());
2635}
2636
2637static __inline__ __m256i __DEFAULT_FN_ATTRS256
2638_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
2639{
2640 return (__m256i)__builtin_ia32_selectb_256(__M,
2641 (__v32qi) _mm256_broadcastb_epi8(__A),
2642 (__v32qi) __O);
2643}
2644
2645static __inline__ __m256i __DEFAULT_FN_ATTRS256
2647{
2648 return (__m256i)__builtin_ia32_selectb_256(__M,
2649 (__v32qi) _mm256_broadcastb_epi8(__A),
2650 (__v32qi) _mm256_setzero_si256());
2651}
2652
2653static __inline__ __m128i __DEFAULT_FN_ATTRS128
2654_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2655{
2656 return (__m128i)__builtin_ia32_selectw_128(__M,
2657 (__v8hi) _mm_broadcastw_epi16(__A),
2658 (__v8hi) __O);
2659}
2660
2661static __inline__ __m128i __DEFAULT_FN_ATTRS128
2663{
2664 return (__m128i)__builtin_ia32_selectw_128(__M,
2665 (__v8hi) _mm_broadcastw_epi16(__A),
2666 (__v8hi) _mm_setzero_si128());
2667}
2668
2669static __inline__ __m256i __DEFAULT_FN_ATTRS256
2670_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
2671{
2672 return (__m256i)__builtin_ia32_selectw_256(__M,
2673 (__v16hi) _mm256_broadcastw_epi16(__A),
2674 (__v16hi) __O);
2675}
2676
2677static __inline__ __m256i __DEFAULT_FN_ATTRS256
2679{
2680 return (__m256i)__builtin_ia32_selectw_256(__M,
2681 (__v16hi) _mm256_broadcastw_epi16(__A),
2682 (__v16hi) _mm256_setzero_si256());
2683}
2684
2685static __inline__ __m256i __DEFAULT_FN_ATTRS256
2686_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
2687{
2688 return (__m256i) __builtin_ia32_selectw_256 (__M,
2689 (__v16hi) _mm256_set1_epi16(__A),
2690 (__v16hi) __O);
2691}
2692
2693static __inline__ __m256i __DEFAULT_FN_ATTRS256
2695{
2696 return (__m256i) __builtin_ia32_selectw_256(__M,
2697 (__v16hi)_mm256_set1_epi16(__A),
2698 (__v16hi) _mm256_setzero_si256());
2699}
2700
2701static __inline__ __m128i __DEFAULT_FN_ATTRS128
2702_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
2703{
2704 return (__m128i) __builtin_ia32_selectw_128(__M,
2705 (__v8hi) _mm_set1_epi16(__A),
2706 (__v8hi) __O);
2707}
2708
2709static __inline__ __m128i __DEFAULT_FN_ATTRS128
2711{
2712 return (__m128i) __builtin_ia32_selectw_128(__M,
2713 (__v8hi) _mm_set1_epi16(__A),
2714 (__v8hi) _mm_setzero_si128());
2715}
2716
2717static __inline__ __m128i __DEFAULT_FN_ATTRS128
2718_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
2719{
2720 return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A);
2721}
2722
2723static __inline__ __m128i __DEFAULT_FN_ATTRS128
2724_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
2725{
2726 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2727 (__v8hi)_mm_permutexvar_epi16(__A, __B),
2728 (__v8hi) _mm_setzero_si128());
2729}
2730
2731static __inline__ __m128i __DEFAULT_FN_ATTRS128
2732_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
2733 __m128i __B)
2734{
2735 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2736 (__v8hi)_mm_permutexvar_epi16(__A, __B),
2737 (__v8hi)__W);
2738}
2739
2740static __inline__ __m256i __DEFAULT_FN_ATTRS256
2741_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
2742{
2743 return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
2744}
2745
2746static __inline__ __m256i __DEFAULT_FN_ATTRS256
2748 __m256i __B)
2749{
2750 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2751 (__v16hi)_mm256_permutexvar_epi16(__A, __B),
2752 (__v16hi)_mm256_setzero_si256());
2753}
2754
2755static __inline__ __m256i __DEFAULT_FN_ATTRS256
2756_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
2757 __m256i __B)
2758{
2759 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2760 (__v16hi)_mm256_permutexvar_epi16(__A, __B),
2761 (__v16hi)__W);
2762}
2763
/* Merge-masking _mm_alignr_epi8 (N is converted to int); byte lanes not
 * selected by U come from W. */
#define _mm_mask_alignr_epi8(W, U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
      (__v16qi)(__m128i)(W)))
2768
/* Zero-masking _mm_alignr_epi8; byte lanes not selected by U are zeroed. */
#define _mm_maskz_alignr_epi8(U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
      (__v16qi)_mm_setzero_si128()))
2773
/* Merge-masking _mm256_alignr_epi8 (N is converted to int); byte lanes not
 * selected by U come from W. */
#define _mm256_mask_alignr_epi8(W, U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
      (__v32qi)(__m256i)(W)))
2778
/* Zero-masking _mm256_alignr_epi8; byte lanes not selected by U are
 * zeroed. */
#define _mm256_maskz_alignr_epi8(U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
      (__v32qi)_mm256_setzero_si256()))
2783
/* Thin wrapper over dbpsadbw128: A and B are passed as byte vectors and imm
 * as an int. */
#define _mm_dbsad_epu8(A, B, imm) \
  ((__m128i)__builtin_ia32_dbpsadbw128( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(imm)))
2787
/* Merge-masking _mm_dbsad_epu8; 16-bit result lanes not selected by U come
 * from W. */
#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
      (__v8hi)(__m128i)(W)))
2792
/* Zero-masking _mm_dbsad_epu8; 16-bit result lanes not selected by U are
 * zeroed. */
#define _mm_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
      (__v8hi)_mm_setzero_si128()))
2797
/* Thin wrapper over dbpsadbw256: A and B are passed as byte vectors and imm
 * as an int. */
#define _mm256_dbsad_epu8(A, B, imm) \
  ((__m256i)__builtin_ia32_dbpsadbw256( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(imm)))
2801
/* Merge-masking _mm256_dbsad_epu8; 16-bit result lanes not selected by U
 * come from W. */
#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
      (__v16hi)(__m256i)(W)))
2806
/* Zero-masking _mm256_dbsad_epu8; 16-bit result lanes not selected by U are
 * zeroed. */
#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
2811
2812static __inline__ short __DEFAULT_FN_ATTRS128
2814 return __builtin_reduce_add((__v8hi)__W);
2815}
2816
2817static __inline__ short __DEFAULT_FN_ATTRS128
2819 return __builtin_reduce_mul((__v8hi)__W);
2820}
2821
2822static __inline__ short __DEFAULT_FN_ATTRS128
2824 return __builtin_reduce_and((__v8hi)__W);
2825}
2826
2827static __inline__ short __DEFAULT_FN_ATTRS128
2829 return __builtin_reduce_or((__v8hi)__W);
2830}
2831
2832static __inline__ short __DEFAULT_FN_ATTRS128
2834 __W = _mm_maskz_mov_epi16(__M, __W);
2835 return __builtin_reduce_add((__v8hi)__W);
2836}
2837
2838static __inline__ short __DEFAULT_FN_ATTRS128
2840 __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W);
2841 return __builtin_reduce_mul((__v8hi)__W);
2842}
2843
2844static __inline__ short __DEFAULT_FN_ATTRS128
2846 __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W);
2847 return __builtin_reduce_and((__v8hi)__W);
2848}
2849
/* Masked bitwise OR: __W is first passed through _mm_maskz_mov_epi16 with __M,
 * then reduced as __v8hi with __builtin_reduce_or; the result is returned as
 * short.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M,
 * zero being the identity for OR. The declarator line is missing from this
 * extract; presumably _mm_mask_reduce_or_epi16 -- confirm against the header. */
2850static __inline__ short __DEFAULT_FN_ATTRS128
2852 __W = _mm_maskz_mov_epi16(__M, __W);
2853 return __builtin_reduce_or((__v8hi)__W);
2854}
2855
/* Reduces __V, viewed as __v8hi, with __builtin_reduce_max and returns the
 * scalar result as short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_max_epi16 -- confirm against the header. */
2856static __inline__ short __DEFAULT_FN_ATTRS128
2858 return __builtin_reduce_max((__v8hi)__V);
2859}
2860
/* Reduces __V, viewed as __v8hu, with __builtin_reduce_max and returns the
 * scalar result as unsigned short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_max_epu16 -- confirm against the header. */
2861static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2863 return __builtin_reduce_max((__v8hu)__V);
2864}
2865
/* Reduces __V, viewed as __v8hi, with __builtin_reduce_min and returns the
 * scalar result as short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_min_epi16 -- confirm against the header. */
2866static __inline__ short __DEFAULT_FN_ATTRS128
2868 return __builtin_reduce_min((__v8hi)__V);
2869}
2870
/* Reduces __V, viewed as __v8hu, with __builtin_reduce_min and returns the
 * scalar result as unsigned short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_min_epu16 -- confirm against the header. */
2871static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2873 return __builtin_reduce_min((__v8hu)__V);
2874}
2875
/* Masked signed maximum: __V is blended via _mm_mask_mov_epi16 with a vector
 * of -32767-1 (i.e. -32768) under __M, then reduced as __v8hi with
 * __builtin_reduce_max; the result is returned as short.
 * NOTE(review): presumably lanes not selected by __M take -32768 so they can
 * never win the maximum. The declarator line is missing from this extract;
 * presumably _mm_mask_reduce_max_epi16 -- confirm against the header. */
2876static __inline__ short __DEFAULT_FN_ATTRS128
2878 __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V);
2879 return __builtin_reduce_max((__v8hi)__V);
2880}
2881
/* Masked unsigned maximum: __V is first passed through _mm_maskz_mov_epi16
 * with __M, then reduced as __v8hu with __builtin_reduce_max; the result is
 * returned as unsigned short.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M,
 * zero being the smallest unsigned value. The declarator line is missing from
 * this extract; presumably _mm_mask_reduce_max_epu16 -- confirm. */
2882static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2884 __V = _mm_maskz_mov_epi16(__M, __V);
2885 return __builtin_reduce_max((__v8hu)__V);
2886}
2887
/* Masked signed minimum: __V is blended via _mm_mask_mov_epi16 with a vector
 * of 32767 under __M, then reduced as __v8hi with __builtin_reduce_min; the
 * result is returned as short.
 * NOTE(review): presumably lanes not selected by __M take 32767 so they can
 * never win the minimum. The declarator line is missing from this extract;
 * presumably _mm_mask_reduce_min_epi16 -- confirm against the header. */
2888static __inline__ short __DEFAULT_FN_ATTRS128
2890 __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V);
2891 return __builtin_reduce_min((__v8hi)__V);
2892}
2893
/* Masked unsigned minimum: __V is blended via _mm_mask_mov_epi16 with a vector
 * of -1 under __M, then reduced as __v8hu with __builtin_reduce_min; the
 * result is returned as unsigned short.
 * NOTE(review): presumably lanes not selected by __M become all-ones, i.e. the
 * largest value when viewed as unsigned. The declarator line is missing from
 * this extract; presumably _mm_mask_reduce_min_epu16 -- confirm. */
2894static __inline__ unsigned short __DEFAULT_FN_ATTRS128
2896 __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V);
2897 return __builtin_reduce_min((__v8hu)__V);
2898}
2899
/* Reduces __W, viewed as __v16hi, with __builtin_reduce_add and returns the
 * scalar result as short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_add_epi16 -- confirm against the header. */
2900static __inline__ short __DEFAULT_FN_ATTRS256
2902 return __builtin_reduce_add((__v16hi)__W);
2903}
2904
/* Reduces __W, viewed as __v16hi, with __builtin_reduce_mul and returns the
 * scalar result as short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_mul_epi16 -- confirm against the header. */
2905static __inline__ short __DEFAULT_FN_ATTRS256
2907 return __builtin_reduce_mul((__v16hi)__W);
2908}
2909
/* Reduces __W, viewed as __v16hi, with __builtin_reduce_and and returns the
 * scalar result as short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_and_epi16 -- confirm against the header. */
2910static __inline__ short __DEFAULT_FN_ATTRS256
2912 return __builtin_reduce_and((__v16hi)__W);
2913}
2914
/* Reduces __W, viewed as __v16hi, with __builtin_reduce_or and returns the
 * scalar result as short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_or_epi16 -- confirm against the header. */
2915static __inline__ short __DEFAULT_FN_ATTRS256
2917 return __builtin_reduce_or((__v16hi)__W);
2918}
2919
/* Masked sum: __W is first passed through _mm256_maskz_mov_epi16 with __M,
 * then reduced as __v16hi with __builtin_reduce_add; the result is returned as
 * short.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M so
 * they add nothing. The declarator line is missing from this extract;
 * presumably _mm256_mask_reduce_add_epi16 -- confirm against the header. */
2920static __inline__ short __DEFAULT_FN_ATTRS256
2922 __W = _mm256_maskz_mov_epi16(__M, __W);
2923 return __builtin_reduce_add((__v16hi)__W);
2924}
2925
/* Masked product: __W is blended via _mm256_mask_mov_epi16 with a vector of 1s
 * (_mm256_set1_epi16(1)) under __M, then reduced as __v16hi with
 * __builtin_reduce_mul; the result is returned as short.
 * NOTE(review): presumably lanes not selected by __M become 1, the
 * multiplicative identity. The declarator line is missing from this extract;
 * presumably _mm256_mask_reduce_mul_epi16 -- confirm against the header. */
2926static __inline__ short __DEFAULT_FN_ATTRS256
2928 __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W);
2929 return __builtin_reduce_mul((__v16hi)__W);
2930}
2931
/* Masked bitwise AND: __W is blended via _mm256_mask_mov_epi16 with a vector
 * of -1 (_mm256_set1_epi16(-1)) under __M, then reduced as __v16hi with
 * __builtin_reduce_and; the result is returned as short.
 * NOTE(review): presumably lanes not selected by __M become all-ones, the
 * identity for AND. The declarator line is missing from this extract;
 * presumably _mm256_mask_reduce_and_epi16 -- confirm against the header. */
2932static __inline__ short __DEFAULT_FN_ATTRS256
2934 __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W);
2935 return __builtin_reduce_and((__v16hi)__W);
2936}
2937
/* Masked bitwise OR: __W is first passed through _mm256_maskz_mov_epi16 with
 * __M, then reduced as __v16hi with __builtin_reduce_or; the result is
 * returned as short.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M,
 * zero being the identity for OR. The declarator line is missing from this
 * extract; presumably _mm256_mask_reduce_or_epi16 -- confirm. */
2938static __inline__ short __DEFAULT_FN_ATTRS256
2940 __W = _mm256_maskz_mov_epi16(__M, __W);
2941 return __builtin_reduce_or((__v16hi)__W);
2942}
2943
/* Reduces __V, viewed as __v16hi, with __builtin_reduce_max and returns the
 * scalar result as short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_max_epi16 -- confirm against the header. */
2944static __inline__ short __DEFAULT_FN_ATTRS256
2946 return __builtin_reduce_max((__v16hi)__V);
2947}
2948
/* Reduces __V, viewed as __v16hu, with __builtin_reduce_max and returns the
 * scalar result as unsigned short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_max_epu16 -- confirm against the header. */
2949static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2951 return __builtin_reduce_max((__v16hu)__V);
2952}
2953
/* Reduces __V, viewed as __v16hi, with __builtin_reduce_min and returns the
 * scalar result as short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_min_epi16 -- confirm against the header. */
2954static __inline__ short __DEFAULT_FN_ATTRS256
2956 return __builtin_reduce_min((__v16hi)__V);
2957}
2958
/* Reduces __V, viewed as __v16hu, with __builtin_reduce_min and returns the
 * scalar result as unsigned short.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_min_epu16 -- confirm against the header. */
2959static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2961 return __builtin_reduce_min((__v16hu)__V);
2962}
2963
/* Masked signed maximum: __V is blended via _mm256_mask_mov_epi16 with a
 * vector of -32767-1 (i.e. -32768) under __M, then reduced as __v16hi with
 * __builtin_reduce_max; the result is returned as short.
 * NOTE(review): presumably lanes not selected by __M take -32768 so they can
 * never win the maximum. The declarator line is missing from this extract;
 * presumably _mm256_mask_reduce_max_epi16 -- confirm against the header. */
2964static __inline__ short __DEFAULT_FN_ATTRS256
2966 __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V);
2967 return __builtin_reduce_max((__v16hi)__V);
2968}
2969
/* Masked unsigned maximum: __V is first passed through _mm256_maskz_mov_epi16
 * with __M, then reduced as __v16hu with __builtin_reduce_max; the result is
 * returned as unsigned short.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M,
 * zero being the smallest unsigned value. The declarator line is missing from
 * this extract; presumably _mm256_mask_reduce_max_epu16 -- confirm. */
2970static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2972 __V = _mm256_maskz_mov_epi16(__M, __V);
2973 return __builtin_reduce_max((__v16hu)__V);
2974}
2975
/* Masked signed minimum: __V is blended via _mm256_mask_mov_epi16 with a
 * vector of 32767 under __M, then reduced as __v16hi with
 * __builtin_reduce_min; the result is returned as short.
 * NOTE(review): presumably lanes not selected by __M take 32767 so they can
 * never win the minimum. The declarator line is missing from this extract;
 * presumably _mm256_mask_reduce_min_epi16 -- confirm against the header. */
2976static __inline__ short __DEFAULT_FN_ATTRS256
2978 __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V);
2979 return __builtin_reduce_min((__v16hi)__V);
2980}
2981
/* Masked unsigned minimum: __V is blended via _mm256_mask_mov_epi16 with a
 * vector of -1 under __M, then reduced as __v16hu with __builtin_reduce_min;
 * the result is returned as unsigned short.
 * NOTE(review): presumably lanes not selected by __M become all-ones, i.e. the
 * largest value when viewed as unsigned. The declarator line is missing from
 * this extract; presumably _mm256_mask_reduce_min_epu16 -- confirm. */
2982static __inline__ unsigned short __DEFAULT_FN_ATTRS256
2984 __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V);
2985 return __builtin_reduce_min((__v16hu)__V);
2986}
2987
/* Reduces __W, viewed as __v16qs, with __builtin_reduce_add and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_add_epi8 -- confirm against the header. */
2988static __inline__ signed char __DEFAULT_FN_ATTRS128
2990 return __builtin_reduce_add((__v16qs)__W);
2991}
2992
/* Reduces __W, viewed as __v16qs, with __builtin_reduce_mul and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_mul_epi8 -- confirm against the header. */
2993static __inline__ signed char __DEFAULT_FN_ATTRS128
2995 return __builtin_reduce_mul((__v16qs)__W);
2996}
2997
/* Reduces __W, viewed as __v16qs, with __builtin_reduce_and and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_and_epi8 -- confirm against the header. */
2998static __inline__ signed char __DEFAULT_FN_ATTRS128
3000 return __builtin_reduce_and((__v16qs)__W);
3001}
3002
/* Reduces __W, viewed as __v16qs, with __builtin_reduce_or and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_or_epi8 -- confirm against the header. */
3003static __inline__ signed char __DEFAULT_FN_ATTRS128
3005 return __builtin_reduce_or((__v16qs)__W);
3006}
3007
/* Masked sum: __W is first passed through _mm_maskz_mov_epi8 with __M, then
 * reduced as __v16qs with __builtin_reduce_add; the result is returned as
 * signed char.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M so
 * they add nothing. The declarator line is missing from this extract;
 * presumably _mm_mask_reduce_add_epi8 -- confirm against the header. */
3008static __inline__ signed char __DEFAULT_FN_ATTRS128
3010 __W = _mm_maskz_mov_epi8(__M, __W);
3011 return __builtin_reduce_add((__v16qs)__W);
3012}
3013
/* Masked product: __W is blended via _mm_mask_mov_epi8 with a vector of 1s
 * (_mm_set1_epi8(1)) under __M, then reduced as __v16qs with
 * __builtin_reduce_mul; the result is returned as signed char.
 * NOTE(review): presumably lanes not selected by __M become 1, the
 * multiplicative identity. The declarator line is missing from this extract;
 * presumably _mm_mask_reduce_mul_epi8 -- confirm against the header. */
3014static __inline__ signed char __DEFAULT_FN_ATTRS128
3016 __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W);
3017 return __builtin_reduce_mul((__v16qs)__W);
3018}
3019
/* Masked bitwise AND: __W is blended via _mm_mask_mov_epi8 with a vector of -1
 * (_mm_set1_epi8(-1)) under __M, then reduced as __v16qs with
 * __builtin_reduce_and; the result is returned as signed char.
 * NOTE(review): presumably lanes not selected by __M become all-ones, the
 * identity for AND. The declarator line is missing from this extract;
 * presumably _mm_mask_reduce_and_epi8 -- confirm against the header. */
3020static __inline__ signed char __DEFAULT_FN_ATTRS128
3022 __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W);
3023 return __builtin_reduce_and((__v16qs)__W);
3024}
3025
/* Masked bitwise OR: __W is first passed through _mm_maskz_mov_epi8 with __M,
 * then reduced as __v16qs with __builtin_reduce_or; the result is returned as
 * signed char.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M,
 * zero being the identity for OR. The declarator line is missing from this
 * extract; presumably _mm_mask_reduce_or_epi8 -- confirm against the header. */
3026static __inline__ signed char __DEFAULT_FN_ATTRS128
3028 __W = _mm_maskz_mov_epi8(__M, __W);
3029 return __builtin_reduce_or((__v16qs)__W);
3030}
3031
/* Reduces __V, viewed as __v16qs, with __builtin_reduce_max and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_max_epi8 -- confirm against the header. */
3032static __inline__ signed char __DEFAULT_FN_ATTRS128
3034 return __builtin_reduce_max((__v16qs)__V);
3035}
3036
/* Reduces __V, viewed as __v16qu, with __builtin_reduce_max and returns the
 * scalar result as unsigned char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_max_epu8 -- confirm against the header. */
3037static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3039 return __builtin_reduce_max((__v16qu)__V);
3040}
3041
/* Reduces __V, viewed as __v16qs, with __builtin_reduce_min and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_min_epi8 -- confirm against the header. */
3042static __inline__ signed char __DEFAULT_FN_ATTRS128
3044 return __builtin_reduce_min((__v16qs)__V);
3045}
3046
/* Reduces __V, viewed as __v16qu, with __builtin_reduce_min and returns the
 * scalar result as unsigned char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm_reduce_min_epu8 -- confirm against the header. */
3047static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3049 return __builtin_reduce_min((__v16qu)__V);
3050}
3051
/* Masked signed maximum: __V is blended via _mm_mask_mov_epi8 with a vector of
 * -127-1 (i.e. -128) under __M, then reduced as __v16qs with
 * __builtin_reduce_max; the result is returned as signed char.
 * NOTE(review): presumably lanes not selected by __M take -128 so they can
 * never win the maximum. The declarator line is missing from this extract;
 * presumably _mm_mask_reduce_max_epi8 -- confirm against the header. */
3052static __inline__ signed char __DEFAULT_FN_ATTRS128
3054 __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V);
3055 return __builtin_reduce_max((__v16qs)__V);
3056}
3057
/* Masked unsigned maximum: __V is first passed through _mm_maskz_mov_epi8 with
 * __M, then reduced as __v16qu with __builtin_reduce_max; the result is
 * returned as unsigned char.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M,
 * zero being the smallest unsigned value. The declarator line is missing from
 * this extract; presumably _mm_mask_reduce_max_epu8 -- confirm. */
3058static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3060 __V = _mm_maskz_mov_epi8(__M, __V);
3061 return __builtin_reduce_max((__v16qu)__V);
3062}
3063
/* Masked signed minimum: __V is blended via _mm_mask_mov_epi8 with a vector of
 * 127 under __M, then reduced as __v16qs with __builtin_reduce_min; the result
 * is returned as signed char.
 * NOTE(review): presumably lanes not selected by __M take 127 so they can
 * never win the minimum. The declarator line is missing from this extract;
 * presumably _mm_mask_reduce_min_epi8 -- confirm against the header. */
3064static __inline__ signed char __DEFAULT_FN_ATTRS128
3066 __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V);
3067 return __builtin_reduce_min((__v16qs)__V);
3068}
3069
/* Masked unsigned minimum: __V is blended via _mm_mask_mov_epi8 with a vector
 * of -1 under __M, then reduced as __v16qu with __builtin_reduce_min; the
 * result is returned as unsigned char.
 * NOTE(review): presumably lanes not selected by __M become all-ones, i.e. the
 * largest value when viewed as unsigned. The declarator line is missing from
 * this extract; presumably _mm_mask_reduce_min_epu8 -- confirm. */
3070static __inline__ unsigned char __DEFAULT_FN_ATTRS128
3072 __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V);
3073 return __builtin_reduce_min((__v16qu)__V);
3074}
3075
/* Reduces __W, viewed as __v32qs, with __builtin_reduce_add and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_add_epi8 -- confirm against the header. */
3076static __inline__ signed char __DEFAULT_FN_ATTRS256
3078 return __builtin_reduce_add((__v32qs)__W);
3079}
3080
/* Reduces __W, viewed as __v32qs, with __builtin_reduce_mul and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_mul_epi8 -- confirm against the header. */
3081static __inline__ signed char __DEFAULT_FN_ATTRS256
3083 return __builtin_reduce_mul((__v32qs)__W);
3084}
3085
/* Reduces __W, viewed as __v32qs, with __builtin_reduce_and and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_and_epi8 -- confirm against the header. */
3086static __inline__ signed char __DEFAULT_FN_ATTRS256
3088 return __builtin_reduce_and((__v32qs)__W);
3089}
3090
/* Reduces __W, viewed as __v32qs, with __builtin_reduce_or and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_or_epi8 -- confirm against the header. */
3091static __inline__ signed char __DEFAULT_FN_ATTRS256
3093 return __builtin_reduce_or((__v32qs)__W);
3094}
3095
/* Masked sum: __W is first passed through _mm256_maskz_mov_epi8 with __M, then
 * reduced as __v32qs with __builtin_reduce_add; the result is returned as
 * signed char.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M so
 * they add nothing. The declarator line is missing from this extract;
 * presumably _mm256_mask_reduce_add_epi8 -- confirm against the header. */
3096static __inline__ signed char __DEFAULT_FN_ATTRS256
3098 __W = _mm256_maskz_mov_epi8(__M, __W);
3099 return __builtin_reduce_add((__v32qs)__W);
3100}
3101
/* Masked product: __W is blended via _mm256_mask_mov_epi8 with a vector of 1s
 * (_mm256_set1_epi8(1)) under __M, then reduced as __v32qs with
 * __builtin_reduce_mul; the result is returned as signed char.
 * NOTE(review): presumably lanes not selected by __M become 1, the
 * multiplicative identity. The declarator line is missing from this extract;
 * presumably _mm256_mask_reduce_mul_epi8 -- confirm against the header. */
3102static __inline__ signed char __DEFAULT_FN_ATTRS256
3104 __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W);
3105 return __builtin_reduce_mul((__v32qs)__W);
3106}
3107
/* Masked bitwise AND: __W is blended via _mm256_mask_mov_epi8 with a vector of
 * -1 (_mm256_set1_epi8(-1)) under __M, then reduced as __v32qs with
 * __builtin_reduce_and; the result is returned as signed char.
 * NOTE(review): presumably lanes not selected by __M become all-ones, the
 * identity for AND. The declarator line is missing from this extract;
 * presumably _mm256_mask_reduce_and_epi8 -- confirm against the header. */
3108static __inline__ signed char __DEFAULT_FN_ATTRS256
3110 __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W);
3111 return __builtin_reduce_and((__v32qs)__W);
3112}
3113
/* Masked bitwise OR: __W is first passed through _mm256_maskz_mov_epi8 with
 * __M, then reduced as __v32qs with __builtin_reduce_or; the result is
 * returned as signed char.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M,
 * zero being the identity for OR. The declarator line is missing from this
 * extract; presumably _mm256_mask_reduce_or_epi8 -- confirm. */
3114static __inline__ signed char __DEFAULT_FN_ATTRS256
3116 __W = _mm256_maskz_mov_epi8(__M, __W);
3117 return __builtin_reduce_or((__v32qs)__W);
3118}
3119
/* Reduces __V, viewed as __v32qs, with __builtin_reduce_max and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_max_epi8 -- confirm against the header. */
3120static __inline__ signed char __DEFAULT_FN_ATTRS256
3122 return __builtin_reduce_max((__v32qs)__V);
3123}
3124
/* Reduces __V, viewed as __v32qu, with __builtin_reduce_max and returns the
 * scalar result as unsigned char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_max_epu8 -- confirm against the header. */
3125static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3127 return __builtin_reduce_max((__v32qu)__V);
3128}
3129
/* Reduces __V, viewed as __v32qs, with __builtin_reduce_min and returns the
 * scalar result as signed char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_min_epi8 -- confirm against the header. */
3130static __inline__ signed char __DEFAULT_FN_ATTRS256
3132 return __builtin_reduce_min((__v32qs)__V);
3133}
3134
/* Reduces __V, viewed as __v32qu, with __builtin_reduce_min and returns the
 * scalar result as unsigned char.
 * NOTE(review): the declarator line (name and parameters) is missing from this
 * extract; presumably _mm256_reduce_min_epu8 -- confirm against the header. */
3135static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3137 return __builtin_reduce_min((__v32qu)__V);
3138}
3139
/* Masked signed maximum: __V is blended via _mm256_mask_mov_epi8 with a vector
 * of -127-1 (i.e. -128) under __M, then reduced as __v32qs with
 * __builtin_reduce_max; the result is returned as signed char.
 * NOTE(review): presumably lanes not selected by __M take -128 so they can
 * never win the maximum. The declarator line is missing from this extract;
 * presumably _mm256_mask_reduce_max_epi8 -- confirm against the header. */
3140static __inline__ signed char __DEFAULT_FN_ATTRS256
3142 __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V);
3143 return __builtin_reduce_max((__v32qs)__V);
3144}
3145
/* Masked unsigned maximum: __V is first passed through _mm256_maskz_mov_epi8
 * with __M, then reduced as __v32qu with __builtin_reduce_max; the result is
 * returned as unsigned char.
 * NOTE(review): presumably the maskz move zeroes lanes not selected by __M,
 * zero being the smallest unsigned value. The declarator line is missing from
 * this extract; presumably _mm256_mask_reduce_max_epu8 -- confirm. */
3146static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3148 __V = _mm256_maskz_mov_epi8(__M, __V);
3149 return __builtin_reduce_max((__v32qu)__V);
3150}
3151
/* Masked signed minimum: __V is blended via _mm256_mask_mov_epi8 with a vector
 * of 127 under __M, then reduced as __v32qs with __builtin_reduce_min; the
 * result is returned as signed char.
 * NOTE(review): presumably lanes not selected by __M take 127 so they can
 * never win the minimum. The declarator line is missing from this extract;
 * presumably _mm256_mask_reduce_min_epi8 -- confirm against the header. */
3152static __inline__ signed char __DEFAULT_FN_ATTRS256
3154 __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V);
3155 return __builtin_reduce_min((__v32qs)__V);
3156}
3157
/* Masked unsigned minimum: __V is blended via _mm256_mask_mov_epi8 with a
 * vector of -1 under __M, then reduced as __v32qu with __builtin_reduce_min;
 * the result is returned as unsigned char.
 * NOTE(review): presumably lanes not selected by __M become all-ones, i.e. the
 * largest value when viewed as unsigned. The declarator line is missing from
 * this extract; presumably _mm256_mask_reduce_min_epu8 -- confirm. */
3158static __inline__ unsigned char __DEFAULT_FN_ATTRS256
3160 __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V);
3161 return __builtin_reduce_min((__v32qu)__V);
3162}
3163
3164#undef __DEFAULT_FN_ATTRS128
3165#undef __DEFAULT_FN_ATTRS256
3166
3167#endif /* __AVX512VLBWINTRIN_H */
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a)
Computes the absolute value of each signed byte in the 256-bit integer vector __a and returns each va...
Definition: avx2intrin.h:99
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi16(__m256i __a, __m128i __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by the number of bits giv...
Definition: avx2intrin.h:2394
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b)
Computes the bitwise AND of the 256-bit integer vectors in __a and __b.
Definition: avx2intrin.h:455
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu16(__m256i __a, __m256i __b)
Compares the corresponding unsigned 16-bit integers in the two 256-bit vectors of [16 x i16] in __a a...
Definition: avx2intrin.h:1176
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a, __m256i __b)
Compares the corresponding unsigned 16-bit integers in the two 256-bit vectors of [16 x i16] in __a a...
Definition: avx2intrin.h:1290
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi32(__m256i __a, __m256i __b)
Converts the elements of two 256-bit vectors of [8 x i32] to 16-bit integers using signed saturation,...
Definition: avx2intrin.h:196
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi16(__m128i __V)
Sign-extends bytes from the 128-bit integer vector in __V and returns the 16-bit values in the corres...
Definition: avx2intrin.h:1358
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shuffle_epi8(__m256i __a, __m256i __b)
Shuffles 8-bit integers in the 256-bit integer vector __a according to control information in the 256...
Definition: avx2intrin.h:1901
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastb_epi8(__m128i __X)
Broadcasts the low byte from the 128-bit integer vector in __X to all bytes of the 128-bit result.
Definition: avx2intrin.h:3227
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi16(__m128i __V)
Zero-extends bytes from the 128-bit integer vector in __V and returns the 16-bit values in the corres...
Definition: avx2intrin.h:1517
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu8(__m256i __a, __m256i __b)
Subtracts 8-bit integers from corresponding bytes of two 256-bit integer vectors using unsigned satur...
Definition: avx2intrin.h:2659
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a, __m256i __b)
Compares the corresponding signed bytes in the two 256-bit integer vectors in __a and __b and returns...
Definition: avx2intrin.h:1214
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi16(__m256i __a, __m256i __b)
Subtracts 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16].
Definition: avx2intrin.h:2528
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a, __m256i __b)
Compares the corresponding signed bytes in the two 256-bit integer vectors in __a and __b and returns...
Definition: avx2intrin.h:1100
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maddubs_epi16(__m256i __a, __m256i __b)
Multiplies each unsigned byte from the 256-bit integer vector in __a with the corresponding signed by...
Definition: avx2intrin.h:1049
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
Unpacks and interleaves 8-bit integers from parts of the 256-bit integer vectors in __a and __b to fo...
Definition: avx2intrin.h:2719
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu16(__m256i __a, __m256i __b)
Adds 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using unsigned ...
Definition: avx2intrin.h:409
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
Multiplies signed 16-bit integer elements of two 256-bit vectors of [16 x i16], truncates the 32-bit ...
Definition: avx2intrin.h:1697
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi16(__m256i __a, __m128i __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a left by the number of bits spec...
Definition: avx2intrin.h:2150
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi8(__m256i __a, __m256i __b)
Adds 8-bit integers from corresponding bytes of two 256-bit integer vectors and returns the lower 8 b...
Definition: avx2intrin.h:278
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi16(__m256i __a, __m256i __b)
Adds 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] and returns the...
Definition: avx2intrin.h:297
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi8(__m256i __a, __m256i __b)
Subtracts 8-bit integers from corresponding bytes of two 256-bit integer vectors using signed saturat...
Definition: avx2intrin.h:2606
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a, __m256i __b)
Compares the corresponding unsigned bytes in the two 256-bit integer vectors in __a and __b and retur...
Definition: avx2intrin.h:1157
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a, __m256i __b)
Compares the corresponding signed 16-bit integers in the two 256-bit vectors of [16 x i16] in __a and...
Definition: avx2intrin.h:1233
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a, __m256i __b)
Compares the corresponding signed 16-bit integers in the two 256-bit vectors of [16 x i16] in __a and...
Definition: avx2intrin.h:1119
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastb_epi8(__m128i __X)
Broadcasts the low byte from the 128-bit integer vector in __X to all bytes of the 256-bit result.
Definition: avx2intrin.h:3163
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors of [16 x i16] in __a and __...
Definition: avx2intrin.h:2754
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhi_epi16(__m256i __a, __m256i __b)
Multiplies signed 16-bit integer elements of two 256-bit vectors of [16 x i16], and returns the upper...
Definition: avx2intrin.h:1735
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi16(__m256i __a, __m256i __b)
Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit integers using signed saturation,...
Definition: avx2intrin.h:164
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastw_epi16(__m128i __X)
Broadcasts the low element from the 128-bit vector of [8 x i16] in __X to all elements of the result'...
Definition: avx2intrin.h:3243
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi8(__m256i __a, __m256i __b)
Subtracts 8-bit integers from corresponding bytes of two 256-bit integer vectors.
Definition: avx2intrin.h:2501
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhi_epu16(__m256i __a, __m256i __b)
Multiplies unsigned 16-bit integer elements of two 256-bit vectors of [16 x i16], and returns the upp...
Definition: avx2intrin.h:1716
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a left by __count bits,...
Definition: avx2intrin.h:2129
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd_epi16(__m256i __a, __m256i __b)
Multiplies corresponding 16-bit elements of two 256-bit vectors of [16 x i16], forming 32-bit interme...
Definition: avx2intrin.h:1081
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu16(__m256i __a, __m256i __b)
Computes the averages of the corresponding unsigned 16-bit integers in the two 256-bit vectors of [16...
Definition: avx2intrin.h:525
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a, __m256i __b)
Compares the corresponding unsigned bytes in the two 256-bit integer vectors in __a and __b and retur...
Definition: avx2intrin.h:1271
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi32(__m256i __V1, __m256i __V2)
Converts elements from two 256-bit vectors of [8 x i32] to 16-bit integers using unsigned saturation,...
Definition: avx2intrin.h:259
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
Unpacks and interleaves 8-bit integers from parts of the 256-bit integer vectors in __a and __b to fo...
Definition: avx2intrin.h:2852
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi16(__m256i __a, __m256i __b)
Subtracts 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using sign...
Definition: avx2intrin.h:2632
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastw_epi16(__m128i __X)
Broadcasts the low element from the 128-bit vector of [8 x i16] in __X to all elements of the result'...
Definition: avx2intrin.h:3179
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi16(__m256i __a, __m256i __b)
Multiplies signed 16-bit integer elements of two 256-bit vectors of [16 x i16], and returns the lower...
Definition: avx2intrin.h:1754
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by __count bits,...
Definition: avx2intrin.h:2250
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors of [16 x i16] in __a and __...
Definition: avx2intrin.h:2887
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu8(__m256i __a, __m256i __b)
Adds 8-bit integers from corresponding bytes of two 256-bit integer vectors using unsigned saturation...
Definition: avx2intrin.h:391
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi16(__m256i __a, __m256i __b)
Converts elements from two 256-bit vectors of [16 x i16] to 8-bit integers using unsigned saturation,...
Definition: avx2intrin.h:227
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi8(__m256i __a, __m256i __b)
Adds 8-bit integers from corresponding bytes of two 256-bit integer vectors using signed saturation,...
Definition: avx2intrin.h:354
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi16(__m256i __a, __m128i __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by the number of bits giv...
Definition: avx2intrin.h:2272
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a)
Computes the absolute value of each signed 16-bit element in the 256-bit vector of [16 x i16] in __a ...
Definition: avx2intrin.h:116
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi16(__m256i __a, __m256i __b)
Adds 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using signed sa...
Definition: avx2intrin.h:372
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu8(__m256i __a, __m256i __b)
Computes the averages of the corresponding unsigned bytes in the two 256-bit integer vectors in __a a...
Definition: avx2intrin.h:499
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu16(__m256i __a, __m256i __b)
Subtracts 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using unsi...
Definition: avx2intrin.h:2685
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by __count bits,...
Definition: avx2intrin.h:2373
unsigned int __mmask32
unsigned char __mmask8
Definition: avx512fintrin.h:41
unsigned short __mmask16
Definition: avx512fintrin.h:42
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi16(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi16(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_movepi8_mask(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi8(__m128i __W, __mmask16 __U, void const *__P)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi8(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi16(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_add_epi16(__mmask16 __M, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
#define _mm256_cmpeq_epi8_mask(A, B)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_max_epi8(__m128i __V)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256 _mm256_reduce_min_epu8(__m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi16_mask(__m128i __A)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256 _mm256_reduce_min_epu16(__m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V)
#define _mm_cmpneq_epi16_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_reduce_and_epi8(__m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B)
#define _mm_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_and_epi16(__mmask16 __M, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_or_epi16(__m128i __W)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_and_epi16(__m256i __W)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_testn_epi8_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
#define _mm_cmpneq_epi8_mask(A, B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi16_mask(__m128i __A, __m128i __B)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi8(__m128i __W, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi16(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi16(void *__P, __mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi16(__mmask8 __U, __m128i __A, int __B)
#define _mm256_cmpneq_epi8_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_min_epi8(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi16(__mmask8 __M, short __A)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi8(__mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_reduce_min_epi8(__m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi16(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi16_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi8(__m128i __O, __mmask16 __M, char __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_movepi8_mask(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
#define _mm_cmpeq_epi8_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi16(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi8(__mmask32 __M, char __A)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_testn_epi8_mask(__m256i __A, __m256i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi8(__m256i __W, __mmask32 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_mul_epi16(__m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi16(__m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_and_epi8(__m128i __W)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_or_epi16(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi8(void *__P, __mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi16(__m256i __A, __m256i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_test_epi8_mask(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi16(__mmask16 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_add_epi8(__m128i __W)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi16(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi8(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W)
#define _mm256_cmpeq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi8(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_add_epi16(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_mul_epi16(__mmask8 __M, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi16_epi8(__m128i __A)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_movepi16_mask(__m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi16(__m256i __W, __mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_reduce_mul_epi8(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi16(__m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi8(__mmask16 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi8(__mmask32 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_test_epi8_mask(__m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_or_epi8(__m128i __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
#define __DEFAULT_FN_ATTRS256
#define _mm_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_test_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi8(__mmask32 __A)
#define _mm_cmpeq_epi16_mask(A, B)
#define _mm_mask_cmpeq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
#define __DEFAULT_FN_ATTRS128
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi8(void *__P, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi16(__mmask16 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi8(__mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi16(__m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi8(void *__P, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256 _mm256_reduce_max_epu8(__m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi16(__mmask8 __U, void const *__P)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128 _mm_reduce_min_epu16(__m128i __V)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_mul_epi16(__m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_testn_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128 _mm_reduce_max_epu16(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_max_epi16(__m256i __V)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_reduce_max_epi8(__m256i __V)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi8(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_and_epi16(__mmask8 __M, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_reduce_add_epi8(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi16_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_max_epi16(__m128i __V)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_mul_epi16(__mmask16 __M, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_min_epi16(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_reduce_or_epi8(__m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi16(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi16(__mmask8 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256 _mm256_reduce_max_epu16(__m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_add_epi16(__m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi16_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi16(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_reduce_and_epi16(__m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
#define _mm256_cmpneq_epi16_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi16(__mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128 _mm_reduce_max_epu8(__m128i __V)
static __inline__ signed char __DEFAULT_FN_ATTRS128 _mm_reduce_mul_epi8(__m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi16(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128 _mm_reduce_min_epu8(__m128i __V)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128 _mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V)
static __inline__ signed char __DEFAULT_FN_ATTRS256 _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi16(void const *__P)
static __inline__ short __DEFAULT_FN_ATTRS256 _mm256_reduce_min_epi16(__m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8(char __b)
Constructs a 256-bit integer vector of [32 x i8], with each of the 8-bit integral vector elements set to the specified 8-bit integral value.
Definition: avxintrin.h:4290
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition: avxintrin.h:4353
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16(short __w)
Constructs a 256-bit integer vector of [16 x i16], with each of the 16-bit integral vector elements set to the specified 16-bit integral value.
Definition: avxintrin.h:4272
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-3) values from each of the two 128-bit vectors of [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].
Definition: emmintrin.h:4532
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
Converts, with saturation, 16-bit signed integers from both 128-bit integer vector operands into 8-bit unsigned integers, and packs the results into the destination.
Definition: emmintrin.h:4189
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the smaller value from each comparison in the corresponding element of a 128-bit result vector of [16 x i8].
Definition: emmintrin.h:2361
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
Definition: emmintrin.h:2663
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [16 x i8] vectors, saving each sum in the corresponding element of a 128-bit result vector of [16 x i8].
Definition: emmintrin.h:2156
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
Definition: emmintrin.h:2509
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the lower 16 bits of each 32-bit product in the corresponding element of a 128-bit signed [8 x i16] result vector.
Definition: emmintrin.h:2418
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2867
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the greater value f...
Definition: emmintrin.h:2323
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
Definition: emmintrin.h:2260
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of the 16-bit values in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2985
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-7) values from two 128-bit vectors of [16 x i8] and interleaves them i...
Definition: emmintrin.h:4503
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 8-bit signed integer values in the input and returns the di...
Definition: emmintrin.h:2582
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 16-bit unsigned integer values in the input and returns the...
Definition: emmintrin.h:2646
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Unpacks the high-order (index 4-7) values from two 128-bit vectors of [8 x i16] and interleaves them ...
Definition: emmintrin.h:4425
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two 128-bit signed [8 x i16] vectors, producing eight interm...
Definition: emmintrin.h:2285
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
Definition: emmintrin.h:2222
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2758
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
Definition: emmintrin.h:2200
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [8 x i16] vectors,...
Definition: emmintrin.h:2178
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 16-bit signed integer values in the input and returns the d...
Definition: emmintrin.h:2604
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
Definition: emmintrin.h:2492
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the upper 16 bits of ea...
Definition: emmintrin.h:2380
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two unsigned [8 x i16] vectors, saving the upper 16 bits of ...
Definition: emmintrin.h:2399
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the greater value fro...
Definition: emmintrin.h:2304
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
Converts, with saturation, 16-bit signed integers from both 128-bit integer vector operands into 8-bi...
Definition: emmintrin.h:4143
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2886
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
Definition: emmintrin.h:3724
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
Converts, with saturation, 32-bit signed integers from both 128-bit integer vector operands into 16-b...
Definition: emmintrin.h:4166
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2776
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [16 x i8], saving the lower 8 bits of each ...
Definition: emmintrin.h:2055
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the smaller value fro...
Definition: emmintrin.h:2342
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value.
Definition: emmintrin.h:3707
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
Definition: emmintrin.h:2241
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Unpacks the high-order (index 8-15) values from two 128-bit vectors of [16 x i8] and interleaves them...
Definition: emmintrin.h:4397
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of the 16-bit values in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2967
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [8 x i16], saving the lower 16 bits of each...
Definition: emmintrin.h:2076
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 8-bit unsigned integer values in the input and returns the ...
Definition: emmintrin.h:2625
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition: emmintrin.h:3859
struct __storeu_i16 *__P __v
Definition: immintrin.h:472
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition: bmi2intrin.h:19
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
Definition: smmintrin.h:702
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V)
Zero-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
Definition: smmintrin.h:1338
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
Definition: smmintrin.h:666
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1, __m128i __V2)
Converts, with saturation, 32-bit signed integers from both 128-bit integer vector operands into 16-b...
Definition: smmintrin.h:1454
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
Definition: smmintrin.h:684
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
Definition: smmintrin.h:720
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V)
Sign-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
Definition: smmintrin.h:1221
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:61
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:561
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:97
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:510
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
Definition: tmmintrin.h:608