clang 20.0.0git
avx512vlintrin.h
Go to the documentation of this file.
1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLINTRIN_H
15#define __AVX512VLINTRIN_H
16
/* Attribute sets applied to every intrinsic defined with them: always
 * inlined, no debug info, compiled for the "avx512vl,no-evex512" target, and
 * tagged with a minimum vector width of 128 or 256 bits respectively. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, \
                 __nodebug__, \
                 __target__("avx512vl,no-evex512"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, \
                 __nodebug__, \
                 __target__("avx512vl,no-evex512"), \
                 __min_vector_width__(256)))
25
/* Sub-128-bit helper vector types (sizes are total bytes). */
typedef short __v2hi __attribute__((__vector_size__(4))); /* 4-byte vector of short */
typedef char __v4qi __attribute__((__vector_size__(4)));  /* 4-byte vector of char  */
typedef char __v2qi __attribute__((__vector_size__(2)));  /* 2-byte vector of char  */
29
30/* Integer compare */
31
/* 128-bit epi32 compare shorthands. Each macro fixes the _MM_CMPINT_*
 * predicate and forwards to _mm_cmp_epi32_mask; the _mask_ variants forward
 * to _mm_mask_cmp_epi32_mask and additionally pass the mask k. */

/* Plain forms. */
#define _mm_cmpeq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)

/* Forms taking a mask k. */
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
56
/* 256-bit epi32 compare shorthands. Each macro fixes the _MM_CMPINT_*
 * predicate and forwards to _mm256_cmp_epi32_mask; the _mask_ variants
 * forward to _mm256_mask_cmp_epi32_mask and additionally pass the mask k. */

/* Plain forms. */
#define _mm256_cmpeq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)

/* Forms taking a mask k. */
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
81
/* 128-bit epu32 compare shorthands. Each macro fixes the _MM_CMPINT_*
 * predicate and forwards to _mm_cmp_epu32_mask; the _mask_ variants forward
 * to _mm_mask_cmp_epu32_mask and additionally pass the mask k. */

/* Plain forms. */
#define _mm_cmpeq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)

/* Forms taking a mask k. */
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
106
/* 256-bit epu32 compare shorthands. Each macro fixes the _MM_CMPINT_*
 * predicate and forwards to _mm256_cmp_epu32_mask; the _mask_ variants
 * forward to _mm256_mask_cmp_epu32_mask and additionally pass the mask k. */

/* Plain forms. */
#define _mm256_cmpeq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)

/* Forms taking a mask k. */
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
131
/* 128-bit epi64 compare shorthands. Each macro fixes the _MM_CMPINT_*
 * predicate and forwards to _mm_cmp_epi64_mask; the _mask_ variants forward
 * to _mm_mask_cmp_epi64_mask and additionally pass the mask k. */

/* Plain forms. */
#define _mm_cmpeq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)

/* Forms taking a mask k. */
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
156
/* 256-bit epi64 compare shorthands. Each macro fixes the _MM_CMPINT_*
 * predicate and forwards to _mm256_cmp_epi64_mask; the _mask_ variants
 * forward to _mm256_mask_cmp_epi64_mask and additionally pass the mask k. */

/* Plain forms. */
#define _mm256_cmpeq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)

/* Forms taking a mask k. */
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
181
/* 128-bit epu64 compare shorthands. Each macro fixes the _MM_CMPINT_*
 * predicate and forwards to _mm_cmp_epu64_mask; the _mask_ variants forward
 * to _mm_mask_cmp_epu64_mask and additionally pass the mask k. */

/* Plain forms. */
#define _mm_cmpeq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)

/* Forms taking a mask k. */
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
206
/* 256-bit epu64 compare shorthands. Each macro fixes the _MM_CMPINT_*
 * predicate and forwards to _mm256_cmp_epu64_mask; the _mask_ variants
 * forward to _mm256_mask_cmp_epu64_mask and additionally pass the mask k. */

/* Plain forms. */
#define _mm256_cmpeq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)

/* Forms taking a mask k. */
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
231
232static __inline__ __m256i __DEFAULT_FN_ATTRS256
233_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
234{
235 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
236 (__v8si)_mm256_add_epi32(__A, __B),
237 (__v8si)__W);
238}
239
240static __inline__ __m256i __DEFAULT_FN_ATTRS256
241_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
242{
243 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
244 (__v8si)_mm256_add_epi32(__A, __B),
245 (__v8si)_mm256_setzero_si256());
246}
247
248static __inline__ __m256i __DEFAULT_FN_ATTRS256
249_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
250{
251 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
252 (__v4di)_mm256_add_epi64(__A, __B),
253 (__v4di)__W);
254}
255
256static __inline__ __m256i __DEFAULT_FN_ATTRS256
257_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
258{
259 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
260 (__v4di)_mm256_add_epi64(__A, __B),
261 (__v4di)_mm256_setzero_si256());
262}
263
264static __inline__ __m256i __DEFAULT_FN_ATTRS256
265_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
266{
267 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
268 (__v8si)_mm256_sub_epi32(__A, __B),
269 (__v8si)__W);
270}
271
272static __inline__ __m256i __DEFAULT_FN_ATTRS256
273_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
274{
275 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
276 (__v8si)_mm256_sub_epi32(__A, __B),
277 (__v8si)_mm256_setzero_si256());
278}
279
280static __inline__ __m256i __DEFAULT_FN_ATTRS256
281_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
282{
283 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
284 (__v4di)_mm256_sub_epi64(__A, __B),
285 (__v4di)__W);
286}
287
288static __inline__ __m256i __DEFAULT_FN_ATTRS256
289_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
290{
291 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
292 (__v4di)_mm256_sub_epi64(__A, __B),
293 (__v4di)_mm256_setzero_si256());
294}
295
296static __inline__ __m128i __DEFAULT_FN_ATTRS128
297_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
298{
299 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
300 (__v4si)_mm_add_epi32(__A, __B),
301 (__v4si)__W);
302}
303
304static __inline__ __m128i __DEFAULT_FN_ATTRS128
305_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
306{
307 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
308 (__v4si)_mm_add_epi32(__A, __B),
309 (__v4si)_mm_setzero_si128());
310}
311
312static __inline__ __m128i __DEFAULT_FN_ATTRS128
313_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
314{
315 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
316 (__v2di)_mm_add_epi64(__A, __B),
317 (__v2di)__W);
318}
319
320static __inline__ __m128i __DEFAULT_FN_ATTRS128
321_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
322{
323 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
324 (__v2di)_mm_add_epi64(__A, __B),
325 (__v2di)_mm_setzero_si128());
326}
327
328static __inline__ __m128i __DEFAULT_FN_ATTRS128
329_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
330{
331 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
332 (__v4si)_mm_sub_epi32(__A, __B),
333 (__v4si)__W);
334}
335
336static __inline__ __m128i __DEFAULT_FN_ATTRS128
337_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
338{
339 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
340 (__v4si)_mm_sub_epi32(__A, __B),
341 (__v4si)_mm_setzero_si128());
342}
343
344static __inline__ __m128i __DEFAULT_FN_ATTRS128
345_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
346{
347 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
348 (__v2di)_mm_sub_epi64(__A, __B),
349 (__v2di)__W);
350}
351
352static __inline__ __m128i __DEFAULT_FN_ATTRS128
353_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
354{
355 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
356 (__v2di)_mm_sub_epi64(__A, __B),
357 (__v2di)_mm_setzero_si128());
358}
359
360static __inline__ __m256i __DEFAULT_FN_ATTRS256
361_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
362{
363 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
364 (__v4di)_mm256_mul_epi32(__X, __Y),
365 (__v4di)__W);
366}
367
368static __inline__ __m256i __DEFAULT_FN_ATTRS256
369_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
370{
371 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
372 (__v4di)_mm256_mul_epi32(__X, __Y),
373 (__v4di)_mm256_setzero_si256());
374}
375
376static __inline__ __m128i __DEFAULT_FN_ATTRS128
377_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
378{
379 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
380 (__v2di)_mm_mul_epi32(__X, __Y),
381 (__v2di)__W);
382}
383
384static __inline__ __m128i __DEFAULT_FN_ATTRS128
385_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
386{
387 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
388 (__v2di)_mm_mul_epi32(__X, __Y),
389 (__v2di)_mm_setzero_si128());
390}
391
392static __inline__ __m256i __DEFAULT_FN_ATTRS256
393_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
394{
395 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
396 (__v4di)_mm256_mul_epu32(__X, __Y),
397 (__v4di)__W);
398}
399
400static __inline__ __m256i __DEFAULT_FN_ATTRS256
401_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
402{
403 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
404 (__v4di)_mm256_mul_epu32(__X, __Y),
405 (__v4di)_mm256_setzero_si256());
406}
407
408static __inline__ __m128i __DEFAULT_FN_ATTRS128
409_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
410{
411 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
412 (__v2di)_mm_mul_epu32(__X, __Y),
413 (__v2di)__W);
414}
415
416static __inline__ __m128i __DEFAULT_FN_ATTRS128
417_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
418{
419 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
420 (__v2di)_mm_mul_epu32(__X, __Y),
421 (__v2di)_mm_setzero_si128());
422}
423
424static __inline__ __m256i __DEFAULT_FN_ATTRS256
425_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
426{
427 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
428 (__v8si)_mm256_mullo_epi32(__A, __B),
429 (__v8si)_mm256_setzero_si256());
430}
431
432static __inline__ __m256i __DEFAULT_FN_ATTRS256
433_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
434{
435 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
436 (__v8si)_mm256_mullo_epi32(__A, __B),
437 (__v8si)__W);
438}
439
440static __inline__ __m128i __DEFAULT_FN_ATTRS128
441_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
442{
443 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
444 (__v4si)_mm_mullo_epi32(__A, __B),
445 (__v4si)_mm_setzero_si128());
446}
447
448static __inline__ __m128i __DEFAULT_FN_ATTRS128
449_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
450{
451 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
452 (__v4si)_mm_mullo_epi32(__A, __B),
453 (__v4si)__W);
454}
455
456static __inline__ __m256i __DEFAULT_FN_ATTRS256
457_mm256_and_epi32(__m256i __a, __m256i __b)
458{
459 return (__m256i)((__v8su)__a & (__v8su)__b);
460}
461
462static __inline__ __m256i __DEFAULT_FN_ATTRS256
463_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
464{
465 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
466 (__v8si)_mm256_and_epi32(__A, __B),
467 (__v8si)__W);
468}
469
470static __inline__ __m256i __DEFAULT_FN_ATTRS256
471_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
472{
473 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
474}
475
476static __inline__ __m128i __DEFAULT_FN_ATTRS128
477_mm_and_epi32(__m128i __a, __m128i __b)
478{
479 return (__m128i)((__v4su)__a & (__v4su)__b);
480}
481
482static __inline__ __m128i __DEFAULT_FN_ATTRS128
483_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
484{
485 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
486 (__v4si)_mm_and_epi32(__A, __B),
487 (__v4si)__W);
488}
489
490static __inline__ __m128i __DEFAULT_FN_ATTRS128
491_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
492{
493 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
494}
495
496static __inline__ __m256i __DEFAULT_FN_ATTRS256
497_mm256_andnot_epi32(__m256i __A, __m256i __B)
498{
499 return (__m256i)(~(__v8su)__A & (__v8su)__B);
500}
501
502static __inline__ __m256i __DEFAULT_FN_ATTRS256
503_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
504{
505 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
506 (__v8si)_mm256_andnot_epi32(__A, __B),
507 (__v8si)__W);
508}
509
510static __inline__ __m256i __DEFAULT_FN_ATTRS256
511_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
512{
514 __U, __A, __B);
515}
516
517static __inline__ __m128i __DEFAULT_FN_ATTRS128
518_mm_andnot_epi32(__m128i __A, __m128i __B)
519{
520 return (__m128i)(~(__v4su)__A & (__v4su)__B);
521}
522
523static __inline__ __m128i __DEFAULT_FN_ATTRS128
524_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
525{
526 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
527 (__v4si)_mm_andnot_epi32(__A, __B),
528 (__v4si)__W);
529}
530
531static __inline__ __m128i __DEFAULT_FN_ATTRS128
532_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
533{
534 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
535}
536
537static __inline__ __m256i __DEFAULT_FN_ATTRS256
538_mm256_or_epi32(__m256i __a, __m256i __b)
539{
540 return (__m256i)((__v8su)__a | (__v8su)__b);
541}
542
543static __inline__ __m256i __DEFAULT_FN_ATTRS256
544_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
545{
546 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
547 (__v8si)_mm256_or_epi32(__A, __B),
548 (__v8si)__W);
549}
550
551static __inline__ __m256i __DEFAULT_FN_ATTRS256
552_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
553{
554 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
555}
556
557static __inline__ __m128i __DEFAULT_FN_ATTRS128
558_mm_or_epi32(__m128i __a, __m128i __b)
559{
560 return (__m128i)((__v4su)__a | (__v4su)__b);
561}
562
563static __inline__ __m128i __DEFAULT_FN_ATTRS128
564_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
565{
566 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
567 (__v4si)_mm_or_epi32(__A, __B),
568 (__v4si)__W);
569}
570
571static __inline__ __m128i __DEFAULT_FN_ATTRS128
572_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
573{
574 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
575}
576
577static __inline__ __m256i __DEFAULT_FN_ATTRS256
578_mm256_xor_epi32(__m256i __a, __m256i __b)
579{
580 return (__m256i)((__v8su)__a ^ (__v8su)__b);
581}
582
583static __inline__ __m256i __DEFAULT_FN_ATTRS256
584_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
585{
586 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
587 (__v8si)_mm256_xor_epi32(__A, __B),
588 (__v8si)__W);
589}
590
591static __inline__ __m256i __DEFAULT_FN_ATTRS256
592_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
593{
594 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
595}
596
597static __inline__ __m128i __DEFAULT_FN_ATTRS128
598_mm_xor_epi32(__m128i __a, __m128i __b)
599{
600 return (__m128i)((__v4su)__a ^ (__v4su)__b);
601}
602
603static __inline__ __m128i __DEFAULT_FN_ATTRS128
604_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
605{
606 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
607 (__v4si)_mm_xor_epi32(__A, __B),
608 (__v4si)__W);
609}
610
611static __inline__ __m128i __DEFAULT_FN_ATTRS128
612_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
613{
614 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
615}
616
617static __inline__ __m256i __DEFAULT_FN_ATTRS256
618_mm256_and_epi64(__m256i __a, __m256i __b)
619{
620 return (__m256i)((__v4du)__a & (__v4du)__b);
621}
622
623static __inline__ __m256i __DEFAULT_FN_ATTRS256
624_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
625{
626 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
627 (__v4di)_mm256_and_epi64(__A, __B),
628 (__v4di)__W);
629}
630
631static __inline__ __m256i __DEFAULT_FN_ATTRS256
632_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
633{
634 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
635}
636
637static __inline__ __m128i __DEFAULT_FN_ATTRS128
638_mm_and_epi64(__m128i __a, __m128i __b)
639{
640 return (__m128i)((__v2du)__a & (__v2du)__b);
641}
642
643static __inline__ __m128i __DEFAULT_FN_ATTRS128
644_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
645{
646 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
647 (__v2di)_mm_and_epi64(__A, __B),
648 (__v2di)__W);
649}
650
651static __inline__ __m128i __DEFAULT_FN_ATTRS128
652_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
653{
654 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
655}
656
657static __inline__ __m256i __DEFAULT_FN_ATTRS256
658_mm256_andnot_epi64(__m256i __A, __m256i __B)
659{
660 return (__m256i)(~(__v4du)__A & (__v4du)__B);
661}
662
663static __inline__ __m256i __DEFAULT_FN_ATTRS256
664_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
665{
666 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
667 (__v4di)_mm256_andnot_epi64(__A, __B),
668 (__v4di)__W);
669}
670
671static __inline__ __m256i __DEFAULT_FN_ATTRS256
672_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
673{
675 __U, __A, __B);
676}
677
678static __inline__ __m128i __DEFAULT_FN_ATTRS128
679_mm_andnot_epi64(__m128i __A, __m128i __B)
680{
681 return (__m128i)(~(__v2du)__A & (__v2du)__B);
682}
683
684static __inline__ __m128i __DEFAULT_FN_ATTRS128
685_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
686{
687 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
688 (__v2di)_mm_andnot_epi64(__A, __B),
689 (__v2di)__W);
690}
691
692static __inline__ __m128i __DEFAULT_FN_ATTRS128
693_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
694{
695 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
696}
697
698static __inline__ __m256i __DEFAULT_FN_ATTRS256
699_mm256_or_epi64(__m256i __a, __m256i __b)
700{
701 return (__m256i)((__v4du)__a | (__v4du)__b);
702}
703
704static __inline__ __m256i __DEFAULT_FN_ATTRS256
705_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
706{
707 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
708 (__v4di)_mm256_or_epi64(__A, __B),
709 (__v4di)__W);
710}
711
712static __inline__ __m256i __DEFAULT_FN_ATTRS256
713_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
714{
715 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
716}
717
718static __inline__ __m128i __DEFAULT_FN_ATTRS128
719_mm_or_epi64(__m128i __a, __m128i __b)
720{
721 return (__m128i)((__v2du)__a | (__v2du)__b);
722}
723
724static __inline__ __m128i __DEFAULT_FN_ATTRS128
725_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
726{
727 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
728 (__v2di)_mm_or_epi64(__A, __B),
729 (__v2di)__W);
730}
731
732static __inline__ __m128i __DEFAULT_FN_ATTRS128
733_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
734{
735 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
736}
737
738static __inline__ __m256i __DEFAULT_FN_ATTRS256
739_mm256_xor_epi64(__m256i __a, __m256i __b)
740{
741 return (__m256i)((__v4du)__a ^ (__v4du)__b);
742}
743
744static __inline__ __m256i __DEFAULT_FN_ATTRS256
745_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
746{
747 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
748 (__v4di)_mm256_xor_epi64(__A, __B),
749 (__v4di)__W);
750}
751
752static __inline__ __m256i __DEFAULT_FN_ATTRS256
753_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
754{
755 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
756}
757
758static __inline__ __m128i __DEFAULT_FN_ATTRS128
759_mm_xor_epi64(__m128i __a, __m128i __b)
760{
761 return (__m128i)((__v2du)__a ^ (__v2du)__b);
762}
763
764static __inline__ __m128i __DEFAULT_FN_ATTRS128
765_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
766 __m128i __B)
767{
768 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
769 (__v2di)_mm_xor_epi64(__A, __B),
770 (__v2di)__W);
771}
772
773static __inline__ __m128i __DEFAULT_FN_ATTRS128
774_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
775{
776 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
777}
778
/* Generic 128-bit compares on 32-bit lanes. p is the comparison predicate
 * (cast to int). The builtin's last argument is the incoming mask: all ones
 * for the plain forms, m for the _mask_ forms. epi32 uses the cmpd builtin,
 * epu32 the ucmpd builtin. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)(m)))

#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)(m)))
798
/* Generic 256-bit compares on 32-bit lanes. p is the comparison predicate
 * (cast to int). The builtin's last argument is the incoming mask: all ones
 * for the plain forms, m for the _mask_ forms. epi32 uses the cmpd builtin,
 * epu32 the ucmpd builtin. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)(m)))

#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)(m)))
818
/* Generic 128-bit compares on 64-bit lanes. p is the comparison predicate
 * (cast to int). The builtin's last argument is the incoming mask: all ones
 * for the plain forms, m for the _mask_ forms. epi64 uses the cmpq builtin,
 * epu64 the ucmpq builtin. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)(m)))

#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)(m)))
838
/* Generic 256-bit compares on 64-bit lanes. p is the comparison predicate
 * (cast to int). The builtin's last argument is the incoming mask: all ones
 * for the plain forms, m for the _mask_ forms. epi64 uses the cmpq builtin,
 * epu64 the ucmpq builtin. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)(m)))

#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)(m)))
858
/* Generic 256-bit floating-point compares (ps on __v8sf, pd on __v4df).
 * p is the comparison predicate (cast to int). The builtin's last argument
 * is the incoming mask: all ones for the plain forms, m for the _mask_
 * forms. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), (int)(p), (__mmask8)(m)))

#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), (int)(p), (__mmask8)(m)))
878
879#define _mm_cmp_ps_mask(a, b, p) \
880 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
881 (__v4sf)(__m128)(b), (int)(p), \
882 (__mmask8)-1))
883
884#define _mm_mask_cmp_ps_mask(m, a, b, p) \
885 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
886 (__v4sf)(__m128)(b), (int)(p), \
887 (__mmask8)(m)))
888
/* Compares the two double lanes of a and b with predicate p; the builtin is
   given an all-ones mask, so every lane participates. */
#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), (int)(p), (__mmask8)-1))
893
/* Compares the two double lanes of a and b with predicate p; result bits
   whose corresponding bit in m is clear are 0. */
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), (int)(p), (__mmask8)(m)))
898
899static __inline__ __m128d __DEFAULT_FN_ATTRS128
900_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
901{
902 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
903 __builtin_ia32_vfmaddpd ((__v2df) __A,
904 (__v2df) __B,
905 (__v2df) __C),
906 (__v2df) __A);
907}
908
909static __inline__ __m128d __DEFAULT_FN_ATTRS128
910_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
911{
912 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
913 __builtin_ia32_vfmaddpd ((__v2df) __A,
914 (__v2df) __B,
915 (__v2df) __C),
916 (__v2df) __C);
917}
918
919static __inline__ __m128d __DEFAULT_FN_ATTRS128
920_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
921{
922 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
923 __builtin_ia32_vfmaddpd ((__v2df) __A,
924 (__v2df) __B,
925 (__v2df) __C),
926 (__v2df)_mm_setzero_pd());
927}
928
929static __inline__ __m128d __DEFAULT_FN_ATTRS128
930_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
931{
932 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
933 __builtin_ia32_vfmaddpd ((__v2df) __A,
934 (__v2df) __B,
935 -(__v2df) __C),
936 (__v2df) __A);
937}
938
939static __inline__ __m128d __DEFAULT_FN_ATTRS128
940_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
941{
942 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
943 __builtin_ia32_vfmaddpd ((__v2df) __A,
944 (__v2df) __B,
945 -(__v2df) __C),
946 (__v2df)_mm_setzero_pd());
947}
948
949static __inline__ __m128d __DEFAULT_FN_ATTRS128
950_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
951{
952 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
953 __builtin_ia32_vfmaddpd (-(__v2df) __A,
954 (__v2df) __B,
955 (__v2df) __C),
956 (__v2df) __C);
957}
958
959static __inline__ __m128d __DEFAULT_FN_ATTRS128
960_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
961{
962 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
963 __builtin_ia32_vfmaddpd (-(__v2df) __A,
964 (__v2df) __B,
965 (__v2df) __C),
966 (__v2df)_mm_setzero_pd());
967}
968
969static __inline__ __m128d __DEFAULT_FN_ATTRS128
970_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
971{
972 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
973 __builtin_ia32_vfmaddpd (-(__v2df) __A,
974 (__v2df) __B,
975 -(__v2df) __C),
976 (__v2df)_mm_setzero_pd());
977}
978
979static __inline__ __m256d __DEFAULT_FN_ATTRS256
980_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
981{
982 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
983 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
984 (__v4df) __B,
985 (__v4df) __C),
986 (__v4df) __A);
987}
988
989static __inline__ __m256d __DEFAULT_FN_ATTRS256
990_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
991{
992 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
993 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
994 (__v4df) __B,
995 (__v4df) __C),
996 (__v4df) __C);
997}
998
999static __inline__ __m256d __DEFAULT_FN_ATTRS256
1000_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1001{
1002 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1003 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1004 (__v4df) __B,
1005 (__v4df) __C),
1006 (__v4df)_mm256_setzero_pd());
1007}
1008
1009static __inline__ __m256d __DEFAULT_FN_ATTRS256
1010_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1011{
1012 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1013 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1014 (__v4df) __B,
1015 -(__v4df) __C),
1016 (__v4df) __A);
1017}
1018
1019static __inline__ __m256d __DEFAULT_FN_ATTRS256
1020_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1021{
1022 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1023 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1024 (__v4df) __B,
1025 -(__v4df) __C),
1026 (__v4df)_mm256_setzero_pd());
1027}
1028
1029static __inline__ __m256d __DEFAULT_FN_ATTRS256
1030_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1031{
1032 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1033 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1034 (__v4df) __B,
1035 (__v4df) __C),
1036 (__v4df) __C);
1037}
1038
1039static __inline__ __m256d __DEFAULT_FN_ATTRS256
1040_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1041{
1042 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1043 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1044 (__v4df) __B,
1045 (__v4df) __C),
1046 (__v4df)_mm256_setzero_pd());
1047}
1048
1049static __inline__ __m256d __DEFAULT_FN_ATTRS256
1050_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1051{
1052 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1053 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1054 (__v4df) __B,
1055 -(__v4df) __C),
1056 (__v4df)_mm256_setzero_pd());
1057}
1058
1059static __inline__ __m128 __DEFAULT_FN_ATTRS128
1060_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1061{
1062 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1063 __builtin_ia32_vfmaddps ((__v4sf) __A,
1064 (__v4sf) __B,
1065 (__v4sf) __C),
1066 (__v4sf) __A);
1067}
1068
1069static __inline__ __m128 __DEFAULT_FN_ATTRS128
1070_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1071{
1072 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1073 __builtin_ia32_vfmaddps ((__v4sf) __A,
1074 (__v4sf) __B,
1075 (__v4sf) __C),
1076 (__v4sf) __C);
1077}
1078
1079static __inline__ __m128 __DEFAULT_FN_ATTRS128
1080_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1081{
1082 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1083 __builtin_ia32_vfmaddps ((__v4sf) __A,
1084 (__v4sf) __B,
1085 (__v4sf) __C),
1086 (__v4sf)_mm_setzero_ps());
1087}
1088
1089static __inline__ __m128 __DEFAULT_FN_ATTRS128
1090_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1091{
1092 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1093 __builtin_ia32_vfmaddps ((__v4sf) __A,
1094 (__v4sf) __B,
1095 -(__v4sf) __C),
1096 (__v4sf) __A);
1097}
1098
1099static __inline__ __m128 __DEFAULT_FN_ATTRS128
1100_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1101{
1102 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1103 __builtin_ia32_vfmaddps ((__v4sf) __A,
1104 (__v4sf) __B,
1105 -(__v4sf) __C),
1106 (__v4sf)_mm_setzero_ps());
1107}
1108
1109static __inline__ __m128 __DEFAULT_FN_ATTRS128
1110_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1111{
1112 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1113 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1114 (__v4sf) __B,
1115 (__v4sf) __C),
1116 (__v4sf) __C);
1117}
1118
1119static __inline__ __m128 __DEFAULT_FN_ATTRS128
1120_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1121{
1122 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1123 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1124 (__v4sf) __B,
1125 (__v4sf) __C),
1126 (__v4sf)_mm_setzero_ps());
1127}
1128
1129static __inline__ __m128 __DEFAULT_FN_ATTRS128
1130_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1131{
1132 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1133 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1134 (__v4sf) __B,
1135 -(__v4sf) __C),
1136 (__v4sf)_mm_setzero_ps());
1137}
1138
1139static __inline__ __m256 __DEFAULT_FN_ATTRS256
1140_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1141{
1142 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1143 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1144 (__v8sf) __B,
1145 (__v8sf) __C),
1146 (__v8sf) __A);
1147}
1148
1149static __inline__ __m256 __DEFAULT_FN_ATTRS256
1150_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1151{
1152 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1153 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1154 (__v8sf) __B,
1155 (__v8sf) __C),
1156 (__v8sf) __C);
1157}
1158
1159static __inline__ __m256 __DEFAULT_FN_ATTRS256
1160_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1161{
1162 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1163 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1164 (__v8sf) __B,
1165 (__v8sf) __C),
1166 (__v8sf)_mm256_setzero_ps());
1167}
1168
1169static __inline__ __m256 __DEFAULT_FN_ATTRS256
1170_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1171{
1172 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1173 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1174 (__v8sf) __B,
1175 -(__v8sf) __C),
1176 (__v8sf) __A);
1177}
1178
1179static __inline__ __m256 __DEFAULT_FN_ATTRS256
1180_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1181{
1182 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1183 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1184 (__v8sf) __B,
1185 -(__v8sf) __C),
1186 (__v8sf)_mm256_setzero_ps());
1187}
1188
1189static __inline__ __m256 __DEFAULT_FN_ATTRS256
1190_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1191{
1192 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1193 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1194 (__v8sf) __B,
1195 (__v8sf) __C),
1196 (__v8sf) __C);
1197}
1198
1199static __inline__ __m256 __DEFAULT_FN_ATTRS256
1200_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1201{
1202 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1203 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1204 (__v8sf) __B,
1205 (__v8sf) __C),
1206 (__v8sf)_mm256_setzero_ps());
1207}
1208
1209static __inline__ __m256 __DEFAULT_FN_ATTRS256
1210_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1211{
1212 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1213 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1214 (__v8sf) __B,
1215 -(__v8sf) __C),
1216 (__v8sf)_mm256_setzero_ps());
1217}
1218
1219static __inline__ __m128d __DEFAULT_FN_ATTRS128
1220_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1221{
1222 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1223 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1224 (__v2df) __B,
1225 (__v2df) __C),
1226 (__v2df) __A);
1227}
1228
1229static __inline__ __m128d __DEFAULT_FN_ATTRS128
1230_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1231{
1232 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1233 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1234 (__v2df) __B,
1235 (__v2df) __C),
1236 (__v2df) __C);
1237}
1238
1239static __inline__ __m128d __DEFAULT_FN_ATTRS128
1240_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1241{
1242 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1243 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1244 (__v2df) __B,
1245 (__v2df) __C),
1246 (__v2df)_mm_setzero_pd());
1247}
1248
1249static __inline__ __m128d __DEFAULT_FN_ATTRS128
1250_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1251{
1252 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1253 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1254 (__v2df) __B,
1255 -(__v2df) __C),
1256 (__v2df) __A);
1257}
1258
1259static __inline__ __m128d __DEFAULT_FN_ATTRS128
1260_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1261{
1262 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1263 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1264 (__v2df) __B,
1265 -(__v2df) __C),
1266 (__v2df)_mm_setzero_pd());
1267}
1268
1269static __inline__ __m256d __DEFAULT_FN_ATTRS256
1270_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1271{
1272 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1273 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1274 (__v4df) __B,
1275 (__v4df) __C),
1276 (__v4df) __A);
1277}
1278
1279static __inline__ __m256d __DEFAULT_FN_ATTRS256
1280_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1281{
1282 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1283 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1284 (__v4df) __B,
1285 (__v4df) __C),
1286 (__v4df) __C);
1287}
1288
1289static __inline__ __m256d __DEFAULT_FN_ATTRS256
1290_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1291{
1292 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1293 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1294 (__v4df) __B,
1295 (__v4df) __C),
1296 (__v4df)_mm256_setzero_pd());
1297}
1298
1299static __inline__ __m256d __DEFAULT_FN_ATTRS256
1300_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1301{
1302 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1303 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1304 (__v4df) __B,
1305 -(__v4df) __C),
1306 (__v4df) __A);
1307}
1308
1309static __inline__ __m256d __DEFAULT_FN_ATTRS256
1310_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1311{
1312 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1313 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1314 (__v4df) __B,
1315 -(__v4df) __C),
1316 (__v4df)_mm256_setzero_pd());
1317}
1318
1319static __inline__ __m128 __DEFAULT_FN_ATTRS128
1320_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1321{
1322 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1323 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1324 (__v4sf) __B,
1325 (__v4sf) __C),
1326 (__v4sf) __A);
1327}
1328
1329static __inline__ __m128 __DEFAULT_FN_ATTRS128
1330_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1331{
1332 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1333 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1334 (__v4sf) __B,
1335 (__v4sf) __C),
1336 (__v4sf) __C);
1337}
1338
1339static __inline__ __m128 __DEFAULT_FN_ATTRS128
1340_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1341{
1342 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1343 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1344 (__v4sf) __B,
1345 (__v4sf) __C),
1346 (__v4sf)_mm_setzero_ps());
1347}
1348
1349static __inline__ __m128 __DEFAULT_FN_ATTRS128
1350_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1351{
1352 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1353 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1354 (__v4sf) __B,
1355 -(__v4sf) __C),
1356 (__v4sf) __A);
1357}
1358
1359static __inline__ __m128 __DEFAULT_FN_ATTRS128
1360_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1361{
1362 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1363 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1364 (__v4sf) __B,
1365 -(__v4sf) __C),
1366 (__v4sf)_mm_setzero_ps());
1367}
1368
1369static __inline__ __m256 __DEFAULT_FN_ATTRS256
1370_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1371 __m256 __C)
1372{
1373 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1374 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1375 (__v8sf) __B,
1376 (__v8sf) __C),
1377 (__v8sf) __A);
1378}
1379
1380static __inline__ __m256 __DEFAULT_FN_ATTRS256
1381_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1382{
1383 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1384 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1385 (__v8sf) __B,
1386 (__v8sf) __C),
1387 (__v8sf) __C);
1388}
1389
1390static __inline__ __m256 __DEFAULT_FN_ATTRS256
1391_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1392{
1393 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1394 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1395 (__v8sf) __B,
1396 (__v8sf) __C),
1397 (__v8sf)_mm256_setzero_ps());
1398}
1399
1400static __inline__ __m256 __DEFAULT_FN_ATTRS256
1401_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1402{
1403 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1404 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1405 (__v8sf) __B,
1406 -(__v8sf) __C),
1407 (__v8sf) __A);
1408}
1409
1410static __inline__ __m256 __DEFAULT_FN_ATTRS256
1411_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1412{
1413 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1414 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1415 (__v8sf) __B,
1416 -(__v8sf) __C),
1417 (__v8sf)_mm256_setzero_ps());
1418}
1419
1420static __inline__ __m128d __DEFAULT_FN_ATTRS128
1421_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1422{
1423 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1424 __builtin_ia32_vfmaddpd ((__v2df) __A,
1425 (__v2df) __B,
1426 -(__v2df) __C),
1427 (__v2df) __C);
1428}
1429
1430static __inline__ __m256d __DEFAULT_FN_ATTRS256
1431_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1432{
1433 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1434 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1435 (__v4df) __B,
1436 -(__v4df) __C),
1437 (__v4df) __C);
1438}
1439
1440static __inline__ __m128 __DEFAULT_FN_ATTRS128
1441_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1442{
1443 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1444 __builtin_ia32_vfmaddps ((__v4sf) __A,
1445 (__v4sf) __B,
1446 -(__v4sf) __C),
1447 (__v4sf) __C);
1448}
1449
1450static __inline__ __m256 __DEFAULT_FN_ATTRS256
1451_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1452{
1453 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1454 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1455 (__v8sf) __B,
1456 -(__v8sf) __C),
1457 (__v8sf) __C);
1458}
1459
1460static __inline__ __m128d __DEFAULT_FN_ATTRS128
1461_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1462{
1463 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1464 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1465 (__v2df) __B,
1466 -(__v2df) __C),
1467 (__v2df) __C);
1468}
1469
1470static __inline__ __m256d __DEFAULT_FN_ATTRS256
1471_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1472{
1473 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1474 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1475 (__v4df) __B,
1476 -(__v4df) __C),
1477 (__v4df) __C);
1478}
1479
1480static __inline__ __m128 __DEFAULT_FN_ATTRS128
1481_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1482{
1483 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1484 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1485 (__v4sf) __B,
1486 -(__v4sf) __C),
1487 (__v4sf) __C);
1488}
1489
1490static __inline__ __m256 __DEFAULT_FN_ATTRS256
1491_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1492{
1493 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1494 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1495 (__v8sf) __B,
1496 -(__v8sf) __C),
1497 (__v8sf) __C);
1498}
1499
1500static __inline__ __m128d __DEFAULT_FN_ATTRS128
1501_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1502{
1503 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1504 __builtin_ia32_vfmaddpd ((__v2df) __A,
1505 -(__v2df) __B,
1506 (__v2df) __C),
1507 (__v2df) __A);
1508}
1509
1510static __inline__ __m256d __DEFAULT_FN_ATTRS256
1511_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1512{
1513 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1514 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1515 -(__v4df) __B,
1516 (__v4df) __C),
1517 (__v4df) __A);
1518}
1519
1520static __inline__ __m128 __DEFAULT_FN_ATTRS128
1521_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1522{
1523 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1524 __builtin_ia32_vfmaddps ((__v4sf) __A,
1525 -(__v4sf) __B,
1526 (__v4sf) __C),
1527 (__v4sf) __A);
1528}
1529
1530static __inline__ __m256 __DEFAULT_FN_ATTRS256
1531_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1532{
1533 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1534 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1535 -(__v8sf) __B,
1536 (__v8sf) __C),
1537 (__v8sf) __A);
1538}
1539
1540static __inline__ __m128d __DEFAULT_FN_ATTRS128
1541_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1542{
1543 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1544 __builtin_ia32_vfmaddpd ((__v2df) __A,
1545 -(__v2df) __B,
1546 -(__v2df) __C),
1547 (__v2df) __A);
1548}
1549
1550static __inline__ __m128d __DEFAULT_FN_ATTRS128
1551_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1552{
1553 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1554 __builtin_ia32_vfmaddpd ((__v2df) __A,
1555 -(__v2df) __B,
1556 -(__v2df) __C),
1557 (__v2df) __C);
1558}
1559
1560static __inline__ __m256d __DEFAULT_FN_ATTRS256
1561_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1562{
1563 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1564 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1565 -(__v4df) __B,
1566 -(__v4df) __C),
1567 (__v4df) __A);
1568}
1569
1570static __inline__ __m256d __DEFAULT_FN_ATTRS256
1571_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1572{
1573 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1574 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1575 -(__v4df) __B,
1576 -(__v4df) __C),
1577 (__v4df) __C);
1578}
1579
1580static __inline__ __m128 __DEFAULT_FN_ATTRS128
1581_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1582{
1583 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1584 __builtin_ia32_vfmaddps ((__v4sf) __A,
1585 -(__v4sf) __B,
1586 -(__v4sf) __C),
1587 (__v4sf) __A);
1588}
1589
1590static __inline__ __m128 __DEFAULT_FN_ATTRS128
1591_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1592{
1593 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1594 __builtin_ia32_vfmaddps ((__v4sf) __A,
1595 -(__v4sf) __B,
1596 -(__v4sf) __C),
1597 (__v4sf) __C);
1598}
1599
1600static __inline__ __m256 __DEFAULT_FN_ATTRS256
1601_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1602{
1603 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1604 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1605 -(__v8sf) __B,
1606 -(__v8sf) __C),
1607 (__v8sf) __A);
1608}
1609
1610static __inline__ __m256 __DEFAULT_FN_ATTRS256
1611_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1612{
1613 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1614 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1615 -(__v8sf) __B,
1616 -(__v8sf) __C),
1617 (__v8sf) __C);
1618}
1619
1620static __inline__ __m128d __DEFAULT_FN_ATTRS128
1621_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1622 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1623 (__v2df)_mm_add_pd(__A, __B),
1624 (__v2df)__W);
1625}
1626
1627static __inline__ __m128d __DEFAULT_FN_ATTRS128
1628_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1629 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1630 (__v2df)_mm_add_pd(__A, __B),
1631 (__v2df)_mm_setzero_pd());
1632}
1633
1634static __inline__ __m256d __DEFAULT_FN_ATTRS256
1635_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1636 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1637 (__v4df)_mm256_add_pd(__A, __B),
1638 (__v4df)__W);
1639}
1640
1641static __inline__ __m256d __DEFAULT_FN_ATTRS256
1642_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1643 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1644 (__v4df)_mm256_add_pd(__A, __B),
1645 (__v4df)_mm256_setzero_pd());
1646}
1647
1648static __inline__ __m128 __DEFAULT_FN_ATTRS128
1649_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1650 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1651 (__v4sf)_mm_add_ps(__A, __B),
1652 (__v4sf)__W);
1653}
1654
1655static __inline__ __m128 __DEFAULT_FN_ATTRS128
1656_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1657 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1658 (__v4sf)_mm_add_ps(__A, __B),
1659 (__v4sf)_mm_setzero_ps());
1660}
1661
1662static __inline__ __m256 __DEFAULT_FN_ATTRS256
1663_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1664 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1665 (__v8sf)_mm256_add_ps(__A, __B),
1666 (__v8sf)__W);
1667}
1668
1669static __inline__ __m256 __DEFAULT_FN_ATTRS256
1670_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1671 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1672 (__v8sf)_mm256_add_ps(__A, __B),
1673 (__v8sf)_mm256_setzero_ps());
1674}
1675
1676static __inline__ __m128i __DEFAULT_FN_ATTRS128
1677_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1678 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1679 (__v4si) __W,
1680 (__v4si) __A);
1681}
1682
1683static __inline__ __m256i __DEFAULT_FN_ATTRS256
1684_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1685 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1686 (__v8si) __W,
1687 (__v8si) __A);
1688}
1689
1690static __inline__ __m128d __DEFAULT_FN_ATTRS128
1691_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1692 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1693 (__v2df) __W,
1694 (__v2df) __A);
1695}
1696
1697static __inline__ __m256d __DEFAULT_FN_ATTRS256
1698_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1699 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1700 (__v4df) __W,
1701 (__v4df) __A);
1702}
1703
1704static __inline__ __m128 __DEFAULT_FN_ATTRS128
1705_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1706 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1707 (__v4sf) __W,
1708 (__v4sf) __A);
1709}
1710
1711static __inline__ __m256 __DEFAULT_FN_ATTRS256
1712_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1713 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1714 (__v8sf) __W,
1715 (__v8sf) __A);
1716}
1717
1718static __inline__ __m128i __DEFAULT_FN_ATTRS128
1719_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1720 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1721 (__v2di) __W,
1722 (__v2di) __A);
1723}
1724
1725static __inline__ __m256i __DEFAULT_FN_ATTRS256
1726_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1727 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1728 (__v4di) __W,
1729 (__v4di) __A);
1730}
1731
1732static __inline__ __m128d __DEFAULT_FN_ATTRS128
1733_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1734 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1735 (__v2df) __W,
1736 (__mmask8) __U);
1737}
1738
1739static __inline__ __m128d __DEFAULT_FN_ATTRS128
1741 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1742 (__v2df)
1743 _mm_setzero_pd (),
1744 (__mmask8) __U);
1745}
1746
1747static __inline__ __m256d __DEFAULT_FN_ATTRS256
1748_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1749 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1750 (__v4df) __W,
1751 (__mmask8) __U);
1752}
1753
1754static __inline__ __m256d __DEFAULT_FN_ATTRS256
1756 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1757 (__v4df)
1759 (__mmask8) __U);
1760}
1761
1762static __inline__ __m128i __DEFAULT_FN_ATTRS128
1763_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1764 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1765 (__v2di) __W,
1766 (__mmask8) __U);
1767}
1768
1769static __inline__ __m128i __DEFAULT_FN_ATTRS128
1771 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1772 (__v2di)
1774 (__mmask8) __U);
1775}
1776
1777static __inline__ __m256i __DEFAULT_FN_ATTRS256
1778_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1779 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1780 (__v4di) __W,
1781 (__mmask8) __U);
1782}
1783
1784static __inline__ __m256i __DEFAULT_FN_ATTRS256
1786 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1787 (__v4di)
1789 (__mmask8) __U);
1790}
1791
1792static __inline__ __m128 __DEFAULT_FN_ATTRS128
1793_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1794 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1795 (__v4sf) __W,
1796 (__mmask8) __U);
1797}
1798
1799static __inline__ __m128 __DEFAULT_FN_ATTRS128
1801 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1802 (__v4sf)
1803 _mm_setzero_ps (),
1804 (__mmask8) __U);
1805}
1806
1807static __inline__ __m256 __DEFAULT_FN_ATTRS256
1808_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1809 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1810 (__v8sf) __W,
1811 (__mmask8) __U);
1812}
1813
1814static __inline__ __m256 __DEFAULT_FN_ATTRS256
1816 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1817 (__v8sf)
1819 (__mmask8) __U);
1820}
1821
1822static __inline__ __m128i __DEFAULT_FN_ATTRS128
1823_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1824 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1825 (__v4si) __W,
1826 (__mmask8) __U);
1827}
1828
1829static __inline__ __m128i __DEFAULT_FN_ATTRS128
1831 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1832 (__v4si)
1834 (__mmask8) __U);
1835}
1836
1837static __inline__ __m256i __DEFAULT_FN_ATTRS256
1838_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1839 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1840 (__v8si) __W,
1841 (__mmask8) __U);
1842}
1843
1844static __inline__ __m256i __DEFAULT_FN_ATTRS256
1846 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1847 (__v8si)
1849 (__mmask8) __U);
1850}
1851
1852static __inline__ void __DEFAULT_FN_ATTRS128
1853_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1854 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1855 (__v2df) __A,
1856 (__mmask8) __U);
1857}
1858
1859static __inline__ void __DEFAULT_FN_ATTRS256
1860_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1861 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1862 (__v4df) __A,
1863 (__mmask8) __U);
1864}
1865
1866static __inline__ void __DEFAULT_FN_ATTRS128
1867_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1868 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1869 (__v2di) __A,
1870 (__mmask8) __U);
1871}
1872
1873static __inline__ void __DEFAULT_FN_ATTRS256
1875 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1876 (__v4di) __A,
1877 (__mmask8) __U);
1878}
1879
1880static __inline__ void __DEFAULT_FN_ATTRS128
1881_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1882 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1883 (__v4sf) __A,
1884 (__mmask8) __U);
1885}
1886
1887static __inline__ void __DEFAULT_FN_ATTRS256
1889 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1890 (__v8sf) __A,
1891 (__mmask8) __U);
1892}
1893
1894static __inline__ void __DEFAULT_FN_ATTRS128
1895_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1896 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1897 (__v4si) __A,
1898 (__mmask8) __U);
1899}
1900
1901static __inline__ void __DEFAULT_FN_ATTRS256
1903 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1904 (__v8si) __A,
1905 (__mmask8) __U);
1906}
1907
1908static __inline__ __m128d __DEFAULT_FN_ATTRS128
1909_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1910 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1911 (__v2df)_mm_cvtepi32_pd(__A),
1912 (__v2df)__W);
1913}
1914
1915static __inline__ __m128d __DEFAULT_FN_ATTRS128
1917 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1918 (__v2df)_mm_cvtepi32_pd(__A),
1919 (__v2df)_mm_setzero_pd());
1920}
1921
1922static __inline__ __m256d __DEFAULT_FN_ATTRS256
1923_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1924 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1925 (__v4df)_mm256_cvtepi32_pd(__A),
1926 (__v4df)__W);
1927}
1928
1929static __inline__ __m256d __DEFAULT_FN_ATTRS256
1931 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1932 (__v4df)_mm256_cvtepi32_pd(__A),
1933 (__v4df)_mm256_setzero_pd());
1934}
1935
1936static __inline__ __m128 __DEFAULT_FN_ATTRS128
1937_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1938 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1939 (__v4sf)_mm_cvtepi32_ps(__A),
1940 (__v4sf)__W);
1941}
1942
1943static __inline__ __m128 __DEFAULT_FN_ATTRS128
1945 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1946 (__v4sf)_mm_cvtepi32_ps(__A),
1947 (__v4sf)_mm_setzero_ps());
1948}
1949
1950static __inline__ __m256 __DEFAULT_FN_ATTRS256
1951_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1952 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1953 (__v8sf)_mm256_cvtepi32_ps(__A),
1954 (__v8sf)__W);
1955}
1956
1957static __inline__ __m256 __DEFAULT_FN_ATTRS256
1959 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1960 (__v8sf)_mm256_cvtepi32_ps(__A),
1961 (__v8sf)_mm256_setzero_ps());
1962}
1963
1964static __inline__ __m128i __DEFAULT_FN_ATTRS128
1965_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1966 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1967 (__v4si) __W,
1968 (__mmask8) __U);
1969}
1970
1971static __inline__ __m128i __DEFAULT_FN_ATTRS128
1973 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1974 (__v4si)
1976 (__mmask8) __U);
1977}
1978
1979static __inline__ __m128i __DEFAULT_FN_ATTRS256
1980_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1982 (__v4si)_mm256_cvtpd_epi32(__A),
1983 (__v4si)__W);
1984}
1985
1986static __inline__ __m128i __DEFAULT_FN_ATTRS256
1988 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1989 (__v4si)_mm256_cvtpd_epi32(__A),
1990 (__v4si)_mm_setzero_si128());
1991}
1992
1993static __inline__ __m128 __DEFAULT_FN_ATTRS128
1994_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1995 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1996 (__v4sf) __W,
1997 (__mmask8) __U);
1998}
1999
2000static __inline__ __m128 __DEFAULT_FN_ATTRS128
2001_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
2002 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2003 (__v4sf)
2004 _mm_setzero_ps (),
2005 (__mmask8) __U);
2006}
2007
2008static __inline__ __m128 __DEFAULT_FN_ATTRS256
2009_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2010 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2011 (__v4sf)_mm256_cvtpd_ps(__A),
2012 (__v4sf)__W);
2013}
2014
2015static __inline__ __m128 __DEFAULT_FN_ATTRS256
2017 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2018 (__v4sf)_mm256_cvtpd_ps(__A),
2019 (__v4sf)_mm_setzero_ps());
2020}
2021
2022static __inline__ __m128i __DEFAULT_FN_ATTRS128
2023_mm_cvtpd_epu32 (__m128d __A) {
2024 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2025 (__v4si)
2027 (__mmask8) -1);
2028}
2029
2030static __inline__ __m128i __DEFAULT_FN_ATTRS128
2031_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2032 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2033 (__v4si) __W,
2034 (__mmask8) __U);
2035}
2036
2037static __inline__ __m128i __DEFAULT_FN_ATTRS128
2039 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2040 (__v4si)
2042 (__mmask8) __U);
2043}
2044
2045static __inline__ __m128i __DEFAULT_FN_ATTRS256
2046_mm256_cvtpd_epu32 (__m256d __A) {
2047 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2048 (__v4si)
2050 (__mmask8) -1);
2051}
2052
2053static __inline__ __m128i __DEFAULT_FN_ATTRS256
2054_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2055 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2056 (__v4si) __W,
2057 (__mmask8) __U);
2058}
2059
2060static __inline__ __m128i __DEFAULT_FN_ATTRS256
2062 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2063 (__v4si)
2065 (__mmask8) __U);
2066}
2067
2068static __inline__ __m128i __DEFAULT_FN_ATTRS128
2069_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2070 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2071 (__v4si)_mm_cvtps_epi32(__A),
2072 (__v4si)__W);
2073}
2074
2075static __inline__ __m128i __DEFAULT_FN_ATTRS128
2077 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2078 (__v4si)_mm_cvtps_epi32(__A),
2079 (__v4si)_mm_setzero_si128());
2080}
2081
2082static __inline__ __m256i __DEFAULT_FN_ATTRS256
2083_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2084 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2085 (__v8si)_mm256_cvtps_epi32(__A),
2086 (__v8si)__W);
2087}
2088
2089static __inline__ __m256i __DEFAULT_FN_ATTRS256
2091 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2092 (__v8si)_mm256_cvtps_epi32(__A),
2093 (__v8si)_mm256_setzero_si256());
2094}
2095
2096static __inline__ __m128d __DEFAULT_FN_ATTRS128
2097_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2098 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2099 (__v2df)_mm_cvtps_pd(__A),
2100 (__v2df)__W);
2101}
2102
2103static __inline__ __m128d __DEFAULT_FN_ATTRS128
2104_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2105 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2106 (__v2df)_mm_cvtps_pd(__A),
2107 (__v2df)_mm_setzero_pd());
2108}
2109
2110static __inline__ __m256d __DEFAULT_FN_ATTRS256
2111_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2112 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2113 (__v4df)_mm256_cvtps_pd(__A),
2114 (__v4df)__W);
2115}
2116
2117static __inline__ __m256d __DEFAULT_FN_ATTRS256
2119 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2120 (__v4df)_mm256_cvtps_pd(__A),
2121 (__v4df)_mm256_setzero_pd());
2122}
2123
2124static __inline__ __m128i __DEFAULT_FN_ATTRS128
2125_mm_cvtps_epu32 (__m128 __A) {
2126 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2127 (__v4si)
2129 (__mmask8) -1);
2130}
2131
2132static __inline__ __m128i __DEFAULT_FN_ATTRS128
2133_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2134 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2135 (__v4si) __W,
2136 (__mmask8) __U);
2137}
2138
2139static __inline__ __m128i __DEFAULT_FN_ATTRS128
2141 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2142 (__v4si)
2144 (__mmask8) __U);
2145}
2146
2147static __inline__ __m256i __DEFAULT_FN_ATTRS256
2149 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2150 (__v8si)
2152 (__mmask8) -1);
2153}
2154
2155static __inline__ __m256i __DEFAULT_FN_ATTRS256
2156_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2157 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2158 (__v8si) __W,
2159 (__mmask8) __U);
2160}
2161
2162static __inline__ __m256i __DEFAULT_FN_ATTRS256
2164 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2165 (__v8si)
2167 (__mmask8) __U);
2168}
2169
2170static __inline__ __m128i __DEFAULT_FN_ATTRS128
2171_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2172 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2173 (__v4si) __W,
2174 (__mmask8) __U);
2175}
2176
2177static __inline__ __m128i __DEFAULT_FN_ATTRS128
2179 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2180 (__v4si)
2182 (__mmask8) __U);
2183}
2184
2185static __inline__ __m128i __DEFAULT_FN_ATTRS256
2186_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2187 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2188 (__v4si)_mm256_cvttpd_epi32(__A),
2189 (__v4si)__W);
2190}
2191
2192static __inline__ __m128i __DEFAULT_FN_ATTRS256
2194 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2195 (__v4si)_mm256_cvttpd_epi32(__A),
2196 (__v4si)_mm_setzero_si128());
2197}
2198
2199static __inline__ __m128i __DEFAULT_FN_ATTRS128
2200_mm_cvttpd_epu32 (__m128d __A) {
2201 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2202 (__v4si)
2204 (__mmask8) -1);
2205}
2206
2207static __inline__ __m128i __DEFAULT_FN_ATTRS128
2208_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2209 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2210 (__v4si) __W,
2211 (__mmask8) __U);
2212}
2213
2214static __inline__ __m128i __DEFAULT_FN_ATTRS128
2216 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2217 (__v4si)
2219 (__mmask8) __U);
2220}
2221
2222static __inline__ __m128i __DEFAULT_FN_ATTRS256
2223_mm256_cvttpd_epu32 (__m256d __A) {
2224 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2225 (__v4si)
2227 (__mmask8) -1);
2228}
2229
2230static __inline__ __m128i __DEFAULT_FN_ATTRS256
2231_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2232 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2233 (__v4si) __W,
2234 (__mmask8) __U);
2235}
2236
2237static __inline__ __m128i __DEFAULT_FN_ATTRS256
2239 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2240 (__v4si)
2242 (__mmask8) __U);
2243}
2244
2245static __inline__ __m128i __DEFAULT_FN_ATTRS128
2246_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2247 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2248 (__v4si)_mm_cvttps_epi32(__A),
2249 (__v4si)__W);
2250}
2251
2252static __inline__ __m128i __DEFAULT_FN_ATTRS128
2254 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2255 (__v4si)_mm_cvttps_epi32(__A),
2256 (__v4si)_mm_setzero_si128());
2257}
2258
2259static __inline__ __m256i __DEFAULT_FN_ATTRS256
2260_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2261 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2262 (__v8si)_mm256_cvttps_epi32(__A),
2263 (__v8si)__W);
2264}
2265
2266static __inline__ __m256i __DEFAULT_FN_ATTRS256
2268 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2269 (__v8si)_mm256_cvttps_epi32(__A),
2270 (__v8si)_mm256_setzero_si256());
2271}
2272
2273static __inline__ __m128i __DEFAULT_FN_ATTRS128
2274_mm_cvttps_epu32 (__m128 __A) {
2275 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2276 (__v4si)
2278 (__mmask8) -1);
2279}
2280
2281static __inline__ __m128i __DEFAULT_FN_ATTRS128
2282_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2283 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2284 (__v4si) __W,
2285 (__mmask8) __U);
2286}
2287
2288static __inline__ __m128i __DEFAULT_FN_ATTRS128
2290 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2291 (__v4si)
2293 (__mmask8) __U);
2294}
2295
2296static __inline__ __m256i __DEFAULT_FN_ATTRS256
2298 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2299 (__v8si)
2301 (__mmask8) -1);
2302}
2303
2304static __inline__ __m256i __DEFAULT_FN_ATTRS256
2305_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2306 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2307 (__v8si) __W,
2308 (__mmask8) __U);
2309}
2310
2311static __inline__ __m256i __DEFAULT_FN_ATTRS256
2313 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2314 (__v8si)
2316 (__mmask8) __U);
2317}
2318
2319static __inline__ __m128d __DEFAULT_FN_ATTRS128
2320_mm_cvtepu32_pd (__m128i __A) {
2321 return (__m128d) __builtin_convertvector(
2322 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2323}
2324
2325static __inline__ __m128d __DEFAULT_FN_ATTRS128
2326_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2327 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2328 (__v2df)_mm_cvtepu32_pd(__A),
2329 (__v2df)__W);
2330}
2331
2332static __inline__ __m128d __DEFAULT_FN_ATTRS128
2334 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2335 (__v2df)_mm_cvtepu32_pd(__A),
2336 (__v2df)_mm_setzero_pd());
2337}
2338
2339static __inline__ __m256d __DEFAULT_FN_ATTRS256
2340_mm256_cvtepu32_pd (__m128i __A) {
2341 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2342}
2343
2344static __inline__ __m256d __DEFAULT_FN_ATTRS256
2345_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2346 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2347 (__v4df)_mm256_cvtepu32_pd(__A),
2348 (__v4df)__W);
2349}
2350
2351static __inline__ __m256d __DEFAULT_FN_ATTRS256
2353 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2354 (__v4df)_mm256_cvtepu32_pd(__A),
2355 (__v4df)_mm256_setzero_pd());
2356}
2357
2358static __inline__ __m128 __DEFAULT_FN_ATTRS128
2359_mm_cvtepu32_ps (__m128i __A) {
2360 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2361}
2362
2363static __inline__ __m128 __DEFAULT_FN_ATTRS128
2364_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2365 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2366 (__v4sf)_mm_cvtepu32_ps(__A),
2367 (__v4sf)__W);
2368}
2369
2370static __inline__ __m128 __DEFAULT_FN_ATTRS128
2372 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2373 (__v4sf)_mm_cvtepu32_ps(__A),
2374 (__v4sf)_mm_setzero_ps());
2375}
2376
2377static __inline__ __m256 __DEFAULT_FN_ATTRS256
2378_mm256_cvtepu32_ps (__m256i __A) {
2379 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2380}
2381
2382static __inline__ __m256 __DEFAULT_FN_ATTRS256
2383_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2384 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2385 (__v8sf)_mm256_cvtepu32_ps(__A),
2386 (__v8sf)__W);
2387}
2388
2389static __inline__ __m256 __DEFAULT_FN_ATTRS256
2391 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2392 (__v8sf)_mm256_cvtepu32_ps(__A),
2393 (__v8sf)_mm256_setzero_ps());
2394}
2395
2396static __inline__ __m128d __DEFAULT_FN_ATTRS128
2397_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2398 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2399 (__v2df)_mm_div_pd(__A, __B),
2400 (__v2df)__W);
2401}
2402
2403static __inline__ __m128d __DEFAULT_FN_ATTRS128
2404_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2405 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2406 (__v2df)_mm_div_pd(__A, __B),
2407 (__v2df)_mm_setzero_pd());
2408}
2409
2410static __inline__ __m256d __DEFAULT_FN_ATTRS256
2411_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2412 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2413 (__v4df)_mm256_div_pd(__A, __B),
2414 (__v4df)__W);
2415}
2416
2417static __inline__ __m256d __DEFAULT_FN_ATTRS256
2418_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2419 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2420 (__v4df)_mm256_div_pd(__A, __B),
2421 (__v4df)_mm256_setzero_pd());
2422}
2423
2424static __inline__ __m128 __DEFAULT_FN_ATTRS128
2425_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2426 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2427 (__v4sf)_mm_div_ps(__A, __B),
2428 (__v4sf)__W);
2429}
2430
2431static __inline__ __m128 __DEFAULT_FN_ATTRS128
2432_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2433 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2434 (__v4sf)_mm_div_ps(__A, __B),
2435 (__v4sf)_mm_setzero_ps());
2436}
2437
2438static __inline__ __m256 __DEFAULT_FN_ATTRS256
2439_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2440 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2441 (__v8sf)_mm256_div_ps(__A, __B),
2442 (__v8sf)__W);
2443}
2444
2445static __inline__ __m256 __DEFAULT_FN_ATTRS256
2446_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2447 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2448 (__v8sf)_mm256_div_ps(__A, __B),
2449 (__v8sf)_mm256_setzero_ps());
2450}
2451
2452static __inline__ __m128d __DEFAULT_FN_ATTRS128
2453_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2454 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2455 (__v2df) __W,
2456 (__mmask8) __U);
2457}
2458
2459static __inline__ __m128d __DEFAULT_FN_ATTRS128
2460_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2461 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2462 (__v2df)
2463 _mm_setzero_pd (),
2464 (__mmask8) __U);
2465}
2466
2467static __inline__ __m256d __DEFAULT_FN_ATTRS256
2468_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2469 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2470 (__v4df) __W,
2471 (__mmask8) __U);
2472}
2473
2474static __inline__ __m256d __DEFAULT_FN_ATTRS256
2476 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2477 (__v4df)
2479 (__mmask8) __U);
2480}
2481
2482static __inline__ __m128i __DEFAULT_FN_ATTRS128
2483_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2484 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2485 (__v2di) __W,
2486 (__mmask8) __U);
2487}
2488
2489static __inline__ __m128i __DEFAULT_FN_ATTRS128
2491 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2492 (__v2di)
2494 (__mmask8) __U);
2495}
2496
2497static __inline__ __m256i __DEFAULT_FN_ATTRS256
2498_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2499 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2500 (__v4di) __W,
2501 (__mmask8) __U);
2502}
2503
2504static __inline__ __m256i __DEFAULT_FN_ATTRS256
2506 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2507 (__v4di)
2509 (__mmask8) __U);
2510}
2511
2512static __inline__ __m128d __DEFAULT_FN_ATTRS128
2513_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2514 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2515 (__v2df) __W,
2516 (__mmask8)
2517 __U);
2518}
2519
2520static __inline__ __m128d __DEFAULT_FN_ATTRS128
2522 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2523 (__v2df)
2524 _mm_setzero_pd (),
2525 (__mmask8)
2526 __U);
2527}
2528
2529static __inline__ __m256d __DEFAULT_FN_ATTRS256
2530_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2531 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2532 (__v4df) __W,
2533 (__mmask8)
2534 __U);
2535}
2536
2537static __inline__ __m256d __DEFAULT_FN_ATTRS256
2539 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2540 (__v4df)
2542 (__mmask8)
2543 __U);
2544}
2545
2546static __inline__ __m128i __DEFAULT_FN_ATTRS128
2547_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2548 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2549 (__v2di) __W,
2550 (__mmask8)
2551 __U);
2552}
2553
2554static __inline__ __m128i __DEFAULT_FN_ATTRS128
2556 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2557 (__v2di)
2559 (__mmask8)
2560 __U);
2561}
2562
2563static __inline__ __m256i __DEFAULT_FN_ATTRS256
2565 void const *__P) {
2566 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2567 (__v4di) __W,
2568 (__mmask8)
2569 __U);
2570}
2571
2572static __inline__ __m256i __DEFAULT_FN_ATTRS256
2574 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2575 (__v4di)
2577 (__mmask8)
2578 __U);
2579}
2580
2581static __inline__ __m128 __DEFAULT_FN_ATTRS128
2582_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2583 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2584 (__v4sf) __W,
2585 (__mmask8) __U);
2586}
2587
2588static __inline__ __m128 __DEFAULT_FN_ATTRS128
2590 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2591 (__v4sf)
2592 _mm_setzero_ps (),
2593 (__mmask8)
2594 __U);
2595}
2596
2597static __inline__ __m256 __DEFAULT_FN_ATTRS256
2598_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2599 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2600 (__v8sf) __W,
2601 (__mmask8) __U);
2602}
2603
2604static __inline__ __m256 __DEFAULT_FN_ATTRS256
2606 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2607 (__v8sf)
2609 (__mmask8)
2610 __U);
2611}
2612
2613static __inline__ __m128i __DEFAULT_FN_ATTRS128
2614_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2615 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2616 (__v4si) __W,
2617 (__mmask8)
2618 __U);
2619}
2620
2621static __inline__ __m128i __DEFAULT_FN_ATTRS128
2623 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2624 (__v4si)
2626 (__mmask8) __U);
2627}
2628
2629static __inline__ __m256i __DEFAULT_FN_ATTRS256
2631 void const *__P) {
2632 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2633 (__v8si) __W,
2634 (__mmask8)
2635 __U);
2636}
2637
2638static __inline__ __m256i __DEFAULT_FN_ATTRS256
2640 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2641 (__v8si)
2643 (__mmask8)
2644 __U);
2645}
2646
2647static __inline__ __m128 __DEFAULT_FN_ATTRS128
2648_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2649 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2650 (__v4sf) __W,
2651 (__mmask8) __U);
2652}
2653
2654static __inline__ __m128 __DEFAULT_FN_ATTRS128
2656 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2657 (__v4sf)
2658 _mm_setzero_ps (),
2659 (__mmask8) __U);
2660}
2661
2662static __inline__ __m256 __DEFAULT_FN_ATTRS256
2663_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2664 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2665 (__v8sf) __W,
2666 (__mmask8) __U);
2667}
2668
2669static __inline__ __m256 __DEFAULT_FN_ATTRS256
2671 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2672 (__v8sf)
2674 (__mmask8) __U);
2675}
2676
2677static __inline__ __m128i __DEFAULT_FN_ATTRS128
2678_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2679 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2680 (__v4si) __W,
2681 (__mmask8) __U);
2682}
2683
2684static __inline__ __m128i __DEFAULT_FN_ATTRS128
2686 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2687 (__v4si)
2689 (__mmask8) __U);
2690}
2691
2692static __inline__ __m256i __DEFAULT_FN_ATTRS256
2693_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2694 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2695 (__v8si) __W,
2696 (__mmask8) __U);
2697}
2698
2699static __inline__ __m256i __DEFAULT_FN_ATTRS256
2701 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2702 (__v8si)
2704 (__mmask8) __U);
2705}
2706
2707static __inline__ __m128d __DEFAULT_FN_ATTRS128
2708_mm_getexp_pd (__m128d __A) {
2709 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2710 (__v2df)
2711 _mm_setzero_pd (),
2712 (__mmask8) -1);
2713}
2714
2715static __inline__ __m128d __DEFAULT_FN_ATTRS128
2716_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2717 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2718 (__v2df) __W,
2719 (__mmask8) __U);
2720}
2721
2722static __inline__ __m128d __DEFAULT_FN_ATTRS128
2723_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2724 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2725 (__v2df)
2726 _mm_setzero_pd (),
2727 (__mmask8) __U);
2728}
2729
2730static __inline__ __m256d __DEFAULT_FN_ATTRS256
2731_mm256_getexp_pd (__m256d __A) {
2732 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2733 (__v4df)
2735 (__mmask8) -1);
2736}
2737
2738static __inline__ __m256d __DEFAULT_FN_ATTRS256
2739_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2740 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2741 (__v4df) __W,
2742 (__mmask8) __U);
2743}
2744
2745static __inline__ __m256d __DEFAULT_FN_ATTRS256
2747 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2748 (__v4df)
2750 (__mmask8) __U);
2751}
2752
2753static __inline__ __m128 __DEFAULT_FN_ATTRS128
2754_mm_getexp_ps (__m128 __A) {
2755 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2756 (__v4sf)
2757 _mm_setzero_ps (),
2758 (__mmask8) -1);
2759}
2760
2761static __inline__ __m128 __DEFAULT_FN_ATTRS128
2762_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2763 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2764 (__v4sf) __W,
2765 (__mmask8) __U);
2766}
2767
2768static __inline__ __m128 __DEFAULT_FN_ATTRS128
2770 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2771 (__v4sf)
2772 _mm_setzero_ps (),
2773 (__mmask8) __U);
2774}
2775
2776static __inline__ __m256 __DEFAULT_FN_ATTRS256
2777_mm256_getexp_ps (__m256 __A) {
2778 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2779 (__v8sf)
2781 (__mmask8) -1);
2782}
2783
2784static __inline__ __m256 __DEFAULT_FN_ATTRS256
2785_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2786 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2787 (__v8sf) __W,
2788 (__mmask8) __U);
2789}
2790
2791static __inline__ __m256 __DEFAULT_FN_ATTRS256
2793 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2794 (__v8sf)
2796 (__mmask8) __U);
2797}
2798
2799static __inline__ __m128d __DEFAULT_FN_ATTRS128
2800_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2801 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2802 (__v2df)_mm_max_pd(__A, __B),
2803 (__v2df)__W);
2804}
2805
2806static __inline__ __m128d __DEFAULT_FN_ATTRS128
2807_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2808 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2809 (__v2df)_mm_max_pd(__A, __B),
2810 (__v2df)_mm_setzero_pd());
2811}
2812
2813static __inline__ __m256d __DEFAULT_FN_ATTRS256
2814_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2815 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2816 (__v4df)_mm256_max_pd(__A, __B),
2817 (__v4df)__W);
2818}
2819
2820static __inline__ __m256d __DEFAULT_FN_ATTRS256
2821_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2822 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2823 (__v4df)_mm256_max_pd(__A, __B),
2824 (__v4df)_mm256_setzero_pd());
2825}
2826
2827static __inline__ __m128 __DEFAULT_FN_ATTRS128
2828_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2829 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2830 (__v4sf)_mm_max_ps(__A, __B),
2831 (__v4sf)__W);
2832}
2833
2834static __inline__ __m128 __DEFAULT_FN_ATTRS128
2835_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2836 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2837 (__v4sf)_mm_max_ps(__A, __B),
2838 (__v4sf)_mm_setzero_ps());
2839}
2840
2841static __inline__ __m256 __DEFAULT_FN_ATTRS256
2842_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2843 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2844 (__v8sf)_mm256_max_ps(__A, __B),
2845 (__v8sf)__W);
2846}
2847
2848static __inline__ __m256 __DEFAULT_FN_ATTRS256
2849_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2850 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2851 (__v8sf)_mm256_max_ps(__A, __B),
2852 (__v8sf)_mm256_setzero_ps());
2853}
2854
2855static __inline__ __m128d __DEFAULT_FN_ATTRS128
2856_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2857 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2858 (__v2df)_mm_min_pd(__A, __B),
2859 (__v2df)__W);
2860}
2861
2862static __inline__ __m128d __DEFAULT_FN_ATTRS128
2863_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2864 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2865 (__v2df)_mm_min_pd(__A, __B),
2866 (__v2df)_mm_setzero_pd());
2867}
2868
2869static __inline__ __m256d __DEFAULT_FN_ATTRS256
2870_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2871 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2872 (__v4df)_mm256_min_pd(__A, __B),
2873 (__v4df)__W);
2874}
2875
2876static __inline__ __m256d __DEFAULT_FN_ATTRS256
2877_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2878 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2879 (__v4df)_mm256_min_pd(__A, __B),
2880 (__v4df)_mm256_setzero_pd());
2881}
2882
2883static __inline__ __m128 __DEFAULT_FN_ATTRS128
2884_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2885 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2886 (__v4sf)_mm_min_ps(__A, __B),
2887 (__v4sf)__W);
2888}
2889
2890static __inline__ __m128 __DEFAULT_FN_ATTRS128
2891_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2892 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2893 (__v4sf)_mm_min_ps(__A, __B),
2894 (__v4sf)_mm_setzero_ps());
2895}
2896
2897static __inline__ __m256 __DEFAULT_FN_ATTRS256
2898_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2899 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2900 (__v8sf)_mm256_min_ps(__A, __B),
2901 (__v8sf)__W);
2902}
2903
2904static __inline__ __m256 __DEFAULT_FN_ATTRS256
2905_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2906 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2907 (__v8sf)_mm256_min_ps(__A, __B),
2908 (__v8sf)_mm256_setzero_ps());
2909}
2910
2911static __inline__ __m128d __DEFAULT_FN_ATTRS128
2912_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2913 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2914 (__v2df)_mm_mul_pd(__A, __B),
2915 (__v2df)__W);
2916}
2917
2918static __inline__ __m128d __DEFAULT_FN_ATTRS128
2919_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2920 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2921 (__v2df)_mm_mul_pd(__A, __B),
2922 (__v2df)_mm_setzero_pd());
2923}
2924
2925static __inline__ __m256d __DEFAULT_FN_ATTRS256
2926_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2927 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2928 (__v4df)_mm256_mul_pd(__A, __B),
2929 (__v4df)__W);
2930}
2931
2932static __inline__ __m256d __DEFAULT_FN_ATTRS256
2933_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2934 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2935 (__v4df)_mm256_mul_pd(__A, __B),
2936 (__v4df)_mm256_setzero_pd());
2937}
2938
2939static __inline__ __m128 __DEFAULT_FN_ATTRS128
2940_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2941 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2942 (__v4sf)_mm_mul_ps(__A, __B),
2943 (__v4sf)__W);
2944}
2945
2946static __inline__ __m128 __DEFAULT_FN_ATTRS128
2947_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2948 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2949 (__v4sf)_mm_mul_ps(__A, __B),
2950 (__v4sf)_mm_setzero_ps());
2951}
2952
2953static __inline__ __m256 __DEFAULT_FN_ATTRS256
2954_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2955 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2956 (__v8sf)_mm256_mul_ps(__A, __B),
2957 (__v8sf)__W);
2958}
2959
2960static __inline__ __m256 __DEFAULT_FN_ATTRS256
2961_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2962 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2963 (__v8sf)_mm256_mul_ps(__A, __B),
2964 (__v8sf)_mm256_setzero_ps());
2965}
2966
2967static __inline__ __m128i __DEFAULT_FN_ATTRS128
2968_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2969 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2970 (__v4si)_mm_abs_epi32(__A),
2971 (__v4si)__W);
2972}
2973
2974static __inline__ __m128i __DEFAULT_FN_ATTRS128
2976 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2977 (__v4si)_mm_abs_epi32(__A),
2978 (__v4si)_mm_setzero_si128());
2979}
2980
2981static __inline__ __m256i __DEFAULT_FN_ATTRS256
2982_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2983 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2984 (__v8si)_mm256_abs_epi32(__A),
2985 (__v8si)__W);
2986}
2987
2988static __inline__ __m256i __DEFAULT_FN_ATTRS256
2990 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2991 (__v8si)_mm256_abs_epi32(__A),
2992 (__v8si)_mm256_setzero_si256());
2993}
2994
2995static __inline__ __m128i __DEFAULT_FN_ATTRS128
2996_mm_abs_epi64 (__m128i __A) {
2997 return (__m128i)__builtin_elementwise_abs((__v2di)__A);
2998}
2999
3000static __inline__ __m128i __DEFAULT_FN_ATTRS128
3001_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
3002 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3003 (__v2di)_mm_abs_epi64(__A),
3004 (__v2di)__W);
3005}
3006
3007static __inline__ __m128i __DEFAULT_FN_ATTRS128
3008_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3009 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3010 (__v2di)_mm_abs_epi64(__A),
3011 (__v2di)_mm_setzero_si128());
3012}
3013
3014static __inline__ __m256i __DEFAULT_FN_ATTRS256
3015_mm256_abs_epi64 (__m256i __A) {
3016 return (__m256i)__builtin_elementwise_abs((__v4di)__A);
3017}
3018
3019static __inline__ __m256i __DEFAULT_FN_ATTRS256
3020_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3021 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3022 (__v4di)_mm256_abs_epi64(__A),
3023 (__v4di)__W);
3024}
3025
3026static __inline__ __m256i __DEFAULT_FN_ATTRS256
3028 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3029 (__v4di)_mm256_abs_epi64(__A),
3030 (__v4di)_mm256_setzero_si256());
3031}
3032
3033static __inline__ __m128i __DEFAULT_FN_ATTRS128
3034_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3035 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3036 (__v4si)_mm_max_epi32(__A, __B),
3037 (__v4si)_mm_setzero_si128());
3038}
3039
3040static __inline__ __m128i __DEFAULT_FN_ATTRS128
3041_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3042 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3043 (__v4si)_mm_max_epi32(__A, __B),
3044 (__v4si)__W);
3045}
3046
3047static __inline__ __m256i __DEFAULT_FN_ATTRS256
3048_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3049 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3050 (__v8si)_mm256_max_epi32(__A, __B),
3051 (__v8si)_mm256_setzero_si256());
3052}
3053
3054static __inline__ __m256i __DEFAULT_FN_ATTRS256
3055_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3056 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3057 (__v8si)_mm256_max_epi32(__A, __B),
3058 (__v8si)__W);
3059}
3060
3061static __inline__ __m128i __DEFAULT_FN_ATTRS128
3062_mm_max_epi64 (__m128i __A, __m128i __B) {
3063 return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
3064}
3065
3066static __inline__ __m128i __DEFAULT_FN_ATTRS128
3067_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3068 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3069 (__v2di)_mm_max_epi64(__A, __B),
3070 (__v2di)_mm_setzero_si128());
3071}
3072
3073static __inline__ __m128i __DEFAULT_FN_ATTRS128
3074_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3075 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3076 (__v2di)_mm_max_epi64(__A, __B),
3077 (__v2di)__W);
3078}
3079
3080static __inline__ __m256i __DEFAULT_FN_ATTRS256
3081_mm256_max_epi64 (__m256i __A, __m256i __B) {
3082 return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
3083}
3084
3085static __inline__ __m256i __DEFAULT_FN_ATTRS256
3086_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3087 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3088 (__v4di)_mm256_max_epi64(__A, __B),
3089 (__v4di)_mm256_setzero_si256());
3090}
3091
3092static __inline__ __m256i __DEFAULT_FN_ATTRS256
3093_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3094 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3095 (__v4di)_mm256_max_epi64(__A, __B),
3096 (__v4di)__W);
3097}
3098
3099static __inline__ __m128i __DEFAULT_FN_ATTRS128
3100_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3101 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3102 (__v4si)_mm_max_epu32(__A, __B),
3103 (__v4si)_mm_setzero_si128());
3104}
3105
3106static __inline__ __m128i __DEFAULT_FN_ATTRS128
3107_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3108 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3109 (__v4si)_mm_max_epu32(__A, __B),
3110 (__v4si)__W);
3111}
3112
3113static __inline__ __m256i __DEFAULT_FN_ATTRS256
3114_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3115 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3116 (__v8si)_mm256_max_epu32(__A, __B),
3117 (__v8si)_mm256_setzero_si256());
3118}
3119
3120static __inline__ __m256i __DEFAULT_FN_ATTRS256
3121_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3122 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3123 (__v8si)_mm256_max_epu32(__A, __B),
3124 (__v8si)__W);
3125}
3126
3127static __inline__ __m128i __DEFAULT_FN_ATTRS128
3128_mm_max_epu64 (__m128i __A, __m128i __B) {
3129 return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
3130}
3131
3132static __inline__ __m128i __DEFAULT_FN_ATTRS128
3133_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3134 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3135 (__v2di)_mm_max_epu64(__A, __B),
3136 (__v2di)_mm_setzero_si128());
3137}
3138
3139static __inline__ __m128i __DEFAULT_FN_ATTRS128
3140_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3141 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3142 (__v2di)_mm_max_epu64(__A, __B),
3143 (__v2di)__W);
3144}
3145
3146static __inline__ __m256i __DEFAULT_FN_ATTRS256
3147_mm256_max_epu64 (__m256i __A, __m256i __B) {
3148 return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
3149}
3150
3151static __inline__ __m256i __DEFAULT_FN_ATTRS256
3152_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3153 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3154 (__v4di)_mm256_max_epu64(__A, __B),
3155 (__v4di)_mm256_setzero_si256());
3156}
3157
3158static __inline__ __m256i __DEFAULT_FN_ATTRS256
3159_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3160 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3161 (__v4di)_mm256_max_epu64(__A, __B),
3162 (__v4di)__W);
3163}
3164
3165static __inline__ __m128i __DEFAULT_FN_ATTRS128
3166_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3167 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3168 (__v4si)_mm_min_epi32(__A, __B),
3169 (__v4si)_mm_setzero_si128());
3170}
3171
3172static __inline__ __m128i __DEFAULT_FN_ATTRS128
3173_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3174 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3175 (__v4si)_mm_min_epi32(__A, __B),
3176 (__v4si)__W);
3177}
3178
3179static __inline__ __m256i __DEFAULT_FN_ATTRS256
3180_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3181 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3182 (__v8si)_mm256_min_epi32(__A, __B),
3183 (__v8si)_mm256_setzero_si256());
3184}
3185
3186static __inline__ __m256i __DEFAULT_FN_ATTRS256
3187_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3188 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3189 (__v8si)_mm256_min_epi32(__A, __B),
3190 (__v8si)__W);
3191}
3192
3193static __inline__ __m128i __DEFAULT_FN_ATTRS128
3194_mm_min_epi64 (__m128i __A, __m128i __B) {
3195 return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
3196}
3197
3198static __inline__ __m128i __DEFAULT_FN_ATTRS128
3199_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3200 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3201 (__v2di)_mm_min_epi64(__A, __B),
3202 (__v2di)__W);
3203}
3204
3205static __inline__ __m128i __DEFAULT_FN_ATTRS128
3206_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3207 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3208 (__v2di)_mm_min_epi64(__A, __B),
3209 (__v2di)_mm_setzero_si128());
3210}
3211
3212static __inline__ __m256i __DEFAULT_FN_ATTRS256
3213_mm256_min_epi64 (__m256i __A, __m256i __B) {
3214 return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
3215}
3216
3217static __inline__ __m256i __DEFAULT_FN_ATTRS256
3218_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3219 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3220 (__v4di)_mm256_min_epi64(__A, __B),
3221 (__v4di)__W);
3222}
3223
3224static __inline__ __m256i __DEFAULT_FN_ATTRS256
3225_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3226 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3227 (__v4di)_mm256_min_epi64(__A, __B),
3228 (__v4di)_mm256_setzero_si256());
3229}
3230
3231static __inline__ __m128i __DEFAULT_FN_ATTRS128
3232_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3233 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3234 (__v4si)_mm_min_epu32(__A, __B),
3235 (__v4si)_mm_setzero_si128());
3236}
3237
3238static __inline__ __m128i __DEFAULT_FN_ATTRS128
3239_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3240 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3241 (__v4si)_mm_min_epu32(__A, __B),
3242 (__v4si)__W);
3243}
3244
3245static __inline__ __m256i __DEFAULT_FN_ATTRS256
3246_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3247 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3248 (__v8si)_mm256_min_epu32(__A, __B),
3249 (__v8si)_mm256_setzero_si256());
3250}
3251
3252static __inline__ __m256i __DEFAULT_FN_ATTRS256
3253_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3254 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3255 (__v8si)_mm256_min_epu32(__A, __B),
3256 (__v8si)__W);
3257}
3258
3259static __inline__ __m128i __DEFAULT_FN_ATTRS128
3260_mm_min_epu64 (__m128i __A, __m128i __B) {
3261 return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
3262}
3263
3264static __inline__ __m128i __DEFAULT_FN_ATTRS128
3265_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3266 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3267 (__v2di)_mm_min_epu64(__A, __B),
3268 (__v2di)__W);
3269}
3270
3271static __inline__ __m128i __DEFAULT_FN_ATTRS128
3272_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3273 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3274 (__v2di)_mm_min_epu64(__A, __B),
3275 (__v2di)_mm_setzero_si128());
3276}
3277
3278static __inline__ __m256i __DEFAULT_FN_ATTRS256
3279_mm256_min_epu64 (__m256i __A, __m256i __B) {
3280 return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
3281}
3282
3283static __inline__ __m256i __DEFAULT_FN_ATTRS256
3284_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3285 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3286 (__v4di)_mm256_min_epu64(__A, __B),
3287 (__v4di)__W);
3288}
3289
3290static __inline__ __m256i __DEFAULT_FN_ATTRS256
3291_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3292 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3293 (__v4di)_mm256_min_epu64(__A, __B),
3294 (__v4di)_mm256_setzero_si256());
3295}
3296
/* Round-scale the packed doubles of A as directed by imm (128-bit, all lanes). */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1))
3302
3303
/* Round-scale the packed doubles of A as directed by imm (128-bit); W and mask U are forwarded to the builtin. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)(__m128d)(W), \
      (__mmask8)(U)))
3309
3310
/* Round-scale the packed doubles of A as directed by imm (128-bit); a zero vector and mask U are forwarded to the builtin. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)))
3316
3317
/* Round-scale the packed doubles of A as directed by imm (256-bit, all lanes). */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)-1))
3323
3324
/* Round-scale the packed doubles of A as directed by imm (256-bit); W and mask U are forwarded to the builtin. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)(__m256d)(W), \
      (__mmask8)(U)))
3330
3331
/* Round-scale the packed doubles of A as directed by imm (256-bit); a zero vector and mask U are forwarded to the builtin. */
#define _mm256_maskz_roundscale_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)))
3337
/* Round-scale the packed floats of A as directed by imm (128-bit, all lanes). */
#define _mm_roundscale_ps(A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1))
3342
3343
/* Round-scale the packed floats of A as directed by imm (128-bit); W and mask U are forwarded to the builtin. */
#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))
3348
3349
/* Round-scale the packed floats of A as directed by imm (128-bit); a zero vector and mask U are forwarded to the builtin. */
#define _mm_maskz_roundscale_ps(U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
3354
/* Round-scale the packed floats of A as directed by imm (256-bit, all lanes). */
#define _mm256_roundscale_ps(A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1))
3359
/* Round-scale the packed floats of A as directed by imm (256-bit); W and mask U are forwarded to the builtin. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)(__m256)(W), \
      (__mmask8)(U)))
3364
3365
/* Round-scale the packed floats of A as directed by imm (256-bit); a zero vector and mask U are forwarded to the builtin. */
#define _mm256_maskz_roundscale_ps(U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)))
3370
3371static __inline__ __m128d __DEFAULT_FN_ATTRS128
3372_mm_scalef_pd (__m128d __A, __m128d __B) {
3373 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3374 (__v2df) __B,
3375 (__v2df)
3376 _mm_setzero_pd (),
3377 (__mmask8) -1);
3378}
3379
3380static __inline__ __m128d __DEFAULT_FN_ATTRS128
3381_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3382 __m128d __B) {
3383 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3384 (__v2df) __B,
3385 (__v2df) __W,
3386 (__mmask8) __U);
3387}
3388
3389static __inline__ __m128d __DEFAULT_FN_ATTRS128
3390_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3391 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3392 (__v2df) __B,
3393 (__v2df)
3394 _mm_setzero_pd (),
3395 (__mmask8) __U);
3396}
3397
3398static __inline__ __m256d __DEFAULT_FN_ATTRS256
3399_mm256_scalef_pd (__m256d __A, __m256d __B) {
3400 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3401 (__v4df) __B,
3402 (__v4df)
3404 (__mmask8) -1);
3405}
3406
3407static __inline__ __m256d __DEFAULT_FN_ATTRS256
3408_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3409 __m256d __B) {
3410 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3411 (__v4df) __B,
3412 (__v4df) __W,
3413 (__mmask8) __U);
3414}
3415
3416static __inline__ __m256d __DEFAULT_FN_ATTRS256
3417_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3418 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3419 (__v4df) __B,
3420 (__v4df)
3422 (__mmask8) __U);
3423}
3424
3425static __inline__ __m128 __DEFAULT_FN_ATTRS128
3426_mm_scalef_ps (__m128 __A, __m128 __B) {
3427 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3428 (__v4sf) __B,
3429 (__v4sf)
3430 _mm_setzero_ps (),
3431 (__mmask8) -1);
3432}
3433
3434static __inline__ __m128 __DEFAULT_FN_ATTRS128
3435_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3436 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3437 (__v4sf) __B,
3438 (__v4sf) __W,
3439 (__mmask8) __U);
3440}
3441
3442static __inline__ __m128 __DEFAULT_FN_ATTRS128
3443_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3444 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3445 (__v4sf) __B,
3446 (__v4sf)
3447 _mm_setzero_ps (),
3448 (__mmask8) __U);
3449}
3450
3451static __inline__ __m256 __DEFAULT_FN_ATTRS256
3452_mm256_scalef_ps (__m256 __A, __m256 __B) {
3453 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3454 (__v8sf) __B,
3455 (__v8sf)
3457 (__mmask8) -1);
3458}
3459
3460static __inline__ __m256 __DEFAULT_FN_ATTRS256
3461_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3462 __m256 __B) {
3463 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3464 (__v8sf) __B,
3465 (__v8sf) __W,
3466 (__mmask8) __U);
3467}
3468
3469static __inline__ __m256 __DEFAULT_FN_ATTRS256
3470_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3471 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3472 (__v8sf) __B,
3473 (__v8sf)
3475 (__mmask8) __U);
3476}
3477
/* Scatter doubles from v1 (128-bit) relative to addr using 64-bit indices and scale; mask is all ones. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3482
/* Scatter doubles from v1 (128-bit) relative to addr using 64-bit indices and scale, under mask. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3487
/* Scatter 64-bit integers from v1 (128-bit) relative to addr using 64-bit indices and scale; mask is all ones. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3492
/* Scatter 64-bit integers from v1 (128-bit) relative to addr using 64-bit indices and scale, under mask. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3497
/* Scatter doubles from v1 (256-bit) relative to addr using 64-bit indices and scale; mask is all ones. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3502
/* Scatter doubles from v1 (256-bit) relative to addr using 64-bit indices and scale, under mask. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3507
/* Scatter 64-bit integers from v1 (256-bit) relative to addr using 64-bit indices and scale; mask is all ones. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3512
/* Scatter 64-bit integers from v1 (256-bit) relative to addr using 64-bit indices and scale, under mask. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3517
/* Scatter floats from v1 (128-bit) relative to addr using 64-bit indices (128-bit index vector) and scale; mask is all ones. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3522
/* Scatter floats from v1 (128-bit) relative to addr using 64-bit indices (128-bit index vector) and scale, under mask. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3527
/* Scatter 32-bit integers from v1 (128-bit) relative to addr using 64-bit indices (128-bit index vector) and scale; mask is all ones. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3532
/* Scatter 32-bit integers from v1 (128-bit) relative to addr using 64-bit indices (128-bit index vector) and scale, under mask. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3537
/* Scatter floats from v1 (128-bit) relative to addr using 64-bit indices (256-bit index vector) and scale; mask is all ones. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3542
/* Scatter floats from v1 (128-bit) relative to addr using 64-bit indices (256-bit index vector) and scale, under mask. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3547
/* Scatter 32-bit integers from v1 (128-bit) relative to addr using 64-bit indices (256-bit index vector) and scale; mask is all ones. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3552
/* Scatter 32-bit integers from v1 (128-bit) relative to addr using 64-bit indices (256-bit index vector) and scale, under mask. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3557
/* Scatter doubles from v1 (128-bit) relative to addr using 32-bit indices and scale; mask is all ones. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3562
/* Scatter doubles from v1 (128-bit) relative to addr using 32-bit indices and scale, under mask. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3567
/* Scatter 64-bit integers from v1 (128-bit) relative to addr using 32-bit indices and scale; mask is all ones. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3572
/* Scatter 64-bit integers from v1 (128-bit) relative to addr using 32-bit indices and scale, under mask. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3577
/* Scatter doubles from v1 (256-bit) relative to addr using 32-bit indices (128-bit index vector) and scale; mask is all ones. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3582
/* Scatter doubles from v1 (256-bit) relative to addr using 32-bit indices (128-bit index vector) and scale, under mask. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3587
/* Scatter 64-bit integers from v1 (256-bit) relative to addr using 32-bit indices (128-bit index vector) and scale; mask is all ones. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3592
/* Scatter 64-bit integers from v1 (256-bit) relative to addr using 32-bit indices (128-bit index vector) and scale, under mask. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3597
/* Scatter floats from v1 (128-bit) relative to addr using 32-bit indices and scale; mask is all ones. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3602
/* Scatter floats from v1 (128-bit) relative to addr using 32-bit indices and scale, under mask. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3607
/* Scatter 32-bit integers from v1 (128-bit) relative to addr using 32-bit indices and scale; mask is all ones. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3612
/* Scatter 32-bit integers from v1 (128-bit) relative to addr using 32-bit indices and scale, under mask. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3617
/* Scatter floats from v1 (256-bit) relative to addr using 32-bit indices and scale; mask is all ones. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), (__mmask8)-1, (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
3622
/* Scatter floats from v1 (256-bit) relative to addr using 32-bit indices and scale, under mask. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
3627
/* Scatter 32-bit integers from v1 (256-bit) relative to addr using 32-bit indices and scale; mask is all ones. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), (__mmask8)-1, (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
3632
/* Scatter 32-bit integers from v1 (256-bit) relative to addr using 32-bit indices and scale, under mask. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
3637
3638 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3639 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3640 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3641 (__v2df)_mm_sqrt_pd(__A),
3642 (__v2df)__W);
3643 }
3644
3645 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3646 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3647 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3648 (__v2df)_mm_sqrt_pd(__A),
3649 (__v2df)_mm_setzero_pd());
3650 }
3651
3652 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3653 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3654 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3655 (__v4df)_mm256_sqrt_pd(__A),
3656 (__v4df)__W);
3657 }
3658
3659 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3660 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3661 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3662 (__v4df)_mm256_sqrt_pd(__A),
3663 (__v4df)_mm256_setzero_pd());
3664 }
3665
3666 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3667 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3668 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3669 (__v4sf)_mm_sqrt_ps(__A),
3670 (__v4sf)__W);
3671 }
3672
3673 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3674 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3675 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3676 (__v4sf)_mm_sqrt_ps(__A),
3677 (__v4sf)_mm_setzero_ps());
3678 }
3679
3680 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3681 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3682 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3683 (__v8sf)_mm256_sqrt_ps(__A),
3684 (__v8sf)__W);
3685 }
3686
3687 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3689 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3690 (__v8sf)_mm256_sqrt_ps(__A),
3691 (__v8sf)_mm256_setzero_ps());
3692 }
3693
3694 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3695 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3696 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3697 (__v2df)_mm_sub_pd(__A, __B),
3698 (__v2df)__W);
3699 }
3700
3701 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3702 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3703 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3704 (__v2df)_mm_sub_pd(__A, __B),
3705 (__v2df)_mm_setzero_pd());
3706 }
3707
3708 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3709 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3710 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3711 (__v4df)_mm256_sub_pd(__A, __B),
3712 (__v4df)__W);
3713 }
3714
3715 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3716 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3717 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3718 (__v4df)_mm256_sub_pd(__A, __B),
3719 (__v4df)_mm256_setzero_pd());
3720 }
3721
3722 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3723 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3724 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3725 (__v4sf)_mm_sub_ps(__A, __B),
3726 (__v4sf)__W);
3727 }
3728
3729 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3730 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3731 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3732 (__v4sf)_mm_sub_ps(__A, __B),
3733 (__v4sf)_mm_setzero_ps());
3734 }
3735
3736 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3737 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3738 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3739 (__v8sf)_mm256_sub_ps(__A, __B),
3740 (__v8sf)__W);
3741 }
3742
3743 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3744 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3745 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3746 (__v8sf)_mm256_sub_ps(__A, __B),
3747 (__v8sf)_mm256_setzero_ps());
3748 }
3749
3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3751 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3752 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3753 (__v4si)__B);
3754 }
3755
3756 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3757 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3758 __m128i __B) {
3759 return (__m128i)__builtin_ia32_selectd_128(__U,
3760 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3761 (__v4si)__A);
3762 }
3763
3764 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3765 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3766 __m128i __B) {
3767 return (__m128i)__builtin_ia32_selectd_128(__U,
3768 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3769 (__v4si)__I);
3770 }
3771
3772 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3773 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3774 __m128i __B) {
3775 return (__m128i)__builtin_ia32_selectd_128(__U,
3776 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3777 (__v4si)_mm_setzero_si128());
3778 }
3779
3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3781 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3782 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3783 (__v8si) __B);
3784 }
3785
3786 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3787 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3788 __m256i __B) {
3789 return (__m256i)__builtin_ia32_selectd_256(__U,
3790 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3791 (__v8si)__A);
3792 }
3793
3794 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3795 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3796 __m256i __B) {
3797 return (__m256i)__builtin_ia32_selectd_256(__U,
3798 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3799 (__v8si)__I);
3800 }
3801
3802 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3803 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3804 __m256i __B) {
3805 return (__m256i)__builtin_ia32_selectd_256(__U,
3806 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3807 (__v8si)_mm256_setzero_si256());
3808 }
3809
3810 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3811 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3812 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3813 (__v2df)__B);
3814 }
3815
3816 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3817 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3818 return (__m128d)__builtin_ia32_selectpd_128(__U,
3819 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3820 (__v2df)__A);
3821 }
3822
3823 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3824 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3825 return (__m128d)__builtin_ia32_selectpd_128(__U,
3826 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3827 (__v2df)(__m128d)__I);
3828 }
3829
3830 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3831 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3832 return (__m128d)__builtin_ia32_selectpd_128(__U,
3833 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3834 (__v2df)_mm_setzero_pd());
3835 }
3836
3837 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3838 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3839 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3840 (__v4df)__B);
3841 }
3842
3843 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3844 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3845 __m256d __B) {
3846 return (__m256d)__builtin_ia32_selectpd_256(__U,
3847 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3848 (__v4df)__A);
3849 }
3850
3851 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3852 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3853 __m256d __B) {
3854 return (__m256d)__builtin_ia32_selectpd_256(__U,
3855 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3856 (__v4df)(__m256d)__I);
3857 }
3858
3859 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3860 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3861 __m256d __B) {
3862 return (__m256d)__builtin_ia32_selectpd_256(__U,
3863 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3864 (__v4df)_mm256_setzero_pd());
3865 }
3866
3867 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3868 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3869 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3870 (__v4sf)__B);
3871 }
3872
3873 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3874 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3875 return (__m128)__builtin_ia32_selectps_128(__U,
3876 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3877 (__v4sf)__A);
3878 }
3879
3880 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3881 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3882 return (__m128)__builtin_ia32_selectps_128(__U,
3883 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3884 (__v4sf)(__m128)__I);
3885 }
3886
3887 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3888 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3889 return (__m128)__builtin_ia32_selectps_128(__U,
3890 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3891 (__v4sf)_mm_setzero_ps());
3892 }
3893
3894 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3895 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3896 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3897 (__v8sf) __B);
3898 }
3899
3900 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3901 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3902 return (__m256)__builtin_ia32_selectps_256(__U,
3903 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3904 (__v8sf)__A);
3905 }
3906
3907 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3908 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3909 __m256 __B) {
3910 return (__m256)__builtin_ia32_selectps_256(__U,
3911 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3912 (__v8sf)(__m256)__I);
3913 }
3914
3915 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3916 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3917 __m256 __B) {
3918 return (__m256)__builtin_ia32_selectps_256(__U,
3919 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3920 (__v8sf)_mm256_setzero_ps());
3921 }
3922
3923 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3924 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3925 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3926 (__v2di)__B);
3927 }
3928
3929 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3930 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3931 __m128i __B) {
3932 return (__m128i)__builtin_ia32_selectq_128(__U,
3933 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3934 (__v2di)__A);
3935 }
3936
3937 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3938 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3939 __m128i __B) {
3940 return (__m128i)__builtin_ia32_selectq_128(__U,
3941 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3942 (__v2di)__I);
3943 }
3944
3945 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3946 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3947 __m128i __B) {
3948 return (__m128i)__builtin_ia32_selectq_128(__U,
3949 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3950 (__v2di)_mm_setzero_si128());
3951 }
3952
3953
3954 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3955 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3956 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3957 (__v4di) __B);
3958 }
3959
3960 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3961 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3962 __m256i __B) {
3963 return (__m256i)__builtin_ia32_selectq_256(__U,
3964 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3965 (__v4di)__A);
3966 }
3967
3968 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3969 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3970 __m256i __B) {
3971 return (__m256i)__builtin_ia32_selectq_256(__U,
3972 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3973 (__v4di)__I);
3974 }
3975
3976 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3977 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3978 __m256i __B) {
3979 return (__m256i)__builtin_ia32_selectq_256(__U,
3980 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3981 (__v4di)_mm256_setzero_si256());
3982 }
3983
3984 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3985 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3986 {
3987 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3988 (__v4si)_mm_cvtepi8_epi32(__A),
3989 (__v4si)__W);
3990 }
3991
3992 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3994 {
3995 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3996 (__v4si)_mm_cvtepi8_epi32(__A),
3997 (__v4si)_mm_setzero_si128());
3998 }
3999
4000 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4001 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4002 {
4003 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4004 (__v8si)_mm256_cvtepi8_epi32(__A),
4005 (__v8si)__W);
4006 }
4007
4008 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4010 {
4011 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4012 (__v8si)_mm256_cvtepi8_epi32(__A),
4013 (__v8si)_mm256_setzero_si256());
4014 }
4015
4016 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4017 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4018 {
4019 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4020 (__v2di)_mm_cvtepi8_epi64(__A),
4021 (__v2di)__W);
4022 }
4023
4024 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4026 {
4027 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4028 (__v2di)_mm_cvtepi8_epi64(__A),
4029 (__v2di)_mm_setzero_si128());
4030 }
4031
4032 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4033 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4034 {
4035 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4036 (__v4di)_mm256_cvtepi8_epi64(__A),
4037 (__v4di)__W);
4038 }
4039
4040 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4042 {
4043 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4044 (__v4di)_mm256_cvtepi8_epi64(__A),
4045 (__v4di)_mm256_setzero_si256());
4046 }
4047
4048 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4049 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4050 {
4051 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4052 (__v2di)_mm_cvtepi32_epi64(__X),
4053 (__v2di)__W);
4054 }
4055
4056 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4058 {
4059 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4060 (__v2di)_mm_cvtepi32_epi64(__X),
4061 (__v2di)_mm_setzero_si128());
4062 }
4063
4064 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4065 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4066 {
4067 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4068 (__v4di)_mm256_cvtepi32_epi64(__X),
4069 (__v4di)__W);
4070 }
4071
4072 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4074 {
4075 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4076 (__v4di)_mm256_cvtepi32_epi64(__X),
4077 (__v4di)_mm256_setzero_si256());
4078 }
4079
4080 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4081 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4082 {
4083 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4084 (__v4si)_mm_cvtepi16_epi32(__A),
4085 (__v4si)__W);
4086 }
4087
4088 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4090 {
4091 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4092 (__v4si)_mm_cvtepi16_epi32(__A),
4093 (__v4si)_mm_setzero_si128());
4094 }
4095
4096 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4097 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4098 {
4099 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4100 (__v8si)_mm256_cvtepi16_epi32(__A),
4101 (__v8si)__W);
4102 }
4103
4104 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4106 {
4107 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4108 (__v8si)_mm256_cvtepi16_epi32(__A),
4109 (__v8si)_mm256_setzero_si256());
4110 }
4111
4112 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4113 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4114 {
4115 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4116 (__v2di)_mm_cvtepi16_epi64(__A),
4117 (__v2di)__W);
4118 }
4119
4120 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4122 {
4123 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4124 (__v2di)_mm_cvtepi16_epi64(__A),
4125 (__v2di)_mm_setzero_si128());
4126 }
4127
4128 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4129 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4130 {
4131 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4132 (__v4di)_mm256_cvtepi16_epi64(__A),
4133 (__v4di)__W);
4134 }
4135
4136 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4138 {
4139 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4140 (__v4di)_mm256_cvtepi16_epi64(__A),
4141 (__v4di)_mm256_setzero_si256());
4142 }
4143
4144
4145 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4146 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4147 {
4148 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4149 (__v4si)_mm_cvtepu8_epi32(__A),
4150 (__v4si)__W);
4151 }
4152
4153 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4155 {
4156 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4157 (__v4si)_mm_cvtepu8_epi32(__A),
4158 (__v4si)_mm_setzero_si128());
4159 }
4160
4161 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4162 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4163 {
4164 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4165 (__v8si)_mm256_cvtepu8_epi32(__A),
4166 (__v8si)__W);
4167 }
4168
4169 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4171 {
4172 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4173 (__v8si)_mm256_cvtepu8_epi32(__A),
4174 (__v8si)_mm256_setzero_si256());
4175 }
4176
4177 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4178 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4179 {
4180 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4181 (__v2di)_mm_cvtepu8_epi64(__A),
4182 (__v2di)__W);
4183 }
4184
4185 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4187 {
4188 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4189 (__v2di)_mm_cvtepu8_epi64(__A),
4190 (__v2di)_mm_setzero_si128());
4191 }
4192
4193 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4194 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4195 {
4196 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4197 (__v4di)_mm256_cvtepu8_epi64(__A),
4198 (__v4di)__W);
4199 }
4200
4201 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4203 {
4204 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4205 (__v4di)_mm256_cvtepu8_epi64(__A),
4206 (__v4di)_mm256_setzero_si256());
4207 }
4208
4209 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4210 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4211 {
4212 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4213 (__v2di)_mm_cvtepu32_epi64(__X),
4214 (__v2di)__W);
4215 }
4216
4217 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4219 {
4220 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4221 (__v2di)_mm_cvtepu32_epi64(__X),
4222 (__v2di)_mm_setzero_si128());
4223 }
4224
4225 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4226 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4227 {
4228 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4229 (__v4di)_mm256_cvtepu32_epi64(__X),
4230 (__v4di)__W);
4231 }
4232
4233 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4235 {
4236 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4237 (__v4di)_mm256_cvtepu32_epi64(__X),
4238 (__v4di)_mm256_setzero_si256());
4239 }
4240
4241 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4242 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4243 {
4244 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4245 (__v4si)_mm_cvtepu16_epi32(__A),
4246 (__v4si)__W);
4247 }
4248
4249 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4251 {
4252 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4253 (__v4si)_mm_cvtepu16_epi32(__A),
4254 (__v4si)_mm_setzero_si128());
4255 }
4256
4257 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4258 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4259 {
4260 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4261 (__v8si)_mm256_cvtepu16_epi32(__A),
4262 (__v8si)__W);
4263 }
4264
4265 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4267 {
4268 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4269 (__v8si)_mm256_cvtepu16_epi32(__A),
4270 (__v8si)_mm256_setzero_si256());
4271 }
4272
4273 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4274 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4275 {
4276 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4277 (__v2di)_mm_cvtepu16_epi64(__A),
4278 (__v2di)__W);
4279 }
4280
4281 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4283 {
4284 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4285 (__v2di)_mm_cvtepu16_epi64(__A),
4286 (__v2di)_mm_setzero_si128());
4287 }
4288
4289 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4290 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4291 {
4292 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4293 (__v4di)_mm256_cvtepu16_epi64(__A),
4294 (__v4di)__W);
4295 }
4296
4297 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4299 {
4300 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4301 (__v4di)_mm256_cvtepu16_epi64(__A),
4302 (__v4di)_mm256_setzero_si256());
4303 }
4304
4305
/* Expands to the prold128 builtin on (a) viewed as __v4si with count (int)(b).
 * NOTE(review): presumably a macro because (b) must be an immediate - confirm. */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128( \
      (__v4si)(__m128i)(a), (int)(b)))
4308
/* Select builtin over ((u), _mm_rol_epi32((a), (b)), (w)). */
#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(u), (__v4si)_mm_rol_epi32((a), (b)), (__v4si)(__m128i)(w)))
4313
/* Select builtin over ((u), _mm_rol_epi32((a), (b)), zero vector). */
#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(u), (__v4si)_mm_rol_epi32((a), (b)), \
      (__v4si)_mm_setzero_si128()))
4318
/* Expands to the prold256 builtin on (a) viewed as __v8si with count (int)(b). */
#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256( \
      (__v8si)(__m256i)(a), (int)(b)))
4321
/* Select builtin over ((u), _mm256_rol_epi32((a), (b)), (w)). */
#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(u), (__v8si)_mm256_rol_epi32((a), (b)), \
      (__v8si)(__m256i)(w)))
4326
/* Select builtin over ((u), _mm256_rol_epi32((a), (b)), zero vector). */
#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(u), (__v8si)_mm256_rol_epi32((a), (b)), \
      (__v8si)_mm256_setzero_si256()))
4331
/* Expands to the prolq128 builtin on (a) viewed as __v2di with count (int)(b). */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128( \
      (__v2di)(__m128i)(a), (int)(b)))
4334
/* Select builtin over ((u), _mm_rol_epi64((a), (b)), (w)). */
#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(u), (__v2di)_mm_rol_epi64((a), (b)), (__v2di)(__m128i)(w)))
4339
/* Select builtin over ((u), _mm_rol_epi64((a), (b)), zero vector). */
#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(u), (__v2di)_mm_rol_epi64((a), (b)), \
      (__v2di)_mm_setzero_si128()))
4344
/* Expands to the prolq256 builtin on (a) viewed as __v4di with count (int)(b). */
#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256( \
      (__v4di)(__m256i)(a), (int)(b)))
4347
/* Select builtin over ((u), _mm256_rol_epi64((a), (b)), (w)). */
#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(u), (__v4di)_mm256_rol_epi64((a), (b)), \
      (__v4di)(__m256i)(w)))
4352
/* Select builtin over ((u), _mm256_rol_epi64((a), (b)), zero vector). */
#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(u), (__v4di)_mm256_rol_epi64((a), (b)), \
      (__v4di)_mm256_setzero_si256()))
4357
4358static __inline__ __m128i __DEFAULT_FN_ATTRS128
4359_mm_rolv_epi32 (__m128i __A, __m128i __B)
4360{
4361 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4362}
4363
4364static __inline__ __m128i __DEFAULT_FN_ATTRS128
4365_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4366{
4367 return (__m128i)__builtin_ia32_selectd_128(__U,
4368 (__v4si)_mm_rolv_epi32(__A, __B),
4369 (__v4si)__W);
4370}
4371
4372static __inline__ __m128i __DEFAULT_FN_ATTRS128
4373_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4374{
4375 return (__m128i)__builtin_ia32_selectd_128(__U,
4376 (__v4si)_mm_rolv_epi32(__A, __B),
4377 (__v4si)_mm_setzero_si128());
4378}
4379
4380static __inline__ __m256i __DEFAULT_FN_ATTRS256
4381_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4382{
4383 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4384}
4385
4386static __inline__ __m256i __DEFAULT_FN_ATTRS256
4387_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4388{
4389 return (__m256i)__builtin_ia32_selectd_256(__U,
4390 (__v8si)_mm256_rolv_epi32(__A, __B),
4391 (__v8si)__W);
4392}
4393
4394static __inline__ __m256i __DEFAULT_FN_ATTRS256
4395_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4396{
4397 return (__m256i)__builtin_ia32_selectd_256(__U,
4398 (__v8si)_mm256_rolv_epi32(__A, __B),
4399 (__v8si)_mm256_setzero_si256());
4400}
4401
4402static __inline__ __m128i __DEFAULT_FN_ATTRS128
4403_mm_rolv_epi64 (__m128i __A, __m128i __B)
4404{
4405 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4406}
4407
4408static __inline__ __m128i __DEFAULT_FN_ATTRS128
4409_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4410{
4411 return (__m128i)__builtin_ia32_selectq_128(__U,
4412 (__v2di)_mm_rolv_epi64(__A, __B),
4413 (__v2di)__W);
4414}
4415
4416static __inline__ __m128i __DEFAULT_FN_ATTRS128
4417_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4418{
4419 return (__m128i)__builtin_ia32_selectq_128(__U,
4420 (__v2di)_mm_rolv_epi64(__A, __B),
4421 (__v2di)_mm_setzero_si128());
4422}
4423
4424static __inline__ __m256i __DEFAULT_FN_ATTRS256
4425_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4426{
4427 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4428}
4429
4430static __inline__ __m256i __DEFAULT_FN_ATTRS256
4431_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4432{
4433 return (__m256i)__builtin_ia32_selectq_256(__U,
4434 (__v4di)_mm256_rolv_epi64(__A, __B),
4435 (__v4di)__W);
4436}
4437
4438static __inline__ __m256i __DEFAULT_FN_ATTRS256
4439_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4440{
4441 return (__m256i)__builtin_ia32_selectq_256(__U,
4442 (__v4di)_mm256_rolv_epi64(__A, __B),
4443 (__v4di)_mm256_setzero_si256());
4444}
4445
/* Expands to the prord128 builtin on (a) viewed as __v4si with count (int)(b).
 * NOTE(review): presumably a macro because (b) must be an immediate - confirm. */
#define _mm_ror_epi32(a, b) \
  ((__m128i)__builtin_ia32_prord128( \
      (__v4si)(__m128i)(a), (int)(b)))
4448
/* Select builtin over ((u), _mm_ror_epi32((a), (b)), (w)). */
#define _mm_mask_ror_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(u), (__v4si)_mm_ror_epi32((a), (b)), (__v4si)(__m128i)(w)))
4453
/* Select builtin over ((u), _mm_ror_epi32((a), (b)), zero vector). */
#define _mm_maskz_ror_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(u), (__v4si)_mm_ror_epi32((a), (b)), \
      (__v4si)_mm_setzero_si128()))
4458
/* Expands to the prord256 builtin on (a) viewed as __v8si with count (int)(b). */
#define _mm256_ror_epi32(a, b) \
  ((__m256i)__builtin_ia32_prord256( \
      (__v8si)(__m256i)(a), (int)(b)))
4461
/* Select builtin over ((u), _mm256_ror_epi32((a), (b)), (w)). */
#define _mm256_mask_ror_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(u), (__v8si)_mm256_ror_epi32((a), (b)), \
      (__v8si)(__m256i)(w)))
4466
/* Select builtin over ((u), _mm256_ror_epi32((a), (b)), zero vector). */
#define _mm256_maskz_ror_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(u), (__v8si)_mm256_ror_epi32((a), (b)), \
      (__v8si)_mm256_setzero_si256()))
4471
/* Expands to the prorq128 builtin on (a) viewed as __v2di with count (int)(b). */
#define _mm_ror_epi64(a, b) \
  ((__m128i)__builtin_ia32_prorq128( \
      (__v2di)(__m128i)(a), (int)(b)))
4474
/* Select builtin over ((u), _mm_ror_epi64((a), (b)), (w)). */
#define _mm_mask_ror_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(u), (__v2di)_mm_ror_epi64((a), (b)), (__v2di)(__m128i)(w)))
4479
/* Select builtin over ((u), _mm_ror_epi64((a), (b)), zero vector). */
#define _mm_maskz_ror_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(u), (__v2di)_mm_ror_epi64((a), (b)), \
      (__v2di)_mm_setzero_si128()))
4484
/* Expands to the prorq256 builtin on (a) viewed as __v4di with count (int)(b). */
#define _mm256_ror_epi64(a, b) \
  ((__m256i)__builtin_ia32_prorq256( \
      (__v4di)(__m256i)(a), (int)(b)))
4487
/* Select builtin over ((u), _mm256_ror_epi64((a), (b)), (w)). */
#define _mm256_mask_ror_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(u), (__v4di)_mm256_ror_epi64((a), (b)), \
      (__v4di)(__m256i)(w)))
4492
/* Select builtin over ((u), _mm256_ror_epi64((a), (b)), zero vector). */
#define _mm256_maskz_ror_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(u), (__v4di)_mm256_ror_epi64((a), (b)), \
      (__v4di)_mm256_setzero_si256()))
4497
4498static __inline__ __m128i __DEFAULT_FN_ATTRS128
4499_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4500{
4501 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4502 (__v4si)_mm_sll_epi32(__A, __B),
4503 (__v4si)__W);
4504}
4505
4506static __inline__ __m128i __DEFAULT_FN_ATTRS128
4507_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4508{
4509 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4510 (__v4si)_mm_sll_epi32(__A, __B),
4511 (__v4si)_mm_setzero_si128());
4512}
4513
4514static __inline__ __m256i __DEFAULT_FN_ATTRS256
4515_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4516{
4517 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4518 (__v8si)_mm256_sll_epi32(__A, __B),
4519 (__v8si)__W);
4520}
4521
4522static __inline__ __m256i __DEFAULT_FN_ATTRS256
4523_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4524{
4525 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4526 (__v8si)_mm256_sll_epi32(__A, __B),
4527 (__v8si)_mm256_setzero_si256());
4528}
4529
4530static __inline__ __m128i __DEFAULT_FN_ATTRS128
4531_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4532{
4533 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4534 (__v4si)_mm_slli_epi32(__A, (int)__B),
4535 (__v4si)__W);
4536}
4537
4538static __inline__ __m128i __DEFAULT_FN_ATTRS128
4539_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4540{
4541 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4542 (__v4si)_mm_slli_epi32(__A, (int)__B),
4543 (__v4si)_mm_setzero_si128());
4544}
4545
4546static __inline__ __m256i __DEFAULT_FN_ATTRS256
4547_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4548{
4549 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4550 (__v8si)_mm256_slli_epi32(__A, (int)__B),
4551 (__v8si)__W);
4552}
4553
4554static __inline__ __m256i __DEFAULT_FN_ATTRS256
4555_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
4556{
4557 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4558 (__v8si)_mm256_slli_epi32(__A, (int)__B),
4559 (__v8si)_mm256_setzero_si256());
4560}
4561
4562static __inline__ __m128i __DEFAULT_FN_ATTRS128
4563_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4564{
4565 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4566 (__v2di)_mm_sll_epi64(__A, __B),
4567 (__v2di)__W);
4568}
4569
4570static __inline__ __m128i __DEFAULT_FN_ATTRS128
4571_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4572{
4573 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4574 (__v2di)_mm_sll_epi64(__A, __B),
4575 (__v2di)_mm_setzero_si128());
4576}
4577
4578static __inline__ __m256i __DEFAULT_FN_ATTRS256
4579_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4580{
4581 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4582 (__v4di)_mm256_sll_epi64(__A, __B),
4583 (__v4di)__W);
4584}
4585
4586static __inline__ __m256i __DEFAULT_FN_ATTRS256
4587_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4588{
4589 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4590 (__v4di)_mm256_sll_epi64(__A, __B),
4591 (__v4di)_mm256_setzero_si256());
4592}
4593
4594static __inline__ __m128i __DEFAULT_FN_ATTRS128
4595_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4596{
4597 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4598 (__v2di)_mm_slli_epi64(__A, (int)__B),
4599 (__v2di)__W);
4600}
4601
4602static __inline__ __m128i __DEFAULT_FN_ATTRS128
4603_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4604{
4605 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4606 (__v2di)_mm_slli_epi64(__A, (int)__B),
4607 (__v2di)_mm_setzero_si128());
4608}
4609
4610static __inline__ __m256i __DEFAULT_FN_ATTRS256
4611_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4612{
4613 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4614 (__v4di)_mm256_slli_epi64(__A, (int)__B),
4615 (__v4di)__W);
4616}
4617
4618static __inline__ __m256i __DEFAULT_FN_ATTRS256
4619_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
4620{
4621 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4622 (__v4di)_mm256_slli_epi64(__A, (int)__B),
4623 (__v4di)_mm256_setzero_si256());
4624}
4625
4626static __inline__ __m128i __DEFAULT_FN_ATTRS128
4627_mm_rorv_epi32 (__m128i __A, __m128i __B)
4628{
4629 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4630}
4631
4632static __inline__ __m128i __DEFAULT_FN_ATTRS128
4633_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4634{
4635 return (__m128i)__builtin_ia32_selectd_128(__U,
4636 (__v4si)_mm_rorv_epi32(__A, __B),
4637 (__v4si)__W);
4638}
4639
4640static __inline__ __m128i __DEFAULT_FN_ATTRS128
4641_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4642{
4643 return (__m128i)__builtin_ia32_selectd_128(__U,
4644 (__v4si)_mm_rorv_epi32(__A, __B),
4645 (__v4si)_mm_setzero_si128());
4646}
4647
4648static __inline__ __m256i __DEFAULT_FN_ATTRS256
4649_mm256_rorv_epi32 (__m256i __A, __m256i __B)
4650{
4651 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4652}
4653
4654static __inline__ __m256i __DEFAULT_FN_ATTRS256
4655_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4656{
4657 return (__m256i)__builtin_ia32_selectd_256(__U,
4658 (__v8si)_mm256_rorv_epi32(__A, __B),
4659 (__v8si)__W);
4660}
4661
4662static __inline__ __m256i __DEFAULT_FN_ATTRS256
4663_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4664{
4665 return (__m256i)__builtin_ia32_selectd_256(__U,
4666 (__v8si)_mm256_rorv_epi32(__A, __B),
4667 (__v8si)_mm256_setzero_si256());
4668}
4669
4670static __inline__ __m128i __DEFAULT_FN_ATTRS128
4671_mm_rorv_epi64 (__m128i __A, __m128i __B)
4672{
4673 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4674}
4675
4676static __inline__ __m128i __DEFAULT_FN_ATTRS128
4677_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4678{
4679 return (__m128i)__builtin_ia32_selectq_128(__U,
4680 (__v2di)_mm_rorv_epi64(__A, __B),
4681 (__v2di)__W);
4682}
4683
4684static __inline__ __m128i __DEFAULT_FN_ATTRS128
4685_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4686{
4687 return (__m128i)__builtin_ia32_selectq_128(__U,
4688 (__v2di)_mm_rorv_epi64(__A, __B),
4689 (__v2di)_mm_setzero_si128());
4690}
4691
4692static __inline__ __m256i __DEFAULT_FN_ATTRS256
4693_mm256_rorv_epi64 (__m256i __A, __m256i __B)
4694{
4695 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4696}
4697
4698static __inline__ __m256i __DEFAULT_FN_ATTRS256
4699_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4700{
4701 return (__m256i)__builtin_ia32_selectq_256(__U,
4702 (__v4di)_mm256_rorv_epi64(__A, __B),
4703 (__v4di)__W);
4704}
4705
4706static __inline__ __m256i __DEFAULT_FN_ATTRS256
4707_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4708{
4709 return (__m256i)__builtin_ia32_selectq_256(__U,
4710 (__v4di)_mm256_rorv_epi64(__A, __B),
4711 (__v4di)_mm256_setzero_si256());
4712}
4713
4714static __inline__ __m128i __DEFAULT_FN_ATTRS128
4715_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4716{
4717 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4718 (__v2di)_mm_sllv_epi64(__X, __Y),
4719 (__v2di)__W);
4720}
4721
4722static __inline__ __m128i __DEFAULT_FN_ATTRS128
4723_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4724{
4725 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4726 (__v2di)_mm_sllv_epi64(__X, __Y),
4727 (__v2di)_mm_setzero_si128());
4728}
4729
4730static __inline__ __m256i __DEFAULT_FN_ATTRS256
4731_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4732{
4733 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4734 (__v4di)_mm256_sllv_epi64(__X, __Y),
4735 (__v4di)__W);
4736}
4737
4738static __inline__ __m256i __DEFAULT_FN_ATTRS256
4739_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4740{
4741 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4742 (__v4di)_mm256_sllv_epi64(__X, __Y),
4743 (__v4di)_mm256_setzero_si256());
4744}
4745
4746static __inline__ __m128i __DEFAULT_FN_ATTRS128
4747_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4748{
4749 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4750 (__v4si)_mm_sllv_epi32(__X, __Y),
4751 (__v4si)__W);
4752}
4753
4754static __inline__ __m128i __DEFAULT_FN_ATTRS128
4755_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4756{
4757 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4758 (__v4si)_mm_sllv_epi32(__X, __Y),
4759 (__v4si)_mm_setzero_si128());
4760}
4761
4762static __inline__ __m256i __DEFAULT_FN_ATTRS256
4763_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4764{
4765 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4766 (__v8si)_mm256_sllv_epi32(__X, __Y),
4767 (__v8si)__W);
4768}
4769
4770static __inline__ __m256i __DEFAULT_FN_ATTRS256
4771_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4772{
4773 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4774 (__v8si)_mm256_sllv_epi32(__X, __Y),
4775 (__v8si)_mm256_setzero_si256());
4776}
4777
4778static __inline__ __m128i __DEFAULT_FN_ATTRS128
4779_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4780{
4781 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4782 (__v2di)_mm_srlv_epi64(__X, __Y),
4783 (__v2di)__W);
4784}
4785
4786static __inline__ __m128i __DEFAULT_FN_ATTRS128
4787_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4788{
4789 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4790 (__v2di)_mm_srlv_epi64(__X, __Y),
4791 (__v2di)_mm_setzero_si128());
4792}
4793
4794static __inline__ __m256i __DEFAULT_FN_ATTRS256
4795_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4796{
4797 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4798 (__v4di)_mm256_srlv_epi64(__X, __Y),
4799 (__v4di)__W);
4800}
4801
4802static __inline__ __m256i __DEFAULT_FN_ATTRS256
4803_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4804{
4805 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4806 (__v4di)_mm256_srlv_epi64(__X, __Y),
4807 (__v4di)_mm256_setzero_si256());
4808}
4809
4810static __inline__ __m128i __DEFAULT_FN_ATTRS128
4811_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4812{
4813 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4814 (__v4si)_mm_srlv_epi32(__X, __Y),
4815 (__v4si)__W);
4816}
4817
4818static __inline__ __m128i __DEFAULT_FN_ATTRS128
4819_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4820{
4821 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4822 (__v4si)_mm_srlv_epi32(__X, __Y),
4823 (__v4si)_mm_setzero_si128());
4824}
4825
4826static __inline__ __m256i __DEFAULT_FN_ATTRS256
4827_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4828{
4829 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4830 (__v8si)_mm256_srlv_epi32(__X, __Y),
4831 (__v8si)__W);
4832}
4833
4834static __inline__ __m256i __DEFAULT_FN_ATTRS256
4835_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4836{
4837 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4838 (__v8si)_mm256_srlv_epi32(__X, __Y),
4839 (__v8si)_mm256_setzero_si256());
4840}
4841
4842static __inline__ __m128i __DEFAULT_FN_ATTRS128
4843_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4844{
4845 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4846 (__v4si)_mm_srl_epi32(__A, __B),
4847 (__v4si)__W);
4848}
4849
4850static __inline__ __m128i __DEFAULT_FN_ATTRS128
4851_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4852{
4853 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4854 (__v4si)_mm_srl_epi32(__A, __B),
4855 (__v4si)_mm_setzero_si128());
4856}
4857
4858static __inline__ __m256i __DEFAULT_FN_ATTRS256
4859_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4860{
4861 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4862 (__v8si)_mm256_srl_epi32(__A, __B),
4863 (__v8si)__W);
4864}
4865
4866static __inline__ __m256i __DEFAULT_FN_ATTRS256
4867_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4868{
4869 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4870 (__v8si)_mm256_srl_epi32(__A, __B),
4871 (__v8si)_mm256_setzero_si256());
4872}
4873
4874static __inline__ __m128i __DEFAULT_FN_ATTRS128
4875_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4876{
4877 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4878 (__v4si)_mm_srli_epi32(__A, (int)__B),
4879 (__v4si)__W);
4880}
4881
4882static __inline__ __m128i __DEFAULT_FN_ATTRS128
4883_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4884{
4885 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4886 (__v4si)_mm_srli_epi32(__A, (int)__B),
4887 (__v4si)_mm_setzero_si128());
4888}
4889
4890static __inline__ __m256i __DEFAULT_FN_ATTRS256
4891_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4892{
4893 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4894 (__v8si)_mm256_srli_epi32(__A, (int)__B),
4895 (__v8si)__W);
4896}
4897
4898static __inline__ __m256i __DEFAULT_FN_ATTRS256
4899_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
4900{
4901 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4902 (__v8si)_mm256_srli_epi32(__A, (int)__B),
4903 (__v8si)_mm256_setzero_si256());
4904}
4905
4906static __inline__ __m128i __DEFAULT_FN_ATTRS128
4907_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4908{
4909 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4910 (__v2di)_mm_srl_epi64(__A, __B),
4911 (__v2di)__W);
4912}
4913
4914static __inline__ __m128i __DEFAULT_FN_ATTRS128
4915_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4916{
4917 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4918 (__v2di)_mm_srl_epi64(__A, __B),
4919 (__v2di)_mm_setzero_si128());
4920}
4921
4922static __inline__ __m256i __DEFAULT_FN_ATTRS256
4923_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4924{
4925 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4926 (__v4di)_mm256_srl_epi64(__A, __B),
4927 (__v4di)__W);
4928}
4929
4930static __inline__ __m256i __DEFAULT_FN_ATTRS256
4931_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4932{
4933 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4934 (__v4di)_mm256_srl_epi64(__A, __B),
4935 (__v4di)_mm256_setzero_si256());
4936}
4937
4938static __inline__ __m128i __DEFAULT_FN_ATTRS128
4939_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4940{
4941 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4942 (__v2di)_mm_srli_epi64(__A, (int)__B),
4943 (__v2di)__W);
4944}
4945
4946static __inline__ __m128i __DEFAULT_FN_ATTRS128
4947_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4948{
4949 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4950 (__v2di)_mm_srli_epi64(__A, (int)__B),
4951 (__v2di)_mm_setzero_si128());
4952}
4953
4954static __inline__ __m256i __DEFAULT_FN_ATTRS256
4955_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4956{
4957 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4958 (__v4di)_mm256_srli_epi64(__A, (int)__B),
4959 (__v4di)__W);
4960}
4961
4962static __inline__ __m256i __DEFAULT_FN_ATTRS256
4963_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
4964{
4965 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4966 (__v4di)_mm256_srli_epi64(__A, (int)__B),
4967 (__v4di)_mm256_setzero_si256());
4968}
4969
4970static __inline__ __m128i __DEFAULT_FN_ATTRS128
4971_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4972{
4973 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4974 (__v4si)_mm_srav_epi32(__X, __Y),
4975 (__v4si)__W);
4976}
4977
4978static __inline__ __m128i __DEFAULT_FN_ATTRS128
4979_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4980{
4981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4982 (__v4si)_mm_srav_epi32(__X, __Y),
4983 (__v4si)_mm_setzero_si128());
4984}
4985
4986static __inline__ __m256i __DEFAULT_FN_ATTRS256
4987_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4988{
4989 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4990 (__v8si)_mm256_srav_epi32(__X, __Y),
4991 (__v8si)__W);
4992}
4993
4994static __inline__ __m256i __DEFAULT_FN_ATTRS256
4995_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4996{
4997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4998 (__v8si)_mm256_srav_epi32(__X, __Y),
4999 (__v8si)_mm256_setzero_si256());
5000}
5001
5002static __inline__ __m128i __DEFAULT_FN_ATTRS128
5003_mm_srav_epi64(__m128i __X, __m128i __Y)
5004{
5005 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5006}
5007
5008static __inline__ __m128i __DEFAULT_FN_ATTRS128
5009_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5010{
5011 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5012 (__v2di)_mm_srav_epi64(__X, __Y),
5013 (__v2di)__W);
5014}
5015
5016static __inline__ __m128i __DEFAULT_FN_ATTRS128
5017_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5018{
5019 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5020 (__v2di)_mm_srav_epi64(__X, __Y),
5021 (__v2di)_mm_setzero_si128());
5022}
5023
5024static __inline__ __m256i __DEFAULT_FN_ATTRS256
5025_mm256_srav_epi64(__m256i __X, __m256i __Y)
5026{
5027 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5028}
5029
5030static __inline__ __m256i __DEFAULT_FN_ATTRS256
5031_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5032{
5033 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5034 (__v4di)_mm256_srav_epi64(__X, __Y),
5035 (__v4di)__W);
5036}
5037
5038static __inline__ __m256i __DEFAULT_FN_ATTRS256
5039_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5040{
5041 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5042 (__v4di)_mm256_srav_epi64(__X, __Y),
5043 (__v4di)_mm256_setzero_si256());
5044}
5045
5046static __inline__ __m128i __DEFAULT_FN_ATTRS128
5047_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5048{
5049 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5050 (__v4si) __A,
5051 (__v4si) __W);
5052}
5053
5054static __inline__ __m128i __DEFAULT_FN_ATTRS128
5056{
5057 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5058 (__v4si) __A,
5059 (__v4si) _mm_setzero_si128 ());
5060}
5061
5062
5063static __inline__ __m256i __DEFAULT_FN_ATTRS256
5064_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5065{
5066 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5067 (__v8si) __A,
5068 (__v8si) __W);
5069}
5070
5071static __inline__ __m256i __DEFAULT_FN_ATTRS256
5073{
5074 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5075 (__v8si) __A,
5076 (__v8si) _mm256_setzero_si256 ());
5077}
5078
5079static __inline __m128i __DEFAULT_FN_ATTRS128
5080_mm_load_epi32 (void const *__P)
5081{
5082 return *(const __m128i *) __P;
5083}
5084
5085static __inline__ __m128i __DEFAULT_FN_ATTRS128
5086_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5087{
5088 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5089 (__v4si) __W,
5090 (__mmask8)
5091 __U);
5092}
5093
5094static __inline__ __m128i __DEFAULT_FN_ATTRS128
5096{
5097 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5098 (__v4si)
5100 (__mmask8)
5101 __U);
5102}
5103
5104static __inline __m256i __DEFAULT_FN_ATTRS256
5106{
5107 return *(const __m256i *) __P;
5108}
5109
5110static __inline__ __m256i __DEFAULT_FN_ATTRS256
5111_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5112{
5113 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5114 (__v8si) __W,
5115 (__mmask8)
5116 __U);
5117}
5118
5119static __inline__ __m256i __DEFAULT_FN_ATTRS256
5121{
5122 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5123 (__v8si)
5125 (__mmask8)
5126 __U);
5127}
5128
5129static __inline void __DEFAULT_FN_ATTRS128
5130_mm_store_epi32 (void *__P, __m128i __A)
5131{
5132 *(__m128i *) __P = __A;
5133}
5134
5135static __inline__ void __DEFAULT_FN_ATTRS128
5136_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5137{
5138 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5139 (__v4si) __A,
5140 (__mmask8) __U);
5141}
5142
5143static __inline void __DEFAULT_FN_ATTRS256
5144_mm256_store_epi32 (void *__P, __m256i __A)
5145{
5146 *(__m256i *) __P = __A;
5147}
5148
5149static __inline__ void __DEFAULT_FN_ATTRS256
5150_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5151{
5152 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5153 (__v8si) __A,
5154 (__mmask8) __U);
5155}
5156
5157static __inline__ __m128i __DEFAULT_FN_ATTRS128
5158_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5159{
5160 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5161 (__v2di) __A,
5162 (__v2di) __W);
5163}
5164
5165static __inline__ __m128i __DEFAULT_FN_ATTRS128
5167{
5168 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5169 (__v2di) __A,
5170 (__v2di) _mm_setzero_si128 ());
5171}
5172
5173static __inline__ __m256i __DEFAULT_FN_ATTRS256
5174_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5175{
5176 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5177 (__v4di) __A,
5178 (__v4di) __W);
5179}
5180
5181static __inline__ __m256i __DEFAULT_FN_ATTRS256
5183{
5184 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5185 (__v4di) __A,
5186 (__v4di) _mm256_setzero_si256 ());
5187}
5188
5189static __inline __m128i __DEFAULT_FN_ATTRS128
5190_mm_load_epi64 (void const *__P)
5191{
5192 return *(const __m128i *) __P;
5193}
5194
5195static __inline__ __m128i __DEFAULT_FN_ATTRS128
5196_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5197{
5198 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5199 (__v2di) __W,
5200 (__mmask8)
5201 __U);
5202}
5203
5204static __inline__ __m128i __DEFAULT_FN_ATTRS128
5206{
5207 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5208 (__v2di)
5210 (__mmask8)
5211 __U);
5212}
5213
5214static __inline __m256i __DEFAULT_FN_ATTRS256
5216{
5217 return *(const __m256i *) __P;
5218}
5219
5220static __inline__ __m256i __DEFAULT_FN_ATTRS256
5221_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5222{
5223 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5224 (__v4di) __W,
5225 (__mmask8)
5226 __U);
5227}
5228
5229static __inline__ __m256i __DEFAULT_FN_ATTRS256
5231{
5232 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5233 (__v4di)
5235 (__mmask8)
5236 __U);
5237}
5238
5239static __inline void __DEFAULT_FN_ATTRS128
5240_mm_store_epi64 (void *__P, __m128i __A)
5241{
5242 *(__m128i *) __P = __A;
5243}
5244
5245static __inline__ void __DEFAULT_FN_ATTRS128
5246_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5247{
5248 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5249 (__v2di) __A,
5250 (__mmask8) __U);
5251}
5252
5253static __inline void __DEFAULT_FN_ATTRS256
5254_mm256_store_epi64 (void *__P, __m256i __A)
5255{
5256 *(__m256i *) __P = __A;
5257}
5258
5259static __inline__ void __DEFAULT_FN_ATTRS256
5260_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5261{
5262 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5263 (__v4di) __A,
5264 (__mmask8) __U);
5265}
5266
5267static __inline__ __m128d __DEFAULT_FN_ATTRS128
5268_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5269{
5270 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5271 (__v2df)_mm_movedup_pd(__A),
5272 (__v2df)__W);
5273}
5274
5275static __inline__ __m128d __DEFAULT_FN_ATTRS128
5277{
5278 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5279 (__v2df)_mm_movedup_pd(__A),
5280 (__v2df)_mm_setzero_pd());
5281}
5282
5283static __inline__ __m256d __DEFAULT_FN_ATTRS256
5284_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5285{
5286 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5287 (__v4df)_mm256_movedup_pd(__A),
5288 (__v4df)__W);
5289}
5290
5291static __inline__ __m256d __DEFAULT_FN_ATTRS256
5293{
5294 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5295 (__v4df)_mm256_movedup_pd(__A),
5296 (__v4df)_mm256_setzero_pd());
5297}
5298
5299static __inline__ __m128i __DEFAULT_FN_ATTRS128
5300_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5301{
5302 return (__m128i)__builtin_ia32_selectd_128(__M,
5303 (__v4si) _mm_set1_epi32(__A),
5304 (__v4si)__O);
5305}
5306
5307static __inline__ __m128i __DEFAULT_FN_ATTRS128
5309{
5310 return (__m128i)__builtin_ia32_selectd_128(__M,
5311 (__v4si) _mm_set1_epi32(__A),
5312 (__v4si)_mm_setzero_si128());
5313}
5314
5315static __inline__ __m256i __DEFAULT_FN_ATTRS256
5316_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5317{
5318 return (__m256i)__builtin_ia32_selectd_256(__M,
5319 (__v8si) _mm256_set1_epi32(__A),
5320 (__v8si)__O);
5321}
5322
5323static __inline__ __m256i __DEFAULT_FN_ATTRS256
5325{
5326 return (__m256i)__builtin_ia32_selectd_256(__M,
5327 (__v8si) _mm256_set1_epi32(__A),
5328 (__v8si)_mm256_setzero_si256());
5329}
5330
5331
5332static __inline__ __m128i __DEFAULT_FN_ATTRS128
5333_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5334{
5335 return (__m128i) __builtin_ia32_selectq_128(__M,
5336 (__v2di) _mm_set1_epi64x(__A),
5337 (__v2di) __O);
5338}
5339
5340static __inline__ __m128i __DEFAULT_FN_ATTRS128
5341_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5342{
5343 return (__m128i) __builtin_ia32_selectq_128(__M,
5344 (__v2di) _mm_set1_epi64x(__A),
5345 (__v2di) _mm_setzero_si128());
5346}
5347
5348static __inline__ __m256i __DEFAULT_FN_ATTRS256
5349_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5350{
5351 return (__m256i) __builtin_ia32_selectq_256(__M,
5352 (__v4di) _mm256_set1_epi64x(__A),
5353 (__v4di) __O) ;
5354}
5355
5356static __inline__ __m256i __DEFAULT_FN_ATTRS256
5358{
5359 return (__m256i) __builtin_ia32_selectq_256(__M,
5360 (__v4di) _mm256_set1_epi64x(__A),
5361 (__v4di) _mm256_setzero_si256());
5362}
5363
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmpd128_mask with
 * the mask argument fixed at (__mmask8)-1. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5369
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmpd128_mask with
 * U as the mask argument. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5375
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmpd128_maskz with
 * U as the mask argument. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5381
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmpd256_mask with
 * the mask argument fixed at (__mmask8)-1. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5387
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmpd256_mask with
 * U as the mask argument. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5393
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmpd256_maskz with
 * U as the mask argument. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5399
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmps128_mask with
 * the mask argument fixed at (__mmask8)-1. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5405
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmps128_mask with
 * U as the mask argument. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5411
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmps128_maskz with
 * U as the mask argument. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5417
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmps256_mask with
 * the mask argument fixed at (__mmask8)-1. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)-1))
5423
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmps256_mask with
 * U as the mask argument. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5429
/* Forwards A, B, C and (int)imm to __builtin_ia32_fixupimmps256_maskz with
 * U as the mask argument. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)))
5435
5436static __inline__ __m128d __DEFAULT_FN_ATTRS128
5437_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5438{
5439 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5440 (__v2df) __W,
5441 (__mmask8) __U);
5442}
5443
5444static __inline__ __m128d __DEFAULT_FN_ATTRS128
5446{
5447 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5448 (__v2df)
5449 _mm_setzero_pd (),
5450 (__mmask8) __U);
5451}
5452
5453static __inline__ __m256d __DEFAULT_FN_ATTRS256
5454_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5455{
5456 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5457 (__v4df) __W,
5458 (__mmask8) __U);
5459}
5460
5461static __inline__ __m256d __DEFAULT_FN_ATTRS256
5463{
5464 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5465 (__v4df)
5467 (__mmask8) __U);
5468}
5469
5470static __inline__ __m128 __DEFAULT_FN_ATTRS128
5471_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5472{
5473 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5474 (__v4sf) __W,
5475 (__mmask8) __U);
5476}
5477
5478static __inline__ __m128 __DEFAULT_FN_ATTRS128
5480{
5481 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5482 (__v4sf)
5483 _mm_setzero_ps (),
5484 (__mmask8) __U);
5485}
5486
5487static __inline__ __m256 __DEFAULT_FN_ATTRS256
5488_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5489{
5490 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5491 (__v8sf) __W,
5492 (__mmask8) __U);
5493}
5494
5495static __inline__ __m256 __DEFAULT_FN_ATTRS256
5497{
5498 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5499 (__v8sf)
5501 (__mmask8) __U);
5502}
5503
5504static __inline __m128i __DEFAULT_FN_ATTRS128
5506{
5507 struct __loadu_epi64 {
5508 __m128i_u __v;
5509 } __attribute__((__packed__, __may_alias__));
5510 return ((const struct __loadu_epi64*)__P)->__v;
5511}
5512
5513static __inline__ __m128i __DEFAULT_FN_ATTRS128
5514_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5515{
5516 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5517 (__v2di) __W,
5518 (__mmask8) __U);
5519}
5520
5521static __inline__ __m128i __DEFAULT_FN_ATTRS128
5523{
5524 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5525 (__v2di)
5527 (__mmask8) __U);
5528}
5529
5530static __inline __m256i __DEFAULT_FN_ATTRS256
5532{
5533 struct __loadu_epi64 {
5534 __m256i_u __v;
5535 } __attribute__((__packed__, __may_alias__));
5536 return ((const struct __loadu_epi64*)__P)->__v;
5537}
5538
5539static __inline__ __m256i __DEFAULT_FN_ATTRS256
5540_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5541{
5542 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5543 (__v4di) __W,
5544 (__mmask8) __U);
5545}
5546
5547static __inline__ __m256i __DEFAULT_FN_ATTRS256
5549{
5550 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5551 (__v4di)
5553 (__mmask8) __U);
5554}
5555
5556static __inline __m128i __DEFAULT_FN_ATTRS128
5558{
5559 struct __loadu_epi32 {
5560 __m128i_u __v;
5561 } __attribute__((__packed__, __may_alias__));
5562 return ((const struct __loadu_epi32*)__P)->__v;
5563}
5564
5565static __inline__ __m128i __DEFAULT_FN_ATTRS128
5566_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5567{
5568 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5569 (__v4si) __W,
5570 (__mmask8) __U);
5571}
5572
5573static __inline__ __m128i __DEFAULT_FN_ATTRS128
5575{
5576 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5577 (__v4si)
5579 (__mmask8) __U);
5580}
5581
5582static __inline __m256i __DEFAULT_FN_ATTRS256
5584{
5585 struct __loadu_epi32 {
5586 __m256i_u __v;
5587 } __attribute__((__packed__, __may_alias__));
5588 return ((const struct __loadu_epi32*)__P)->__v;
5589}
5590
5591static __inline__ __m256i __DEFAULT_FN_ATTRS256
5592_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5593{
5594 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5595 (__v8si) __W,
5596 (__mmask8) __U);
5597}
5598
5599static __inline__ __m256i __DEFAULT_FN_ATTRS256
5601{
5602 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5603 (__v8si)
5605 (__mmask8) __U);
5606}
5607
5608static __inline__ __m128d __DEFAULT_FN_ATTRS128
5609_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5610{
5611 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5612 (__v2df) __W,
5613 (__mmask8) __U);
5614}
5615
5616static __inline__ __m128d __DEFAULT_FN_ATTRS128
5618{
5619 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5620 (__v2df)
5621 _mm_setzero_pd (),
5622 (__mmask8) __U);
5623}
5624
5625static __inline__ __m256d __DEFAULT_FN_ATTRS256
5626_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5627{
5628 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5629 (__v4df) __W,
5630 (__mmask8) __U);
5631}
5632
5633static __inline__ __m256d __DEFAULT_FN_ATTRS256
5635{
5636 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5637 (__v4df)
5639 (__mmask8) __U);
5640}
5641
5642static __inline__ __m128 __DEFAULT_FN_ATTRS128
5643_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5644{
5645 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5646 (__v4sf) __W,
5647 (__mmask8) __U);
5648}
5649
5650static __inline__ __m128 __DEFAULT_FN_ATTRS128
5652{
5653 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5654 (__v4sf)
5655 _mm_setzero_ps (),
5656 (__mmask8) __U);
5657}
5658
5659static __inline__ __m256 __DEFAULT_FN_ATTRS256
5660_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5661{
5662 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5663 (__v8sf) __W,
5664 (__mmask8) __U);
5665}
5666
5667static __inline__ __m256 __DEFAULT_FN_ATTRS256
5669{
5670 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5671 (__v8sf)
5673 (__mmask8) __U);
5674}
5675
5676static __inline__ void __DEFAULT_FN_ATTRS128
5677_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5678{
5679 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5680 (__v2df) __A,
5681 (__mmask8) __U);
5682}
5683
5684static __inline__ void __DEFAULT_FN_ATTRS256
5685_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5686{
5687 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5688 (__v4df) __A,
5689 (__mmask8) __U);
5690}
5691
5692static __inline__ void __DEFAULT_FN_ATTRS128
5693_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5694{
5695 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5696 (__v4sf) __A,
5697 (__mmask8) __U);
5698}
5699
5700static __inline__ void __DEFAULT_FN_ATTRS256
5701_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5702{
5703 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5704 (__v8sf) __A,
5705 (__mmask8) __U);
5706}
5707
5708static __inline void __DEFAULT_FN_ATTRS128
5709_mm_storeu_epi64 (void *__P, __m128i __A)
5710{
5711 struct __storeu_epi64 {
5712 __m128i_u __v;
5713 } __attribute__((__packed__, __may_alias__));
5714 ((struct __storeu_epi64*)__P)->__v = __A;
5715}
5716
5717static __inline__ void __DEFAULT_FN_ATTRS128
5718_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5719{
5720 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5721 (__v2di) __A,
5722 (__mmask8) __U);
5723}
5724
5725static __inline void __DEFAULT_FN_ATTRS256
5726_mm256_storeu_epi64 (void *__P, __m256i __A)
5727{
5728 struct __storeu_epi64 {
5729 __m256i_u __v;
5730 } __attribute__((__packed__, __may_alias__));
5731 ((struct __storeu_epi64*)__P)->__v = __A;
5732}
5733
5734static __inline__ void __DEFAULT_FN_ATTRS256
5735_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5736{
5737 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5738 (__v4di) __A,
5739 (__mmask8) __U);
5740}
5741
5742static __inline void __DEFAULT_FN_ATTRS128
5743_mm_storeu_epi32 (void *__P, __m128i __A)
5744{
5745 struct __storeu_epi32 {
5746 __m128i_u __v;
5747 } __attribute__((__packed__, __may_alias__));
5748 ((struct __storeu_epi32*)__P)->__v = __A;
5749}
5750
5751static __inline__ void __DEFAULT_FN_ATTRS128
5752_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5753{
5754 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5755 (__v4si) __A,
5756 (__mmask8) __U);
5757}
5758
5759static __inline void __DEFAULT_FN_ATTRS256
5760_mm256_storeu_epi32 (void *__P, __m256i __A)
5761{
5762 struct __storeu_epi32 {
5763 __m256i_u __v;
5764 } __attribute__((__packed__, __may_alias__));
5765 ((struct __storeu_epi32*)__P)->__v = __A;
5766}
5767
5768static __inline__ void __DEFAULT_FN_ATTRS256
5769_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5770{
5771 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5772 (__v8si) __A,
5773 (__mmask8) __U);
5774}
5775
5776static __inline__ void __DEFAULT_FN_ATTRS128
5777_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5778{
5779 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5780 (__v2df) __A,
5781 (__mmask8) __U);
5782}
5783
5784static __inline__ void __DEFAULT_FN_ATTRS256
5785_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5786{
5787 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5788 (__v4df) __A,
5789 (__mmask8) __U);
5790}
5791
5792static __inline__ void __DEFAULT_FN_ATTRS128
5793_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5794{
5795 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5796 (__v4sf) __A,
5797 (__mmask8) __U);
5798}
5799
5800static __inline__ void __DEFAULT_FN_ATTRS256
5801_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5802{
5803 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5804 (__v8sf) __A,
5805 (__mmask8) __U);
5806}
5807
5808
5809static __inline__ __m128d __DEFAULT_FN_ATTRS128
5810_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5811{
5812 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5813 (__v2df)_mm_unpackhi_pd(__A, __B),
5814 (__v2df)__W);
5815}
5816
5817static __inline__ __m128d __DEFAULT_FN_ATTRS128
5818_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5819{
5820 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5821 (__v2df)_mm_unpackhi_pd(__A, __B),
5822 (__v2df)_mm_setzero_pd());
5823}
5824
5825static __inline__ __m256d __DEFAULT_FN_ATTRS256
5826_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5827{
5828 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5829 (__v4df)_mm256_unpackhi_pd(__A, __B),
5830 (__v4df)__W);
5831}
5832
5833static __inline__ __m256d __DEFAULT_FN_ATTRS256
5834_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5835{
5836 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5837 (__v4df)_mm256_unpackhi_pd(__A, __B),
5838 (__v4df)_mm256_setzero_pd());
5839}
5840
5841static __inline__ __m128 __DEFAULT_FN_ATTRS128
5842_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5843{
5844 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5845 (__v4sf)_mm_unpackhi_ps(__A, __B),
5846 (__v4sf)__W);
5847}
5848
5849static __inline__ __m128 __DEFAULT_FN_ATTRS128
5850_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5851{
5852 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5853 (__v4sf)_mm_unpackhi_ps(__A, __B),
5854 (__v4sf)_mm_setzero_ps());
5855}
5856
5857static __inline__ __m256 __DEFAULT_FN_ATTRS256
5858_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5859{
5860 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5861 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5862 (__v8sf)__W);
5863}
5864
5865static __inline__ __m256 __DEFAULT_FN_ATTRS256
5866_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5867{
5868 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5869 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5870 (__v8sf)_mm256_setzero_ps());
5871}
5872
5873static __inline__ __m128d __DEFAULT_FN_ATTRS128
5874_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5875{
5876 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5877 (__v2df)_mm_unpacklo_pd(__A, __B),
5878 (__v2df)__W);
5879}
5880
5881static __inline__ __m128d __DEFAULT_FN_ATTRS128
5882_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5883{
5884 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5885 (__v2df)_mm_unpacklo_pd(__A, __B),
5886 (__v2df)_mm_setzero_pd());
5887}
5888
5889static __inline__ __m256d __DEFAULT_FN_ATTRS256
5890_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5891{
5892 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5893 (__v4df)_mm256_unpacklo_pd(__A, __B),
5894 (__v4df)__W);
5895}
5896
5897static __inline__ __m256d __DEFAULT_FN_ATTRS256
5898_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5899{
5900 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5901 (__v4df)_mm256_unpacklo_pd(__A, __B),
5902 (__v4df)_mm256_setzero_pd());
5903}
5904
5905static __inline__ __m128 __DEFAULT_FN_ATTRS128
5906_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5907{
5908 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5909 (__v4sf)_mm_unpacklo_ps(__A, __B),
5910 (__v4sf)__W);
5911}
5912
5913static __inline__ __m128 __DEFAULT_FN_ATTRS128
5914_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5915{
5916 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5917 (__v4sf)_mm_unpacklo_ps(__A, __B),
5918 (__v4sf)_mm_setzero_ps());
5919}
5920
5921static __inline__ __m256 __DEFAULT_FN_ATTRS256
5922_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5923{
5924 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5925 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5926 (__v8sf)__W);
5927}
5928
5929static __inline__ __m256 __DEFAULT_FN_ATTRS256
5930_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5931{
5932 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5933 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5934 (__v8sf)_mm256_setzero_ps());
5935}
5936
5937static __inline__ __m128d __DEFAULT_FN_ATTRS128
5938_mm_rcp14_pd (__m128d __A)
5939{
5940 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5941 (__v2df)
5942 _mm_setzero_pd (),
5943 (__mmask8) -1);
5944}
5945
5946static __inline__ __m128d __DEFAULT_FN_ATTRS128
5947_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5948{
5949 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5950 (__v2df) __W,
5951 (__mmask8) __U);
5952}
5953
5954static __inline__ __m128d __DEFAULT_FN_ATTRS128
5956{
5957 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5958 (__v2df)
5959 _mm_setzero_pd (),
5960 (__mmask8) __U);
5961}
5962
5963static __inline__ __m256d __DEFAULT_FN_ATTRS256
5964_mm256_rcp14_pd (__m256d __A)
5965{
5966 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5967 (__v4df)
5969 (__mmask8) -1);
5970}
5971
5972static __inline__ __m256d __DEFAULT_FN_ATTRS256
5973_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5974{
5975 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5976 (__v4df) __W,
5977 (__mmask8) __U);
5978}
5979
5980static __inline__ __m256d __DEFAULT_FN_ATTRS256
5982{
5983 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5984 (__v4df)
5986 (__mmask8) __U);
5987}
5988
5989static __inline__ __m128 __DEFAULT_FN_ATTRS128
5990_mm_rcp14_ps (__m128 __A)
5991{
5992 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5993 (__v4sf)
5994 _mm_setzero_ps (),
5995 (__mmask8) -1);
5996}
5997
5998static __inline__ __m128 __DEFAULT_FN_ATTRS128
5999_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6000{
6001 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6002 (__v4sf) __W,
6003 (__mmask8) __U);
6004}
6005
6006static __inline__ __m128 __DEFAULT_FN_ATTRS128
6008{
6009 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6010 (__v4sf)
6011 _mm_setzero_ps (),
6012 (__mmask8) __U);
6013}
6014
6015static __inline__ __m256 __DEFAULT_FN_ATTRS256
6017{
6018 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6019 (__v8sf)
6021 (__mmask8) -1);
6022}
6023
6024static __inline__ __m256 __DEFAULT_FN_ATTRS256
6025_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6026{
6027 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6028 (__v8sf) __W,
6029 (__mmask8) __U);
6030}
6031
6032static __inline__ __m256 __DEFAULT_FN_ATTRS256
6034{
6035 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6036 (__v8sf)
6038 (__mmask8) __U);
6039}
6040
/* Selects, under mask U, between _mm_permute_pd(X, C) and W. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_permute_pd((X), (C)), (__v2df)(__m128d)(W)))
6045
/* Selects, under mask U, between _mm_permute_pd(X, C) and zero. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)_mm_setzero_pd()))
6050
/* Selects, under mask U, between _mm256_permute_pd(X, C) and W. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)(__m256d)(W)))
6055
/* Selects, under mask U, between _mm256_permute_pd(X, C) and zero. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
6060
/* Selects, under mask U, between _mm_permute_ps(X, C) and W. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_permute_ps((X), (C)), (__v4sf)(__m128)(W)))
6065
/* Selects, under mask U, between _mm_permute_ps(X, C) and zero. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)_mm_setzero_ps()))
6070
/* Selects, under mask U, between _mm256_permute_ps(X, C) and W. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)(__m256)(W)))
6075
/* Selects, under mask U, between _mm256_permute_ps(X, C) and zero. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)_mm256_setzero_ps()))
6080
6081static __inline__ __m128d __DEFAULT_FN_ATTRS128
6082_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
6083{
6084 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6085 (__v2df)_mm_permutevar_pd(__A, __C),
6086 (__v2df)__W);
6087}
6088
6089static __inline__ __m128d __DEFAULT_FN_ATTRS128
6090_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
6091{
6092 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6093 (__v2df)_mm_permutevar_pd(__A, __C),
6094 (__v2df)_mm_setzero_pd());
6095}
6096
6097static __inline__ __m256d __DEFAULT_FN_ATTRS256
6098_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
6099{
6100 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6101 (__v4df)_mm256_permutevar_pd(__A, __C),
6102 (__v4df)__W);
6103}
6104
6105static __inline__ __m256d __DEFAULT_FN_ATTRS256
6106_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
6107{
6108 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6109 (__v4df)_mm256_permutevar_pd(__A, __C),
6110 (__v4df)_mm256_setzero_pd());
6111}
6112
6113static __inline__ __m128 __DEFAULT_FN_ATTRS128
6114_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
6115{
6116 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6117 (__v4sf)_mm_permutevar_ps(__A, __C),
6118 (__v4sf)__W);
6119}
6120
6121static __inline__ __m128 __DEFAULT_FN_ATTRS128
6122_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
6123{
6124 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6125 (__v4sf)_mm_permutevar_ps(__A, __C),
6126 (__v4sf)_mm_setzero_ps());
6127}
6128
6129static __inline__ __m256 __DEFAULT_FN_ATTRS256
6130_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
6131{
6132 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6133 (__v8sf)_mm256_permutevar_ps(__A, __C),
6134 (__v8sf)__W);
6135}
6136
6137static __inline__ __m256 __DEFAULT_FN_ATTRS256
6138_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
6139{
6140 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6141 (__v8sf)_mm256_permutevar_ps(__A, __C),
6142 (__v8sf)_mm256_setzero_ps());
6143}
6144
6145static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6146_mm_test_epi32_mask (__m128i __A, __m128i __B)
6147{
6149}
6150
6151static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6152_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6153{
6154 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6156}
6157
6158static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6159_mm256_test_epi32_mask (__m256i __A, __m256i __B)
6160{
6161 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6163}
6164
6165static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6166_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6167{
6168 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6170}
6171
6172static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6173_mm_test_epi64_mask (__m128i __A, __m128i __B)
6174{
6176}
6177
6178static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6179_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6180{
6181 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6183}
6184
6185static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6186_mm256_test_epi64_mask (__m256i __A, __m256i __B)
6187{
6188 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6190}
6191
6192static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6193_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6194{
6195 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6197}
6198
6199static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6200_mm_testn_epi32_mask (__m128i __A, __m128i __B)
6201{
6203}
6204
6205static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6206_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6207{
6208 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6210}
6211
6212static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6213_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6214{
6215 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6217}
6218
6219static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6220_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6221{
6222 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6224}
6225
6226static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6227_mm_testn_epi64_mask (__m128i __A, __m128i __B)
6228{
6230}
6231
6232static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6233_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6234{
6235 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6237}
6238
6239static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6240_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6241{
6242 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6244}
6245
6246static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6247_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6248{
6249 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6251}
6252
6253static __inline__ __m128i __DEFAULT_FN_ATTRS128
6254_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6255{
6256 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6257 (__v4si)_mm_unpackhi_epi32(__A, __B),
6258 (__v4si)__W);
6259}
6260
6261static __inline__ __m128i __DEFAULT_FN_ATTRS128
6262_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6263{
6264 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6265 (__v4si)_mm_unpackhi_epi32(__A, __B),
6266 (__v4si)_mm_setzero_si128());
6267}
6268
6269static __inline__ __m256i __DEFAULT_FN_ATTRS256
6270_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6271{
6272 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6273 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6274 (__v8si)__W);
6275}
6276
6277static __inline__ __m256i __DEFAULT_FN_ATTRS256
6278_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6279{
6280 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6281 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6282 (__v8si)_mm256_setzero_si256());
6283}
6284
6285static __inline__ __m128i __DEFAULT_FN_ATTRS128
6286_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6287{
6288 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6289 (__v2di)_mm_unpackhi_epi64(__A, __B),
6290 (__v2di)__W);
6291}
6292
6293static __inline__ __m128i __DEFAULT_FN_ATTRS128
6294_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6295{
6296 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6297 (__v2di)_mm_unpackhi_epi64(__A, __B),
6298 (__v2di)_mm_setzero_si128());
6299}
6300
6301static __inline__ __m256i __DEFAULT_FN_ATTRS256
6302_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6303{
6304 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6305 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6306 (__v4di)__W);
6307}
6308
6309static __inline__ __m256i __DEFAULT_FN_ATTRS256
6310_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6311{
6312 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6313 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6314 (__v4di)_mm256_setzero_si256());
6315}
6316
6317static __inline__ __m128i __DEFAULT_FN_ATTRS128
6318_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6319{
6320 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6321 (__v4si)_mm_unpacklo_epi32(__A, __B),
6322 (__v4si)__W);
6323}
6324
6325static __inline__ __m128i __DEFAULT_FN_ATTRS128
6326_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6327{
6328 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6329 (__v4si)_mm_unpacklo_epi32(__A, __B),
6330 (__v4si)_mm_setzero_si128());
6331}
6332
6333static __inline__ __m256i __DEFAULT_FN_ATTRS256
6334_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6335{
6336 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6337 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6338 (__v8si)__W);
6339}
6340
6341static __inline__ __m256i __DEFAULT_FN_ATTRS256
6342_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6343{
6344 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6345 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6346 (__v8si)_mm256_setzero_si256());
6347}
6348
6349static __inline__ __m128i __DEFAULT_FN_ATTRS128
6350_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6351{
6352 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6353 (__v2di)_mm_unpacklo_epi64(__A, __B),
6354 (__v2di)__W);
6355}
6356
6357static __inline__ __m128i __DEFAULT_FN_ATTRS128
6358_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6359{
6360 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6361 (__v2di)_mm_unpacklo_epi64(__A, __B),
6362 (__v2di)_mm_setzero_si128());
6363}
6364
6365static __inline__ __m256i __DEFAULT_FN_ATTRS256
6366_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6367{
6368 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6369 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6370 (__v4di)__W);
6371}
6372
6373static __inline__ __m256i __DEFAULT_FN_ATTRS256
6374_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6375{
6376 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6377 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6378 (__v4di)_mm256_setzero_si256());
6379}
6380
6381static __inline__ __m128i __DEFAULT_FN_ATTRS128
6382_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6383{
6384 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6385 (__v4si)_mm_sra_epi32(__A, __B),
6386 (__v4si)__W);
6387}
6388
6389static __inline__ __m128i __DEFAULT_FN_ATTRS128
6390_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6391{
6392 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6393 (__v4si)_mm_sra_epi32(__A, __B),
6394 (__v4si)_mm_setzero_si128());
6395}
6396
6397static __inline__ __m256i __DEFAULT_FN_ATTRS256
6398_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6399{
6400 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6401 (__v8si)_mm256_sra_epi32(__A, __B),
6402 (__v8si)__W);
6403}
6404
6405static __inline__ __m256i __DEFAULT_FN_ATTRS256
6406_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6407{
6408 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6409 (__v8si)_mm256_sra_epi32(__A, __B),
6410 (__v8si)_mm256_setzero_si256());
6411}
6412
6413static __inline__ __m128i __DEFAULT_FN_ATTRS128
6414_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
6415{
6416 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6417 (__v4si)_mm_srai_epi32(__A, (int)__B),
6418 (__v4si)__W);
6419}
6420
6421static __inline__ __m128i __DEFAULT_FN_ATTRS128
6422_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
6423{
6424 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6425 (__v4si)_mm_srai_epi32(__A, (int)__B),
6426 (__v4si)_mm_setzero_si128());
6427}
6428
6429static __inline__ __m256i __DEFAULT_FN_ATTRS256
6430_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
6431{
6432 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6433 (__v8si)_mm256_srai_epi32(__A, (int)__B),
6434 (__v8si)__W);
6435}
6436
6437static __inline__ __m256i __DEFAULT_FN_ATTRS256
6438_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
6439{
6440 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6441 (__v8si)_mm256_srai_epi32(__A, (int)__B),
6442 (__v8si)_mm256_setzero_si256());
6443}
6444
6445static __inline__ __m128i __DEFAULT_FN_ATTRS128
6446_mm_sra_epi64(__m128i __A, __m128i __B)
6447{
6448 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6449}
6450
6451static __inline__ __m128i __DEFAULT_FN_ATTRS128
6452_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6453{
6454 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6455 (__v2di)_mm_sra_epi64(__A, __B), \
6456 (__v2di)__W);
6457}
6458
6459static __inline__ __m128i __DEFAULT_FN_ATTRS128
6460_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6461{
6462 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6463 (__v2di)_mm_sra_epi64(__A, __B), \
6464 (__v2di)_mm_setzero_si128());
6465}
6466
6467static __inline__ __m256i __DEFAULT_FN_ATTRS256
6468_mm256_sra_epi64(__m256i __A, __m128i __B)
6469{
6470 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6471}
6472
6473static __inline__ __m256i __DEFAULT_FN_ATTRS256
6474_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6475{
6476 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6477 (__v4di)_mm256_sra_epi64(__A, __B), \
6478 (__v4di)__W);
6479}
6480
6481static __inline__ __m256i __DEFAULT_FN_ATTRS256
6482_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6483{
6484 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6485 (__v4di)_mm256_sra_epi64(__A, __B), \
6486 (__v4di)_mm256_setzero_si256());
6487}
6488
6489static __inline__ __m128i __DEFAULT_FN_ATTRS128
6490_mm_srai_epi64(__m128i __A, unsigned int __imm)
6491{
6492 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm);
6493}
6494
6495static __inline__ __m128i __DEFAULT_FN_ATTRS128
6496_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
6497{
6498 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6499 (__v2di)_mm_srai_epi64(__A, __imm), \
6500 (__v2di)__W);
6501}
6502
6503static __inline__ __m128i __DEFAULT_FN_ATTRS128
6504_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
6505{
6506 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6507 (__v2di)_mm_srai_epi64(__A, __imm), \
6508 (__v2di)_mm_setzero_si128());
6509}
6510
6511static __inline__ __m256i __DEFAULT_FN_ATTRS256
6512_mm256_srai_epi64(__m256i __A, unsigned int __imm)
6513{
6514 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm);
6515}
6516
6517static __inline__ __m256i __DEFAULT_FN_ATTRS256
6518_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6519 unsigned int __imm)
6520{
6521 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6522 (__v4di)_mm256_srai_epi64(__A, __imm), \
6523 (__v4di)__W);
6524}
6525
6526static __inline__ __m256i __DEFAULT_FN_ATTRS256
6527_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
6528{
6529 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6530 (__v4di)_mm256_srai_epi64(__A, __imm), \
6531 (__v4di)_mm256_setzero_si256());
6532}
6533
/* Passes A, B, C, the immediate imm (as unsigned char) and an all-ones mask
 * to the pternlogd128_mask builtin. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6538
/* Passes A, B, C, the immediate imm (as unsigned char) and mask U to the
 * pternlogd128_mask builtin. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6543
/* Passes A, B, C, the immediate imm (as unsigned char) and mask U to the
 * pternlogd128_maskz builtin. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                              (__v4si)(__m128i)(B), \
                                              (__v4si)(__m128i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6548
/* Passes A, B, C, the immediate imm (as unsigned char) and an all-ones mask
 * to the pternlogd256_mask builtin. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6553
/* Passes A, B, C, the immediate imm (as unsigned char) and mask U to the
 * pternlogd256_mask builtin. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6558
/* Passes A, B, C, the immediate imm (as unsigned char) and mask U to the
 * pternlogd256_maskz builtin. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (__v8si)(__m256i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6563
/* Passes A, B, C, the immediate imm (as unsigned char) and an all-ones mask
 * to the pternlogq128_mask builtin. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6568
/* Passes A, B, C, the immediate imm (as unsigned char) and mask U to the
 * pternlogq128_mask builtin. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6573
/* Passes A, B, C, the immediate imm (as unsigned char) and mask U to the
 * pternlogq128_maskz builtin. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                              (__v2di)(__m128i)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6578
/* Passes A, B, C, the immediate imm (as unsigned char) and an all-ones mask
 * to the pternlogq256_mask builtin. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))
6583
/* Passes A, B, C, the immediate imm (as unsigned char) and mask U to the
 * pternlogq256_mask builtin. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))
6588
/* Passes A, B, C, the immediate imm (as unsigned char) and mask U to the
 * pternlogq256_maskz builtin. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
6593
/* Passes A, B (as __v8sf) and the immediate imm (as int) to the
 * shuf_f32x4_256 builtin. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(imm)))
6597
/* Selects, under mask U, between _mm256_shuffle_f32x4(A, B, imm) and W. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
6602
/* Selects, under mask U, between _mm256_shuffle_f32x4(A, B, imm) and zero. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
6607
/* Passes A, B (as __v4df) and the immediate imm (as int) to the
 * shuf_f64x2_256 builtin. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(imm)))
6611
/* Selects, under mask U, between _mm256_shuffle_f64x2(A, B, imm) and W. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)(__m256d)(W)))
6616
/* Selects, under mask U, between _mm256_shuffle_f64x2(A, B, imm) and zero. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)_mm256_setzero_pd()))
6621
/* Passes A, B (as __v8si) and the immediate imm (as int) to the
 * shuf_i32x4_256 builtin. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm)))
6625
/* Selects, under mask U, between _mm256_shuffle_i32x4(A, B, imm) and W. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
6630
/* Selects, under mask U, between _mm256_shuffle_i32x4(A, B, imm) and zero. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
6635
/* Passes A, B (as __v4di) and the immediate imm (as int) to the
 * shuf_i64x2_256 builtin. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(imm)))
6639
/* Selects, under mask U, between _mm256_shuffle_i64x2(A, B, imm) and W. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))
6644
6645
/* Selects, under mask U, between _mm256_shuffle_i64x2(A, B, imm) and zero. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
6650
/* Selects, under mask U, between _mm_shuffle_pd(A, B, M) and W. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)(__m128d)(W)))
6655
/* Selects, under mask U, between _mm_shuffle_pd(A, B, M) and zero. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)_mm_setzero_pd()))
6660
/* Selects, under mask U, between _mm256_shuffle_pd(A, B, M) and W. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)(__m256d)(W)))
6665
/* Selects, under mask U, between _mm256_shuffle_pd(A, B, M) and zero. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)_mm256_setzero_pd()))
6670
/* Selects, under mask U, between _mm_shuffle_ps(A, B, M) and W. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)(__m128)(W)))
6675
/* Selects, under mask U, between _mm_shuffle_ps(A, B, M) and zero. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)_mm_setzero_ps()))
6680
/* Selects, under mask U, between _mm256_shuffle_ps(A, B, M) and W. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)(__m256)(W)))
6685
/* Selects, under mask U, between _mm256_shuffle_ps(A, B, M) and zero. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)_mm256_setzero_ps()))
6690
6691static __inline__ __m128d __DEFAULT_FN_ATTRS128
6692_mm_rsqrt14_pd (__m128d __A)
6693{
6694 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6695 (__v2df)
6696 _mm_setzero_pd (),
6697 (__mmask8) -1);
6698}
6699
6700static __inline__ __m128d __DEFAULT_FN_ATTRS128
6701_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6702{
6703 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6704 (__v2df) __W,
6705 (__mmask8) __U);
6706}
6707
6708static __inline__ __m128d __DEFAULT_FN_ATTRS128
6710{
6711 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6712 (__v2df)
6713 _mm_setzero_pd (),
6714 (__mmask8) __U);
6715}
6716
6717static __inline__ __m256d __DEFAULT_FN_ATTRS256
6719{
6720 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6721 (__v4df)
6723 (__mmask8) -1);
6724}
6725
6726static __inline__ __m256d __DEFAULT_FN_ATTRS256
6727_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6728{
6729 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6730 (__v4df) __W,
6731 (__mmask8) __U);
6732}
6733
6734static __inline__ __m256d __DEFAULT_FN_ATTRS256
6736{
6737 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6738 (__v4df)
6740 (__mmask8) __U);
6741}
6742
6743static __inline__ __m128 __DEFAULT_FN_ATTRS128
6744_mm_rsqrt14_ps (__m128 __A)
6745{
6746 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6747 (__v4sf)
6748 _mm_setzero_ps (),
6749 (__mmask8) -1);
6750}
6751
6752static __inline__ __m128 __DEFAULT_FN_ATTRS128
6753_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6754{
6755 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6756 (__v4sf) __W,
6757 (__mmask8) __U);
6758}
6759
6760static __inline__ __m128 __DEFAULT_FN_ATTRS128
6762{
6763 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6764 (__v4sf)
6765 _mm_setzero_ps (),
6766 (__mmask8) __U);
6767}
6768
6769static __inline__ __m256 __DEFAULT_FN_ATTRS256
6771{
6772 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6773 (__v8sf)
6775 (__mmask8) -1);
6776}
6777
6778static __inline__ __m256 __DEFAULT_FN_ATTRS256
6779_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6780{
6781 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6782 (__v8sf) __W,
6783 (__mmask8) __U);
6784}
6785
6786static __inline__ __m256 __DEFAULT_FN_ATTRS256
6788{
6789 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6790 (__v8sf)
6792 (__mmask8) __U);
6793}
6794
6795static __inline__ __m256 __DEFAULT_FN_ATTRS256
6797{
6798 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6799 0, 1, 2, 3, 0, 1, 2, 3);
6800}
6801
6802static __inline__ __m256 __DEFAULT_FN_ATTRS256
6803_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6804{
6805 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6806 (__v8sf)_mm256_broadcast_f32x4(__A),
6807 (__v8sf)__O);
6808}
6809
6810static __inline__ __m256 __DEFAULT_FN_ATTRS256
6812{
6813 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6814 (__v8sf)_mm256_broadcast_f32x4(__A),
6815 (__v8sf)_mm256_setzero_ps());
6816}
6817
6818static __inline__ __m256i __DEFAULT_FN_ATTRS256
6820{
6821 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6822 0, 1, 2, 3, 0, 1, 2, 3);
6823}
6824
6825static __inline__ __m256i __DEFAULT_FN_ATTRS256
6826_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6827{
6828 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6829 (__v8si)_mm256_broadcast_i32x4(__A),
6830 (__v8si)__O);
6831}
6832
6833static __inline__ __m256i __DEFAULT_FN_ATTRS256
6835{
6836 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6837 (__v8si)_mm256_broadcast_i32x4(__A),
6838 (__v8si)_mm256_setzero_si256());
6839}
6840
6841static __inline__ __m256d __DEFAULT_FN_ATTRS256
6842_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6843{
6844 return (__m256d)__builtin_ia32_selectpd_256(__M,
6845 (__v4df) _mm256_broadcastsd_pd(__A),
6846 (__v4df) __O);
6847}
6848
6849static __inline__ __m256d __DEFAULT_FN_ATTRS256
6851{
6852 return (__m256d)__builtin_ia32_selectpd_256(__M,
6853 (__v4df) _mm256_broadcastsd_pd(__A),
6854 (__v4df) _mm256_setzero_pd());
6855}
6856
6857static __inline__ __m128 __DEFAULT_FN_ATTRS128
6858_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6859{
6860 return (__m128)__builtin_ia32_selectps_128(__M,
6861 (__v4sf) _mm_broadcastss_ps(__A),
6862 (__v4sf) __O);
6863}
6864
6865static __inline__ __m128 __DEFAULT_FN_ATTRS128
6867{
6868 return (__m128)__builtin_ia32_selectps_128(__M,
6869 (__v4sf) _mm_broadcastss_ps(__A),
6870 (__v4sf) _mm_setzero_ps());
6871}
6872
6873static __inline__ __m256 __DEFAULT_FN_ATTRS256
6874_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6875{
6876 return (__m256)__builtin_ia32_selectps_256(__M,
6877 (__v8sf) _mm256_broadcastss_ps(__A),
6878 (__v8sf) __O);
6879}
6880
6881static __inline__ __m256 __DEFAULT_FN_ATTRS256
6883{
6884 return (__m256)__builtin_ia32_selectps_256(__M,
6885 (__v8sf) _mm256_broadcastss_ps(__A),
6886 (__v8sf) _mm256_setzero_ps());
6887}
6888
6889static __inline__ __m128i __DEFAULT_FN_ATTRS128
6890_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6891{
6892 return (__m128i)__builtin_ia32_selectd_128(__M,
6893 (__v4si) _mm_broadcastd_epi32(__A),
6894 (__v4si) __O);
6895}
6896
6897static __inline__ __m128i __DEFAULT_FN_ATTRS128
6899{
6900 return (__m128i)__builtin_ia32_selectd_128(__M,
6901 (__v4si) _mm_broadcastd_epi32(__A),
6902 (__v4si) _mm_setzero_si128());
6903}
6904
6905static __inline__ __m256i __DEFAULT_FN_ATTRS256
6906_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6907{
6908 return (__m256i)__builtin_ia32_selectd_256(__M,
6909 (__v8si) _mm256_broadcastd_epi32(__A),
6910 (__v8si) __O);
6911}
6912
6913static __inline__ __m256i __DEFAULT_FN_ATTRS256
6915{
6916 return (__m256i)__builtin_ia32_selectd_256(__M,
6917 (__v8si) _mm256_broadcastd_epi32(__A),
6918 (__v8si) _mm256_setzero_si256());
6919}
6920
6921static __inline__ __m128i __DEFAULT_FN_ATTRS128
6922_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6923{
6924 return (__m128i)__builtin_ia32_selectq_128(__M,
6925 (__v2di) _mm_broadcastq_epi64(__A),
6926 (__v2di) __O);
6927}
6928
6929static __inline__ __m128i __DEFAULT_FN_ATTRS128
6931{
6932 return (__m128i)__builtin_ia32_selectq_128(__M,
6933 (__v2di) _mm_broadcastq_epi64(__A),
6934 (__v2di) _mm_setzero_si128());
6935}
6936
6937static __inline__ __m256i __DEFAULT_FN_ATTRS256
6938_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6939{
6940 return (__m256i)__builtin_ia32_selectq_256(__M,
6941 (__v4di) _mm256_broadcastq_epi64(__A),
6942 (__v4di) __O);
6943}
6944
6945static __inline__ __m256i __DEFAULT_FN_ATTRS256
6947{
6948 return (__m256i)__builtin_ia32_selectq_256(__M,
6949 (__v4di) _mm256_broadcastq_epi64(__A),
6950 (__v4di) _mm256_setzero_si256());
6951}
6952
6953static __inline__ __m128i __DEFAULT_FN_ATTRS128
6955{
6956 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6957 (__v16qi)_mm_undefined_si128(),
6958 (__mmask8) -1);
6959}
6960
6961static __inline__ __m128i __DEFAULT_FN_ATTRS128
6962_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6963{
6964 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6965 (__v16qi) __O, __M);
6966}
6967
6968static __inline__ __m128i __DEFAULT_FN_ATTRS128
6970{
6971 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6972 (__v16qi) _mm_setzero_si128 (),
6973 __M);
6974}
6975
6976static __inline__ void __DEFAULT_FN_ATTRS128
6978{
6979 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6980}
6981
6982static __inline__ __m128i __DEFAULT_FN_ATTRS256
6984{
6985 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6986 (__v16qi)_mm_undefined_si128(),
6987 (__mmask8) -1);
6988}
6989
6990static __inline__ __m128i __DEFAULT_FN_ATTRS256
6991_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6992{
6993 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6994 (__v16qi) __O, __M);
6995}
6996
6997static __inline__ __m128i __DEFAULT_FN_ATTRS256
6999{
7000 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7001 (__v16qi) _mm_setzero_si128 (),
7002 __M);
7003}
7004
7005static __inline__ void __DEFAULT_FN_ATTRS256
7007{
7008 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7009}
7010
7011static __inline__ __m128i __DEFAULT_FN_ATTRS128
7013{
7014 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7015 (__v8hi)_mm_setzero_si128 (),
7016 (__mmask8) -1);
7017}
7018
7019static __inline__ __m128i __DEFAULT_FN_ATTRS128
7020_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7021{
7022 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7023 (__v8hi)__O,
7024 __M);
7025}
7026
7027static __inline__ __m128i __DEFAULT_FN_ATTRS128
7029{
7030 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7031 (__v8hi) _mm_setzero_si128 (),
7032 __M);
7033}
7034
7035static __inline__ void __DEFAULT_FN_ATTRS128
7037{
7038 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7039}
7040
7041static __inline__ __m128i __DEFAULT_FN_ATTRS256
7043{
7044 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7045 (__v8hi)_mm_undefined_si128(),
7046 (__mmask8) -1);
7047}
7048
7049static __inline__ __m128i __DEFAULT_FN_ATTRS256
7050_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7051{
7052 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7053 (__v8hi) __O, __M);
7054}
7055
7056static __inline__ __m128i __DEFAULT_FN_ATTRS256
7058{
7059 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7060 (__v8hi) _mm_setzero_si128 (),
7061 __M);
7062}
7063
7064static __inline__ void __DEFAULT_FN_ATTRS256
7066{
7067 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7068}
7069
7070static __inline__ __m128i __DEFAULT_FN_ATTRS128
7072{
7073 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7074 (__v16qi)_mm_undefined_si128(),
7075 (__mmask8) -1);
7076}
7077
7078static __inline__ __m128i __DEFAULT_FN_ATTRS128
7079_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7080{
7081 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7082 (__v16qi) __O, __M);
7083}
7084
7085static __inline__ __m128i __DEFAULT_FN_ATTRS128
7087{
7088 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7089 (__v16qi) _mm_setzero_si128 (),
7090 __M);
7091}
7092
7093static __inline__ void __DEFAULT_FN_ATTRS128
7095{
7096 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7097}
7098
7099static __inline__ __m128i __DEFAULT_FN_ATTRS256
7101{
7102 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7103 (__v16qi)_mm_undefined_si128(),
7104 (__mmask8) -1);
7105}
7106
7107static __inline__ __m128i __DEFAULT_FN_ATTRS256
7108_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7109{
7110 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7111 (__v16qi) __O, __M);
7112}
7113
7114static __inline__ __m128i __DEFAULT_FN_ATTRS256
7116{
7117 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7118 (__v16qi) _mm_setzero_si128 (),
7119 __M);
7120}
7121
7122static __inline__ void __DEFAULT_FN_ATTRS256
7124{
7125 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7126}
7127
7128static __inline__ __m128i __DEFAULT_FN_ATTRS128
7130{
7131 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7132 (__v4si)_mm_undefined_si128(),
7133 (__mmask8) -1);
7134}
7135
7136static __inline__ __m128i __DEFAULT_FN_ATTRS128
7137_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7138{
7139 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7140 (__v4si) __O, __M);
7141}
7142
7143static __inline__ __m128i __DEFAULT_FN_ATTRS128
7145{
7146 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7147 (__v4si) _mm_setzero_si128 (),
7148 __M);
7149}
7150
7151static __inline__ void __DEFAULT_FN_ATTRS128
7153{
7154 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7155}
7156
7157static __inline__ __m128i __DEFAULT_FN_ATTRS256
7159{
7160 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7161 (__v4si)_mm_undefined_si128(),
7162 (__mmask8) -1);
7163}
7164
7165static __inline__ __m128i __DEFAULT_FN_ATTRS256
7166_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7167{
7168 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7169 (__v4si)__O,
7170 __M);
7171}
7172
7173static __inline__ __m128i __DEFAULT_FN_ATTRS256
7175{
7176 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7177 (__v4si) _mm_setzero_si128 (),
7178 __M);
7179}
7180
7181static __inline__ void __DEFAULT_FN_ATTRS256
7183{
7184 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7185}
7186
7187static __inline__ __m128i __DEFAULT_FN_ATTRS128
7189{
7190 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7191 (__v8hi)_mm_undefined_si128(),
7192 (__mmask8) -1);
7193}
7194
7195static __inline__ __m128i __DEFAULT_FN_ATTRS128
7196_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7197{
7198 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7199 (__v8hi) __O, __M);
7200}
7201
7202static __inline__ __m128i __DEFAULT_FN_ATTRS128
7204{
7205 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7206 (__v8hi) _mm_setzero_si128 (),
7207 __M);
7208}
7209
7210static __inline__ void __DEFAULT_FN_ATTRS128
7212{
7213 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7214}
7215
7216static __inline__ __m128i __DEFAULT_FN_ATTRS256
7218{
7219 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7220 (__v8hi)_mm_undefined_si128(),
7221 (__mmask8) -1);
7222}
7223
7224static __inline__ __m128i __DEFAULT_FN_ATTRS256
7225_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7226{
7227 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7228 (__v8hi) __O, __M);
7229}
7230
7231static __inline__ __m128i __DEFAULT_FN_ATTRS256
7233{
7234 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7235 (__v8hi) _mm_setzero_si128 (),
7236 __M);
7237}
7238
7239static __inline__ void __DEFAULT_FN_ATTRS256
7241{
7242 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7243}
7244
7245static __inline__ __m128i __DEFAULT_FN_ATTRS128
7247{
7248 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7249 (__v16qi)_mm_undefined_si128(),
7250 (__mmask8) -1);
7251}
7252
7253static __inline__ __m128i __DEFAULT_FN_ATTRS128
7254_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7255{
7256 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7257 (__v16qi) __O,
7258 __M);
7259}
7260
7261static __inline__ __m128i __DEFAULT_FN_ATTRS128
7263{
7264 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7265 (__v16qi) _mm_setzero_si128 (),
7266 __M);
7267}
7268
7269static __inline__ void __DEFAULT_FN_ATTRS128
7271{
7272 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7273}
7274
7275static __inline__ __m128i __DEFAULT_FN_ATTRS256
7277{
7278 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7279 (__v16qi)_mm_undefined_si128(),
7280 (__mmask8) -1);
7281}
7282
7283static __inline__ __m128i __DEFAULT_FN_ATTRS256
7284_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7285{
7286 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7287 (__v16qi) __O,
7288 __M);
7289}
7290
7291static __inline__ __m128i __DEFAULT_FN_ATTRS256
7293{
7294 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7295 (__v16qi) _mm_setzero_si128 (),
7296 __M);
7297}
7298
7299static __inline__ void __DEFAULT_FN_ATTRS256
7301{
7302 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7303}
7304
7305static __inline__ __m128i __DEFAULT_FN_ATTRS128
7307{
7308 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7309 (__v8hi)_mm_undefined_si128(),
7310 (__mmask8) -1);
7311}
7312
7313static __inline__ __m128i __DEFAULT_FN_ATTRS128
7314_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7315{
7316 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7317 (__v8hi) __O, __M);
7318}
7319
7320static __inline__ __m128i __DEFAULT_FN_ATTRS128
7322{
7323 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7324 (__v8hi) _mm_setzero_si128 (),
7325 __M);
7326}
7327
7328static __inline__ void __DEFAULT_FN_ATTRS128
7330{
7331 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7332}
7333
7334static __inline__ __m128i __DEFAULT_FN_ATTRS256
7336{
7337 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7338 (__v8hi) _mm_undefined_si128(),
7339 (__mmask8) -1);
7340}
7341
7342static __inline__ __m128i __DEFAULT_FN_ATTRS256
7343_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7344{
7345 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7346 (__v8hi) __O, __M);
7347}
7348
7349static __inline__ __m128i __DEFAULT_FN_ATTRS256
7351{
7352 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7353 (__v8hi) _mm_setzero_si128 (),
7354 __M);
7355}
7356
7357static __inline__ void __DEFAULT_FN_ATTRS256
7359{
7360 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7361}
7362
7363static __inline__ __m128i __DEFAULT_FN_ATTRS128
7365{
7366 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7367 (__v16qi)_mm_undefined_si128(),
7368 (__mmask8) -1);
7369}
7370
7371static __inline__ __m128i __DEFAULT_FN_ATTRS128
7372_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7373{
7374 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7375 (__v16qi) __O,
7376 __M);
7377}
7378
7379static __inline__ __m128i __DEFAULT_FN_ATTRS128
7381{
7382 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7383 (__v16qi) _mm_setzero_si128 (),
7384 __M);
7385}
7386
7387static __inline__ void __DEFAULT_FN_ATTRS128
7389{
7390 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7391}
7392
7393static __inline__ __m128i __DEFAULT_FN_ATTRS256
7395{
7396 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7397 (__v16qi)_mm_undefined_si128(),
7398 (__mmask8) -1);
7399}
7400
7401static __inline__ __m128i __DEFAULT_FN_ATTRS256
7402_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7403{
7404 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7405 (__v16qi) __O,
7406 __M);
7407}
7408
7409static __inline__ __m128i __DEFAULT_FN_ATTRS256
7411{
7412 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7413 (__v16qi) _mm_setzero_si128 (),
7414 __M);
7415}
7416
7417static __inline__ void __DEFAULT_FN_ATTRS256
7419{
7420 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7421}
7422
7423static __inline__ __m128i __DEFAULT_FN_ATTRS128
7425{
7426 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7427 (__v4si)_mm_undefined_si128(),
7428 (__mmask8) -1);
7429}
7430
7431static __inline__ __m128i __DEFAULT_FN_ATTRS128
7432_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7433{
7434 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7435 (__v4si) __O, __M);
7436}
7437
7438static __inline__ __m128i __DEFAULT_FN_ATTRS128
7440{
7441 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7442 (__v4si) _mm_setzero_si128 (),
7443 __M);
7444}
7445
7446static __inline__ void __DEFAULT_FN_ATTRS128
7448{
7449 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7450}
7451
7452static __inline__ __m128i __DEFAULT_FN_ATTRS256
7454{
7455 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7456 (__v4si)_mm_undefined_si128(),
7457 (__mmask8) -1);
7458}
7459
7460static __inline__ __m128i __DEFAULT_FN_ATTRS256
7461_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7462{
7463 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7464 (__v4si) __O, __M);
7465}
7466
7467static __inline__ __m128i __DEFAULT_FN_ATTRS256
7469{
7470 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7471 (__v4si) _mm_setzero_si128 (),
7472 __M);
7473}
7474
7475static __inline__ void __DEFAULT_FN_ATTRS256
7477{
7478 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7479}
7480
7481static __inline__ __m128i __DEFAULT_FN_ATTRS128
7483{
7484 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7485 (__v8hi)_mm_undefined_si128(),
7486 (__mmask8) -1);
7487}
7488
7489static __inline__ __m128i __DEFAULT_FN_ATTRS128
7490_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7491{
7492 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7493 (__v8hi) __O, __M);
7494}
7495
7496static __inline__ __m128i __DEFAULT_FN_ATTRS128
7498{
7499 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7500 (__v8hi) _mm_setzero_si128 (),
7501 __M);
7502}
7503
7504static __inline__ void __DEFAULT_FN_ATTRS128
7506{
7507 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7508}
7509
7510static __inline__ __m128i __DEFAULT_FN_ATTRS256
7512{
7513 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7514 (__v8hi)_mm_undefined_si128(),
7515 (__mmask8) -1);
7516}
7517
7518static __inline__ __m128i __DEFAULT_FN_ATTRS256
7519_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7520{
7521 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7522 (__v8hi) __O, __M);
7523}
7524
7525static __inline__ __m128i __DEFAULT_FN_ATTRS256
7527{
7528 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7529 (__v8hi) _mm_setzero_si128 (),
7530 __M);
7531}
7532
7533static __inline__ void __DEFAULT_FN_ATTRS256
7535{
7536 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7537}
7538
7539static __inline__ __m128i __DEFAULT_FN_ATTRS128
7541{
7542 return (__m128i)__builtin_shufflevector(
7543 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7544 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7545}
7546
7547static __inline__ __m128i __DEFAULT_FN_ATTRS128
7548_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7549{
7550 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7551 (__v16qi) __O, __M);
7552}
7553
7554static __inline__ __m128i __DEFAULT_FN_ATTRS128
7556{
7557 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7558 (__v16qi)
7560 __M);
7561}
7562
7563static __inline__ void __DEFAULT_FN_ATTRS128
7565{
7566 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7567}
7568
7569static __inline__ __m128i __DEFAULT_FN_ATTRS256
7571{
7572 return (__m128i)__builtin_shufflevector(
7573 __builtin_convertvector((__v8si)__A, __v8qi),
7574 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7575 12, 13, 14, 15);
7576}
7577
7578static __inline__ __m128i __DEFAULT_FN_ATTRS256
7579_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7580{
7581 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7582 (__v16qi) __O, __M);
7583}
7584
7585static __inline__ __m128i __DEFAULT_FN_ATTRS256
7587{
7588 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7589 (__v16qi) _mm_setzero_si128 (),
7590 __M);
7591}
7592
7593static __inline__ void __DEFAULT_FN_ATTRS256
7595{
7596 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7597}
7598
7599static __inline__ __m128i __DEFAULT_FN_ATTRS128
7601{
7602 return (__m128i)__builtin_shufflevector(
7603 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7604 2, 3, 4, 5, 6, 7);
7605}
7606
7607static __inline__ __m128i __DEFAULT_FN_ATTRS128
7608_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7609{
7610 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7611 (__v8hi) __O, __M);
7612}
7613
7614static __inline__ __m128i __DEFAULT_FN_ATTRS128
7616{
7617 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7618 (__v8hi) _mm_setzero_si128 (),
7619 __M);
7620}
7621
7622static __inline__ void __DEFAULT_FN_ATTRS128
7624{
7625 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7626}
7627
7628static __inline__ __m128i __DEFAULT_FN_ATTRS256
7630{
7631 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7632}
7633
7634static __inline__ __m128i __DEFAULT_FN_ATTRS256
7635_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7636{
7637 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7638 (__v8hi) __O, __M);
7639}
7640
7641static __inline__ __m128i __DEFAULT_FN_ATTRS256
7643{
7644 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7645 (__v8hi) _mm_setzero_si128 (),
7646 __M);
7647}
7648
7649static __inline__ void __DEFAULT_FN_ATTRS256
7651{
7652 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7653}
7654
7655static __inline__ __m128i __DEFAULT_FN_ATTRS128
7657{
7658 return (__m128i)__builtin_shufflevector(
7659 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7660 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7661}
7662
7663static __inline__ __m128i __DEFAULT_FN_ATTRS128
7664_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7665{
7666 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7667 (__v16qi) __O, __M);
7668}
7669
7670static __inline__ __m128i __DEFAULT_FN_ATTRS128
7672{
7673 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7674 (__v16qi) _mm_setzero_si128 (),
7675 __M);
7676}
7677
7678static __inline__ void __DEFAULT_FN_ATTRS128
7680{
7681 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7682}
7683
7684static __inline__ __m128i __DEFAULT_FN_ATTRS256
7686{
7687 return (__m128i)__builtin_shufflevector(
7688 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7689 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7690}
7691
7692static __inline__ __m128i __DEFAULT_FN_ATTRS256
7693_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7694{
7695 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7696 (__v16qi) __O, __M);
7697}
7698
7699static __inline__ __m128i __DEFAULT_FN_ATTRS256
7701{
7702 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7703 (__v16qi) _mm_setzero_si128 (),
7704 __M);
7705}
7706
7707static __inline__ void __DEFAULT_FN_ATTRS256
7709{
7710 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7711}
7712
7713static __inline__ __m128i __DEFAULT_FN_ATTRS128
7715{
7716 return (__m128i)__builtin_shufflevector(
7717 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7718}
7719
7720static __inline__ __m128i __DEFAULT_FN_ATTRS128
7721_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7722{
7723 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7724 (__v4si) __O, __M);
7725}
7726
7727static __inline__ __m128i __DEFAULT_FN_ATTRS128
7729{
7730 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7731 (__v4si) _mm_setzero_si128 (),
7732 __M);
7733}
7734
7735static __inline__ void __DEFAULT_FN_ATTRS128
7737{
7738 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7739}
7740
7741static __inline__ __m128i __DEFAULT_FN_ATTRS256
7743{
7744 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7745}
7746
7747static __inline__ __m128i __DEFAULT_FN_ATTRS256
7748_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7749{
7750 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7751 (__v4si)_mm256_cvtepi64_epi32(__A),
7752 (__v4si)__O);
7753}
7754
7755static __inline__ __m128i __DEFAULT_FN_ATTRS256
7757{
7758 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7759 (__v4si)_mm256_cvtepi64_epi32(__A),
7760 (__v4si)_mm_setzero_si128());
7761}
7762
7763static __inline__ void __DEFAULT_FN_ATTRS256
7765{
7766 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7767}
7768
7769static __inline__ __m128i __DEFAULT_FN_ATTRS128
7771{
7772 return (__m128i)__builtin_shufflevector(
7773 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7774 3, 3, 3, 3);
7775}
7776
7777static __inline__ __m128i __DEFAULT_FN_ATTRS128
7778_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7779{
7780 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7781 (__v8hi)__O,
7782 __M);
7783}
7784
7785static __inline__ __m128i __DEFAULT_FN_ATTRS128
7787{
7788 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7789 (__v8hi) _mm_setzero_si128 (),
7790 __M);
7791}
7792
7793static __inline__ void __DEFAULT_FN_ATTRS128
7795{
7796 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7797}
7798
7799static __inline__ __m128i __DEFAULT_FN_ATTRS256
7801{
7802 return (__m128i)__builtin_shufflevector(
7803 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7804 2, 3, 4, 5, 6, 7);
7805}
7806
7807static __inline__ __m128i __DEFAULT_FN_ATTRS256
7808_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7809{
7810 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7811 (__v8hi) __O, __M);
7812}
7813
7814static __inline__ __m128i __DEFAULT_FN_ATTRS256
7816{
7817 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7818 (__v8hi) _mm_setzero_si128 (),
7819 __M);
7820}
7821
7822static __inline__ void __DEFAULT_FN_ATTRS256
7824{
7825 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7826}
7827
/* 128-bit float lane extract from A, lane chosen by imm. Unmasked form:
   all-ones mask with an undefined pass-through vector. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), \
      (__v4sf)_mm_undefined_ps(), (__mmask8)-1))
7833
/* 128-bit float lane extract from A, lane chosen by imm. Merge-masked form:
   W is the pass-through operand and U the mask. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))
7839
/* 128-bit float lane extract from A, lane chosen by imm. Zero-masked form:
   the pass-through operand is an all-zero vector and U the mask. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
7845
/* 128-bit integer lane extract from A, lane chosen by imm. Unmasked form:
   all-ones mask with an undefined pass-through vector. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), \
      (__v4si)_mm_undefined_si128(), (__mmask8)-1))
7851
/* 128-bit integer lane extract from A, lane chosen by imm. Merge-masked form:
   W is the pass-through operand and U the mask. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), \
      (__v4si)(__m128i)(W), (__mmask8)(U)))
7857
/* 128-bit integer lane extract from A, lane chosen by imm. Zero-masked form:
   the pass-through operand is an all-zero vector and U the mask. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), \
      (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
7863
/* Inserts the 128-bit float vector B into the 256-bit vector A at the lane
   chosen by imm. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256( \
      (__v8sf)(__m256)(A), \
      (__v4sf)(__m128)(B), \
      (int)(imm)))
7867
/* Merge-masked float lane insert: selects, under mask U, between
   _mm256_insertf32x4(A, B, imm) and W. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
7872
/* Zero-masked float lane insert: selects, under mask U, between
   _mm256_insertf32x4(A, B, imm) and an all-zero vector. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
7877
/* Inserts the 128-bit integer vector B into the 256-bit vector A at the lane
   chosen by imm. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256( \
      (__v8si)(__m256i)(A), \
      (__v4si)(__m128i)(B), \
      (int)(imm)))
7881
/* Merge-masked integer lane insert: selects, under mask U, between
   _mm256_inserti32x4(A, B, imm) and W. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
7886
/* Zero-masked integer lane insert: selects, under mask U, between
   _mm256_inserti32x4(A, B, imm) and an all-zero vector. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
7891
/* Mantissa extraction on 128-bit doubles. The immediate is built as
   (C << 2) | B. Unmasked form: all-ones mask, all-zero pass-through. */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1))
7897
/* Mantissa extraction on 128-bit doubles. The immediate is built as
   (C << 2) | B. Merge-masked form: W is the pass-through, U the mask. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))
7903
/* Mantissa extraction on 128-bit doubles. The immediate is built as
   (C << 2) | B. Zero-masked form: all-zero pass-through, U the mask. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
7909
/* Mantissa extraction on 256-bit doubles. The immediate is built as
   (C << 2) | B. Unmasked form: all-ones mask, all-zero pass-through. */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
7915
/* Mantissa extraction on 256-bit doubles. The immediate is built as
   (C << 2) | B. Merge-masked form: W is the pass-through, U the mask. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))
7921
/* Mantissa extraction on 256-bit doubles. The immediate is built as
   (C << 2) | B. Zero-masked form: all-zero pass-through, U the mask. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
7927
/* Mantissa extraction on 128-bit floats. The immediate is built as
   (C << 2) | B. Unmasked form: all-ones mask, all-zero pass-through. */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
7933
/* Mantissa extraction on 128-bit floats. The immediate is built as
   (C << 2) | B. Merge-masked form: W is the pass-through, U the mask. */
#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))
7939
/* Mantissa extraction on 128-bit floats. The immediate is built as
   (C << 2) | B. Zero-masked form: all-zero pass-through, U the mask. */
#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
7945
/* Mantissa extraction on 256-bit floats. The immediate is built as
   (C << 2) | B. Unmasked form: all-ones mask, all-zero pass-through. */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
7951
/* Mantissa extraction on 256-bit floats. The immediate is built as
   (C << 2) | B. Merge-masked form: W is the pass-through, U the mask. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)(W), (__mmask8)(U)))
7957
/* Mantissa extraction on 256-bit floats. The immediate is built as
   (C << 2) | B. Zero-masked form: all-zero pass-through, U the mask. */
#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
7963
/* Masked gather of doubles with 64-bit indices (128-bit result). Forwards the
   pass-through v1_old, base addr, index vector, mask and scale. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7969
/* Masked gather of 64-bit integers with 64-bit indices (128-bit result).
   Forwards the pass-through v1_old, base addr, index vector, mask and scale. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7975
/* Masked gather of doubles with 64-bit indices (256-bit result). Forwards the
   pass-through v1_old, base addr, index vector, mask and scale. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7981
/* Masked gather of 64-bit integers with 64-bit indices (256-bit result).
   Forwards the pass-through v1_old, base addr, index vector, mask and scale. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7987
/* Masked gather of floats with 64-bit indices taken from a 128-bit index
   vector. Forwards the pass-through v1_old, base addr, mask and scale. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7993
/* Masked gather of 32-bit integers with 64-bit indices taken from a 128-bit
   index vector. Forwards the pass-through v1_old, base addr, mask and scale. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7999
/* Masked gather of floats with 64-bit indices taken from a 256-bit index
   vector; the result and the pass-through v1_old are 128-bit (__m128). */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
8005
/* Masked gather of 32-bit integers with 64-bit indices taken from a 256-bit
   index vector; the result and the pass-through v1_old are 128-bit (__m128i). */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
8011
/* Masked gather of doubles with 32-bit indices (128-bit result). Forwards the
   pass-through v1_old, base addr, index vector, mask and scale. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
8017
8018#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
8019 ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
8020 (void const *)(addr), \
8021 (__v4si)(__m128i)(index), \
8022 (__mmask8)(mask), (int)(scale)))
8023
8024#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
8025 ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
8026 (void const *)(addr), \
8027 (__v4si)(__m128i)(index), \
8028 (__mmask8)(mask), (int)(scale)))
8029
8030#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
8031 ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
8032 (void const *)(addr), \
8033 (__v4si)(__m128i)(index), \
8034 (__mmask8)(mask), (int)(scale)))
8035
/* Masked gathers indexed by 32-bit offsets that return 32-bit elements.
 * Index and result have the same vector width.  Operands are cast and
 * forwarded to the matching __builtin_ia32_gather3siv* builtin in the order
 * (v1_old, addr, index, mask, scale).
 * NOTE(review): presumably v1_old provides the elements whose mask bit is
 * clear -- confirm with the Intel guide. */

/* __m128 result; v1_old is cast to __v4sf, index to __v4si. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))

/* __m128i result; v1_old and index are both cast to __v4si. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))

/* __m256 result; v1_old is cast to __v8sf, index to __v8si. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf( \
      (__v8sf)(__m256)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))

/* __m256i result; v1_old and index are both cast to __v8si. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si( \
      (__v8si)(__m256i)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
8059
/* Immediate-controlled permute of a __m256d: X is cast to __v4df and passed
 * with the int control C to __builtin_ia32_permdf256. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256( \
      (__v4df)(__m256d)(X), (int)(C)))

/* Merge-masking form: mask U, the permuted vector and W are passed to
 * __builtin_ia32_selectpd_256.  Presumably a set mask bit takes the
 * permuted element and a clear bit keeps W's element -- confirm. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)(__m256d)(W)))

/* Zero-masking form: same select, with _mm256_setzero_pd() as the
 * alternative operand instead of a caller-supplied vector. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
8072
/* Immediate-controlled permute of a __m256i: X is cast to __v4di and passed
 * with the int control C to __builtin_ia32_permdi256. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256( \
      (__v4di)(__m256i)(X), (int)(C)))

/* Merge-masking form: mask U, the permuted vector and W are passed to
 * __builtin_ia32_selectq_256.  Presumably a set mask bit takes the
 * permuted element and a clear bit keeps W's element -- confirm. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)(__m256i)(W)))

/* Zero-masking form: same select, with _mm256_setzero_si256() as the
 * alternative operand instead of a caller-supplied vector. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)_mm256_setzero_si256()))
8085
8086static __inline__ __m256d __DEFAULT_FN_ATTRS256
8087_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8088{
8089 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
8090}
8091
8092static __inline__ __m256d __DEFAULT_FN_ATTRS256
8093_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8094 __m256d __Y)
8095{
8096 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8097 (__v4df)_mm256_permutexvar_pd(__X, __Y),
8098 (__v4df)__W);
8099}
8100
8101static __inline__ __m256d __DEFAULT_FN_ATTRS256
8102_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8103{
8104 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8105 (__v4df)_mm256_permutexvar_pd(__X, __Y),
8106 (__v4df)_mm256_setzero_pd());
8107}
8108
8109static __inline__ __m256i __DEFAULT_FN_ATTRS256
8110_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8111{
8112 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
8113}
8114
8115static __inline__ __m256i __DEFAULT_FN_ATTRS256
8117{
8118 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8119 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8120 (__v4di)_mm256_setzero_si256());
8121}
8122
8123static __inline__ __m256i __DEFAULT_FN_ATTRS256
8124_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8125 __m256i __Y)
8126{
8127 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8128 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8129 (__v4di)__W);
8130}
8131
/* Alias for _mm256_permutevar8x32_ps with the two operands swapped:
 * A (first here) becomes the second argument, B the first. */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps((B), (A))
8133
8134static __inline__ __m256 __DEFAULT_FN_ATTRS256
8135_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
8136{
8137 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8138 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8139 (__v8sf)__W);
8140}
8141
8142static __inline__ __m256 __DEFAULT_FN_ATTRS256
8144{
8145 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8146 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8147 (__v8sf)_mm256_setzero_ps());
8148}
8149
/* Alias for _mm256_permutevar8x32_epi32 with the two operands swapped:
 * A (first here) becomes the second argument, B the first. */
#define _mm256_permutexvar_epi32(A, B) \
  _mm256_permutevar8x32_epi32((B), (A))
8151
8152static __inline__ __m256i __DEFAULT_FN_ATTRS256
8153_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
8154 __m256i __Y)
8155{
8156 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8157 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8158 (__v8si)__W);
8159}
8160
8161static __inline__ __m256i __DEFAULT_FN_ATTRS256
8163{
8164 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8165 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8166 (__v8si)_mm256_setzero_si256());
8167}
8168
/* 128-bit alignr on 32-bit elements: A and B are cast to __v4si and passed
 * with the int immediate to __builtin_ia32_alignd128. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (int)(imm)))

/* Merge-masking form: mask U, the alignr result and W are passed to
 * __builtin_ia32_selectd_128.  Presumably a set mask bit takes the alignr
 * element and a clear bit keeps W's element -- confirm. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)(__m128i)(W)))

/* Zero-masking form: same select with _mm_setzero_si128() as the
 * alternative operand. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)_mm_setzero_si128()))
8182
/* 256-bit alignr on 32-bit elements: A and B are cast to __v8si and passed
 * with the int immediate to __builtin_ia32_alignd256. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm)))

/* Merge-masking form: mask U, the alignr result and W are passed to
 * __builtin_ia32_selectd_256.  Presumably a set mask bit takes the alignr
 * element and a clear bit keeps W's element -- confirm. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))

/* Zero-masking form: same select with _mm256_setzero_si256() as the
 * alternative operand. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
8196
/* 128-bit alignr on 64-bit elements: A and B are cast to __v2di and passed
 * with the int immediate to __builtin_ia32_alignq128. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm)))

/* Merge-masking form: mask U, the alignr result and W are passed to
 * __builtin_ia32_selectq_128.  Presumably a set mask bit takes the alignr
 * element and a clear bit keeps W's element -- confirm. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)(__m128i)(W)))

/* Zero-masking form: same select with _mm_setzero_si128() as the
 * alternative operand. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)_mm_setzero_si128()))
8210
/* 256-bit alignr on 64-bit elements: A and B are cast to __v4di and passed
 * with the int immediate to __builtin_ia32_alignq256. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (int)(imm)))

/* Merge-masking form: mask U, the alignr result and W are passed to
 * __builtin_ia32_selectq_256.  Presumably a set mask bit takes the alignr
 * element and a clear bit keeps W's element -- confirm. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))

/* Zero-masking form: same select with _mm256_setzero_si256() as the
 * alternative operand. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
8224
8225static __inline__ __m128 __DEFAULT_FN_ATTRS128
8226_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8227{
8228 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8229 (__v4sf)_mm_movehdup_ps(__A),
8230 (__v4sf)__W);
8231}
8232
8233static __inline__ __m128 __DEFAULT_FN_ATTRS128
8235{
8236 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8237 (__v4sf)_mm_movehdup_ps(__A),
8238 (__v4sf)_mm_setzero_ps());
8239}
8240
8241static __inline__ __m256 __DEFAULT_FN_ATTRS256
8242_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8243{
8244 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8245 (__v8sf)_mm256_movehdup_ps(__A),
8246 (__v8sf)__W);
8247}
8248
8249static __inline__ __m256 __DEFAULT_FN_ATTRS256
8251{
8252 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8253 (__v8sf)_mm256_movehdup_ps(__A),
8254 (__v8sf)_mm256_setzero_ps());
8255}
8256
8257static __inline__ __m128 __DEFAULT_FN_ATTRS128
8258_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8259{
8260 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8261 (__v4sf)_mm_moveldup_ps(__A),
8262 (__v4sf)__W);
8263}
8264
8265static __inline__ __m128 __DEFAULT_FN_ATTRS128
8267{
8268 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8269 (__v4sf)_mm_moveldup_ps(__A),
8270 (__v4sf)_mm_setzero_ps());
8271}
8272
8273static __inline__ __m256 __DEFAULT_FN_ATTRS256
8274_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8275{
8276 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8277 (__v8sf)_mm256_moveldup_ps(__A),
8278 (__v8sf)__W);
8279}
8280
8281static __inline__ __m256 __DEFAULT_FN_ATTRS256
8283{
8284 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8285 (__v8sf)_mm256_moveldup_ps(__A),
8286 (__v8sf)_mm256_setzero_ps());
8287}
8288
/* Merge-masking form of _mm256_shuffle_epi32: mask U, the shuffled vector
 * and W are passed to __builtin_ia32_selectd_256.  Presumably a set mask
 * bit takes the shuffled element and a clear bit keeps W's element --
 * confirm. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)(__m256i)(W)))

/* Zero-masking form: same select with _mm256_setzero_si256() as the
 * alternative operand. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)_mm256_setzero_si256()))
8298
/* Merge-masking form of _mm_shuffle_epi32: mask U, the shuffled vector and
 * W are passed to __builtin_ia32_selectd_128.  Presumably a set mask bit
 * takes the shuffled element and a clear bit keeps W's element --
 * confirm. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)(__m128i)(W)))

/* Zero-masking form: same select with _mm_setzero_si128() as the
 * alternative operand. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)_mm_setzero_si128()))
8308
8309static __inline__ __m128d __DEFAULT_FN_ATTRS128
8310_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8311{
8312 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8313 (__v2df) __A,
8314 (__v2df) __W);
8315}
8316
8317static __inline__ __m128d __DEFAULT_FN_ATTRS128
8318_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8319{
8320 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8321 (__v2df) __A,
8322 (__v2df) _mm_setzero_pd ());
8323}
8324
8325static __inline__ __m256d __DEFAULT_FN_ATTRS256
8326_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8327{
8328 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8329 (__v4df) __A,
8330 (__v4df) __W);
8331}
8332
8333static __inline__ __m256d __DEFAULT_FN_ATTRS256
8335{
8336 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8337 (__v4df) __A,
8338 (__v4df) _mm256_setzero_pd ());
8339}
8340
8341static __inline__ __m128 __DEFAULT_FN_ATTRS128
8342_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8343{
8344 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8345 (__v4sf) __A,
8346 (__v4sf) __W);
8347}
8348
8349static __inline__ __m128 __DEFAULT_FN_ATTRS128
8351{
8352 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8353 (__v4sf) __A,
8354 (__v4sf) _mm_setzero_ps ());
8355}
8356
8357static __inline__ __m256 __DEFAULT_FN_ATTRS256
8358_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8359{
8360 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8361 (__v8sf) __A,
8362 (__v8sf) __W);
8363}
8364
8365static __inline__ __m256 __DEFAULT_FN_ATTRS256
8367{
8368 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8369 (__v8sf) __A,
8370 (__v8sf) _mm256_setzero_ps ());
8371}
8372
8373static __inline__ __m128 __DEFAULT_FN_ATTRS128
8374_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8375{
8376 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8377 (__v4sf) __W,
8378 (__mmask8) __U);
8379}
8380
8381static __inline__ __m128 __DEFAULT_FN_ATTRS128
8383{
8384 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8385 (__v4sf)
8386 _mm_setzero_ps (),
8387 (__mmask8) __U);
8388}
8389
8390static __inline__ __m256 __DEFAULT_FN_ATTRS256
8391_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8392{
8393 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8394 (__v8sf) __W,
8395 (__mmask8) __U);
8396}
8397
8398static __inline__ __m256 __DEFAULT_FN_ATTRS256
8400{
8401 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8402 (__v8sf)
8404 (__mmask8) __U);
8405}
8406
/* Masked single-to-half conversion of a __m128.  A is cast to __v4sf and
 * passed with the int immediate I, the pass-through W (as __v8hi) and the
 * mask U to __builtin_ia32_vcvtps2ph_mask.
 * NOTE(review): I presumably selects the rounding mode -- confirm. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask( \
      (__v4sf)(__m128)(A), (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)))

/* Zero-masking form: _mm_setzero_si128() replaces the pass-through. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask( \
      (__v4sf)(__m128)(A), (int)(I), \
      (__v8hi)_mm_setzero_si128(), \
      (__mmask8)(U)))

/* The cvtps_ph names are plain aliases of the cvt_roundps_ph macros. */
#define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
8419
/* Masked single-to-half conversion of a __m256 into a 128-bit result.  A is
 * cast to __v8sf and passed with the int immediate I, the pass-through W
 * (as __v8hi) and the mask U to __builtin_ia32_vcvtps2ph256_mask.
 * NOTE(review): I presumably selects the rounding mode -- confirm. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask( \
      (__v8sf)(__m256)(A), (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)))

/* Zero-masking form: _mm_setzero_si128() replaces the pass-through. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask( \
      (__v8sf)(__m256)(A), (int)(I), \
      (__v8hi)_mm_setzero_si128(), \
      (__mmask8)(U)))

/* The cvtps_ph names are plain aliases of the cvt_roundps_ph macros. */
#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
8432
8433
8434#undef __DEFAULT_FN_ATTRS128
8435#undef __DEFAULT_FN_ATTRS256
8436
8437#endif /* __AVX512VLINTRIN_H */
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by the number of bits give...
Definition: avx2intrin.h:2443
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi64(__m256i __a, int __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a right by __count bits,...
Definition: avx2intrin.h:2462
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi32(__m128i __V)
Zero-extends bytes from the lower half of the 128-bit integer vector in __V and returns the 32-bit va...
Definition: avx2intrin.h:1552
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b)
Computes the bitwise AND of the 256-bit integer vectors in __a and __b.
Definition: avx2intrin.h:464
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi32(__m128i __V)
Sign-extends 16-bit elements from the 128-bit vector of [8 x i16] in __V and returns the 32-bit value...
Definition: avx2intrin.h:1450
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a left by the number of bits given...
Definition: avx2intrin.h:2199
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors of [4 x i64] in __a and __b...
Definition: avx2intrin.h:2960
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastd_epi32(__m128i __X)
Broadcasts the low element from the 128-bit vector of [4 x i32] in __X to all elements of the result'...
Definition: avx2intrin.h:3268
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a left by the number of bits given...
Definition: avx2intrin.h:2239
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi32(__m128i __V)
Sign-extends bytes from the lower half of the 128-bit integer vector in __V and returns the 32-bit va...
Definition: avx2intrin.h:1395
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a left by __count bits,...
Definition: avx2intrin.h:2178
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epi32(__m256i __a, __m256i __b)
Multiplies signed 32-bit integers from even-numbered elements of two 256-bit vectors of [8 x i32] and...
Definition: avx2intrin.h:1679
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by __count bits,...
Definition: avx2intrin.h:2301
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi32(__m256i __a, __m256i __b)
Multiplies signed 32-bit integer elements of two 256-bit vectors of [8 x i32], and returns the lower ...
Definition: avx2intrin.h:1782
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a, __m256i __b)
Subtracts 64-bit integers from corresponding elements of two 256-bit vectors of [4 x i64].
Definition: avx2intrin.h:2589
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi64(__m256i __X, __m256i __Y)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __X left by the number of bits given...
Definition: avx2intrin.h:3804
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors of [8 x i32] in __a and __b...
Definition: avx2intrin.h:2930
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b)
Compares the corresponding unsigned 32-bit integers in the two 256-bit vectors of [8 x i32] in __a an...
Definition: avx2intrin.h:1204
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X right by the number of bits give...
Definition: avx2intrin.h:3849
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcastsd_pd(__m128d __X)
Broadcasts the 64-bit floating-point value from the low element of the 128-bit vector of [2 x double]...
Definition: avx2intrin.h:3064
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X right by the number of bits give...
Definition: avx2intrin.h:3916
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi64(__m128i __X, __m128i __Y)
Shifts each 64-bit element of the 128-bit vector of [2 x i64] in __X right by the number of bits give...
Definition: avx2intrin.h:3960
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi64(__m256i __a, __m256i __b)
Adds 64-bit integers from corresponding elements of two 256-bit vectors of [4 x i64] and returns the ...
Definition: avx2intrin.h:344
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X left by the number of bits given...
Definition: avx2intrin.h:3782
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors of [8 x i32] in __a and __b...
Definition: avx2intrin.h:2797
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b)
Compares the corresponding signed 32-bit integers in the two 256-bit vectors of [8 x i32] in __a and ...
Definition: avx2intrin.h:1147
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X left by the number of bits given...
Definition: avx2intrin.h:3760
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_broadcastss_ps(__m128 __X)
Broadcasts the 32-bit floating-point value from the low element of the 128-bit vector of [4 x float] ...
Definition: avx2intrin.h:3013
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi64(__m256i __X, __m256i __Y)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __X right by the number of bits give...
Definition: avx2intrin.h:3938
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi32(__m128i __V)
Zero-extends 16-bit elements from the 128-bit vector of [8 x i16] in __V and returns the 32-bit value...
Definition: avx2intrin.h:1603
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by the number of bits give...
Definition: avx2intrin.h:2323
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X right by the number of bits give...
Definition: avx2intrin.h:3872
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b)
Compares the corresponding signed 32-bit integers in the two 256-bit vectors of [8 x i32] in __a and ...
Definition: avx2intrin.h:1261
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi64(__m128i __V)
Zero-extends the first four bytes from the 128-bit integer vector in __V and returns the 64-bit value...
Definition: avx2intrin.h:1577
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X right by the number of bits give...
Definition: avx2intrin.h:3894
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b)
Compares the corresponding unsigned 32-bit integers in the two 256-bit vectors of [8 x i32] in __a an...
Definition: avx2intrin.h:1318
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a, __m256i __b)
Subtracts 32-bit integers from corresponding elements of two 256-bit vectors of [8 x i32].
Definition: avx2intrin.h:2563
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi64(__m128i __V)
Zero-extends 16-bit elements from the lower half of the 128-bit vector of [8 x i16] in __V and return...
Definition: avx2intrin.h:1628
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi64(__m128i __V)
Sign-extends 32-bit elements from the 128-bit vector of [4 x i32] in __V and returns the 64-bit value...
Definition: avx2intrin.h:1500
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors of [4 x i64] in __a and __b...
Definition: avx2intrin.h:2827
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastq_epi64(__m128i __X)
Broadcasts the low element from the 128-bit vector of [2 x i64] in __X to both elements of the result...
Definition: avx2intrin.h:3284
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a right by the number of bits give...
Definition: avx2intrin.h:2483
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by __count bits,...
Definition: avx2intrin.h:2422
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi64(__m256i __a, int __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a left by __count bits,...
Definition: avx2intrin.h:2218
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_epi64(__m128i __V)
Zero-extends 32-bit elements from the 128-bit vector of [4 x i32] in __V and returns the 64-bit value...
Definition: avx2intrin.h:1653
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastd_epi32(__m128i __X)
Broadcasts the low element from the 128-bit vector of [4 x i32] in __X to all elements of the result'...
Definition: avx2intrin.h:3204
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi64(__m128i __V)
Sign-extends 16-bit elements from the lower half of the 128-bit vector of [8 x i16] in __V and return...
Definition: avx2intrin.h:1475
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a)
Computes the absolute value of each signed 32-bit element in the 256-bit vector of [8 x i32] in __a a...
Definition: avx2intrin.h:142
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi64(__m128i __V)
Sign-extends the first four bytes from the 128-bit integer vector in __V and returns the 64-bit value...
Definition: avx2intrin.h:1422
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epu32(__m256i __a, __m256i __b)
Multiplies unsigned 32-bit integers from even-numbered elements of two 256-bit vectors of [8 x i32] an...
Definition: avx2intrin.h:1808
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi64(__m128i __X, __m128i __Y)
Shifts each 64-bit element of the 128-bit vector of [2 x i64] in __X left by the number of bits given...
Definition: avx2intrin.h:3826
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcastss_ps(__m128 __X)
Broadcasts the 32-bit floating-point value from the low element of the 128-bit vector of [4 x float] ...
Definition: avx2intrin.h:3047
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi32(__m256i __a, __m256i __b)
Adds 32-bit integers from corresponding elements of two 256-bit vectors of [8 x i32] and returns the ...
Definition: avx2intrin.h:325
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastq_epi64(__m128i __X)
Broadcasts the low element from the 128-bit vector of [2 x i64] in __X to all elements of the result'...
Definition: avx2intrin.h:3220
unsigned char __mmask8
Definition: avx512fintrin.h:41
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu32(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi32(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
#define _mm_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi64(__m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi32(void *__P, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi32(__m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi64(void *__P, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask(__m256i __A, __m256i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi64(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
#define _mm256_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_compress_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B)
#define _mm_cmpeq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi64(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi64(void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_getexp_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64(__m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi32(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W)
#define _mm256_permutexvar_epi32(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi32(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srai_epi64(__m128i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_pd(__mmask8 __U, __m256d __A)
#define _mm256_cmpeq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_scalef_pd(__m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ps(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expand_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mov_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi64(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi16(__m256i __A)
#define _mm_cmpneq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
#define _mm256_cmpeq_epi32_mask(A, B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutexvar_pd(__m256i __X, __m256d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B)
#define _mm_cmpeq_epi32_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_pd(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ps(__m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu32(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16(__m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
short __v2hi __attribute__((__vector_size__(4)))
#define __DEFAULT_FN_ATTRS256
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi32(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_scalef_ps(__m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16(__m128i __A)
#define _mm256_permutexvar_ps(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
#define __DEFAULT_FN_ATTRS128
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi64(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ps(__m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
#define _mm_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi64(__m256i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi32(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi64(__m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi32(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu32_pd(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi32(__m256i __a, __m256i __b)
#define _mm_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expand_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sra_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
#define _mm_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mov_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi64(__m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi32(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi64(void *__P, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi32(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x4(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ps(__m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_getexp_ps(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu32(__m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_compress_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi32(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi64(void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi64(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_cmpneq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
#define _mm_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps(__m256 __a, __m256 __b)
Adds two 256-bit vectors of [8 x float].
Definition: avxintrin.h:109
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32].
Definition: avxintrin.h:2301
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b)
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double].
Definition: avxintrin.h:2488
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a)
Calculates the square roots of the values in a 256-bit vector of [8 x float].
Definition: avxintrin.h:373
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps(__m256i __a)
Converts a vector of [8 x i32] into a vector of [8 x float].
Definition: avxintrin.h:2209
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a)
Moves and duplicates even-indexed values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float].
Definition: avxintrin.h:2421
static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].
Definition: avxintrin.h:2225
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps(__m256 __a, __m256i __c)
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand.
Definition: avxintrin.h:999
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd(__m128 __a)
Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double].
Definition: avxintrin.h:2260
static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_permutevar_ps(__m128 __a, __m128i __c)
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand.
Definition: avxintrin.h:908
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into four signed truncated (rounded toward zero) 32-bit int...
Definition: avxintrin.h:2281
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32(__m256 __a)
Converts a vector of [8 x float] into a vector of [8 x i32].
Definition: avxintrin.h:2244
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x(long long __q)
Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements se...
Definition: avxintrin.h:4328
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.
Definition: avxintrin.h:303
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a)
Converts a vector of [8 x float] into eight signed truncated (rounded toward zero) 32-bit integers re...
Definition: avxintrin.h:2321
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps(__m256 __a, __m256 __b)
Subtracts two 256-bit vectors of [8 x float].
Definition: avxintrin.h:145
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.
Definition: avxintrin.h:261
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition: avxintrin.h:4354
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a)
Moves and duplicates odd-indexed values from a 256-bit vector of [8 x float] to float values in a 256...
Definition: avxintrin.h:2396
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd(__m256d __a, __m256d __b)
Divides two 256-bit vectors of [4 x double].
Definition: avxintrin.h:201
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a)
Calculates the square roots of the values in a 256-bit vector of [4 x double].
Definition: avxintrin.h:356
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd(__m256d __a, __m256d __b)
Multiplies two 256-bit vectors of [4 x double].
Definition: avxintrin.h:321
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x double].
Definition: avxintrin.h:2194
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b)
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them...
Definition: avxintrin.h:2466
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd(__m256d __a, __m256d __b)
Subtracts two 256-bit vectors of [4 x double].
Definition: avxintrin.h:127
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a)
Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to ...
Definition: avxintrin.h:2443
static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_permutevar_pd(__m128d __a, __m128i __c)
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector oper...
Definition: avxintrin.h:814
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition: avxintrin.h:4342
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps(__m256 __a, __m256 __b)
Divides two 256-bit vectors of [8 x float].
Definition: avxintrin.h:219
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
Definition: avxintrin.h:282
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition: avxintrin.h:4366
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] ...
Definition: avxintrin.h:2515
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps(__m256 __a, __m256 __b)
Multiplies two 256-bit vectors of [8 x float].
Definition: avxintrin.h:339
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] ...
Definition: avxintrin.h:2542
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd(__m256d __a, __m256i __c)
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector oper...
Definition: avxintrin.h:853
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
Definition: avxintrin.h:240
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements se...
Definition: avxintrin.h:4271
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd(__m256d __a, __m256d __b)
Adds two 256-bit vectors of [4 x double].
Definition: avxintrin.h:91
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
Definition: emmintrin.h:227
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2828
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
Definition: emmintrin.h:2679
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3019
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
Definition: emmintrin.h:313
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3037
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
Definition: emmintrin.h:2543
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
Definition: emmintrin.h:1332
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition: emmintrin.h:3880
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
Definition: emmintrin.h:4597
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2921
static __inline__ void int __a
Definition: emmintrin.h:4079
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
Definition: emmintrin.h:4490
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
Definition: emmintrin.h:2577
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
Definition: emmintrin.h:4576
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double].
Definition: emmintrin.h:268
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
Definition: emmintrin.h:186
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition: emmintrin.h:3495
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value.
Definition: emmintrin.h:3676
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3055
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
Definition: emmintrin.h:4469
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2864
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition: emmintrin.h:1876
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2810
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
Definition: emmintrin.h:2114
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
Definition: emmintrin.h:2470
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3073
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
Definition: emmintrin.h:1310
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into four signed truncated (rounded toward zero) 32-bit integers,...
Definition: emmintrin.h:3368
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2846
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
Definition: emmintrin.h:107
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
Definition: emmintrin.h:4667
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
Definition: emmintrin.h:359
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
Definition: emmintrin.h:4687
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
Definition: emmintrin.h:2151
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
Definition: emmintrin.h:3331
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition: emmintrin.h:3711
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2940
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
Definition: emmintrin.h:147
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
Definition: emmintrin.h:3349
struct __storeu_i16 *__P __v
Definition: immintrin.h:472
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double]...
Definition: pmmintrin.h:260
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-b...
Definition: pmmintrin.h:159
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored i...
Definition: pmmintrin.h:138
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition: bmi2intrin.h:19
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V)
Zero-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
Definition: smmintrin.h:1364
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding elements of two 128-bit vectors of [4 x i32] and returns the lower 32 bits of...
Definition: smmintrin.h:545
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
Definition: smmintrin.h:798
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V)
Sign-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
Definition: smmintrin.h:1249
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V)
Zero-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
Definition: smmintrin.h:1436
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V)
Zero-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
Definition: smmintrin.h:1418
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
Definition: smmintrin.h:744
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V)
Sign-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
Definition: smmintrin.h:1307
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V)
Sign-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
Definition: smmintrin.h:1269
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V)
Sign-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
Definition: smmintrin.h:1289
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V)
Zero-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
Definition: smmintrin.h:1400
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
Definition: smmintrin.h:762
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V)
Zero-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
Definition: smmintrin.h:1382
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
Definition: smmintrin.h:780
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding even-indexed elements of two 128-bit vectors of [4 x i32] and returns a 128-...
Definition: smmintrin.h:564
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V)
Sign-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
Definition: smmintrin.h:1325
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:139
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...
Definition: xmmintrin.h:2797
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition.
Definition: xmmintrin.h:106
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
Definition: xmmintrin.h:2776
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
Definition: xmmintrin.h:226
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values.
Definition: xmmintrin.h:423
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values.
Definition: xmmintrin.h:377
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
Definition: xmmintrin.h:260
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition: xmmintrin.h:2029
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
Definition: xmmintrin.h:147
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
Definition: xmmintrin.h:187