/* Doxygen source listing: clang 20.0.0git, avx512vlintrin.h.
 * ("Go to the documentation of this file." was a navigation link.) */
/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLINTRIN_H
15#define __AVX512VLINTRIN_H
16
/* Attribute sets applied to the 128-bit and 256-bit intrinsics below:
 * always inlined, no debug info, target "avx512vl,no-evex512", and a
 * __min_vector_width__ of 128 or 256 respectively. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,no-evex512"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,no-evex512"), \
                 __min_vector_width__(256)))
25
/* Narrow vector types with total sizes of 4, 4 and 2 bytes. */
typedef short __v2hi __attribute__((__vector_size__(4)));
typedef char __v4qi __attribute__((__vector_size__(4)));
typedef char __v2qi __attribute__((__vector_size__(2)));
29
/* Integer compare */

/* epi32, 128-bit: fixed-predicate aliases. Each expands to
 * _mm_cmp_epi32_mask / _mm_mask_cmp_epi32_mask with one _MM_CMPINT_* value. */
#define _mm_cmpeq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
56
/* epi32, 256-bit: fixed-predicate aliases. Each expands to
 * _mm256_cmp_epi32_mask / _mm256_mask_cmp_epi32_mask with one _MM_CMPINT_*
 * value. */
#define _mm256_cmpeq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
81
/* epu32, 128-bit: fixed-predicate aliases. Each expands to
 * _mm_cmp_epu32_mask / _mm_mask_cmp_epu32_mask with one _MM_CMPINT_* value. */
#define _mm_cmpeq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
106
/* epu32, 256-bit: fixed-predicate aliases. Each expands to
 * _mm256_cmp_epu32_mask / _mm256_mask_cmp_epu32_mask with one _MM_CMPINT_*
 * value. */
#define _mm256_cmpeq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
131
/* epi64, 128-bit: fixed-predicate aliases. Each expands to
 * _mm_cmp_epi64_mask / _mm_mask_cmp_epi64_mask with one _MM_CMPINT_* value. */
#define _mm_cmpeq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
156
/* epi64, 256-bit: fixed-predicate aliases. Each expands to
 * _mm256_cmp_epi64_mask / _mm256_mask_cmp_epi64_mask with one _MM_CMPINT_*
 * value. */
#define _mm256_cmpeq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
181
/* epu64, 128-bit: fixed-predicate aliases. Each expands to
 * _mm_cmp_epu64_mask / _mm_mask_cmp_epu64_mask with one _MM_CMPINT_* value. */
#define _mm_cmpeq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
206
/* epu64, 256-bit: fixed-predicate aliases. Each expands to
 * _mm256_cmp_epu64_mask / _mm256_mask_cmp_epu64_mask with one _MM_CMPINT_*
 * value. */
#define _mm256_cmpeq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
231
232static __inline__ __m256i __DEFAULT_FN_ATTRS256
233_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
234{
235 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
236 (__v8si)_mm256_add_epi32(__A, __B),
237 (__v8si)__W);
238}
239
240static __inline__ __m256i __DEFAULT_FN_ATTRS256
241_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
242{
243 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
244 (__v8si)_mm256_add_epi32(__A, __B),
245 (__v8si)_mm256_setzero_si256());
246}
247
248static __inline__ __m256i __DEFAULT_FN_ATTRS256
249_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
250{
251 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
252 (__v4di)_mm256_add_epi64(__A, __B),
253 (__v4di)__W);
254}
255
256static __inline__ __m256i __DEFAULT_FN_ATTRS256
257_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
258{
259 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
260 (__v4di)_mm256_add_epi64(__A, __B),
261 (__v4di)_mm256_setzero_si256());
262}
263
264static __inline__ __m256i __DEFAULT_FN_ATTRS256
265_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
266{
267 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
268 (__v8si)_mm256_sub_epi32(__A, __B),
269 (__v8si)__W);
270}
271
272static __inline__ __m256i __DEFAULT_FN_ATTRS256
273_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
274{
275 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
276 (__v8si)_mm256_sub_epi32(__A, __B),
277 (__v8si)_mm256_setzero_si256());
278}
279
280static __inline__ __m256i __DEFAULT_FN_ATTRS256
281_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
282{
283 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
284 (__v4di)_mm256_sub_epi64(__A, __B),
285 (__v4di)__W);
286}
287
288static __inline__ __m256i __DEFAULT_FN_ATTRS256
289_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
290{
291 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
292 (__v4di)_mm256_sub_epi64(__A, __B),
293 (__v4di)_mm256_setzero_si256());
294}
295
296static __inline__ __m128i __DEFAULT_FN_ATTRS128
297_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
298{
299 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
300 (__v4si)_mm_add_epi32(__A, __B),
301 (__v4si)__W);
302}
303
304static __inline__ __m128i __DEFAULT_FN_ATTRS128
305_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
306{
307 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
308 (__v4si)_mm_add_epi32(__A, __B),
309 (__v4si)_mm_setzero_si128());
310}
311
312static __inline__ __m128i __DEFAULT_FN_ATTRS128
313_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
314{
315 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
316 (__v2di)_mm_add_epi64(__A, __B),
317 (__v2di)__W);
318}
319
320static __inline__ __m128i __DEFAULT_FN_ATTRS128
321_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
322{
323 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
324 (__v2di)_mm_add_epi64(__A, __B),
325 (__v2di)_mm_setzero_si128());
326}
327
328static __inline__ __m128i __DEFAULT_FN_ATTRS128
329_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
330{
331 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
332 (__v4si)_mm_sub_epi32(__A, __B),
333 (__v4si)__W);
334}
335
336static __inline__ __m128i __DEFAULT_FN_ATTRS128
337_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
338{
339 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
340 (__v4si)_mm_sub_epi32(__A, __B),
341 (__v4si)_mm_setzero_si128());
342}
343
344static __inline__ __m128i __DEFAULT_FN_ATTRS128
345_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
346{
347 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
348 (__v2di)_mm_sub_epi64(__A, __B),
349 (__v2di)__W);
350}
351
352static __inline__ __m128i __DEFAULT_FN_ATTRS128
353_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
354{
355 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
356 (__v2di)_mm_sub_epi64(__A, __B),
357 (__v2di)_mm_setzero_si128());
358}
359
360static __inline__ __m256i __DEFAULT_FN_ATTRS256
361_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
362{
363 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
364 (__v4di)_mm256_mul_epi32(__X, __Y),
365 (__v4di)__W);
366}
367
368static __inline__ __m256i __DEFAULT_FN_ATTRS256
369_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
370{
371 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
372 (__v4di)_mm256_mul_epi32(__X, __Y),
373 (__v4di)_mm256_setzero_si256());
374}
375
376static __inline__ __m128i __DEFAULT_FN_ATTRS128
377_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
378{
379 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
380 (__v2di)_mm_mul_epi32(__X, __Y),
381 (__v2di)__W);
382}
383
384static __inline__ __m128i __DEFAULT_FN_ATTRS128
385_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
386{
387 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
388 (__v2di)_mm_mul_epi32(__X, __Y),
389 (__v2di)_mm_setzero_si128());
390}
391
392static __inline__ __m256i __DEFAULT_FN_ATTRS256
393_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
394{
395 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
396 (__v4di)_mm256_mul_epu32(__X, __Y),
397 (__v4di)__W);
398}
399
400static __inline__ __m256i __DEFAULT_FN_ATTRS256
401_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
402{
403 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
404 (__v4di)_mm256_mul_epu32(__X, __Y),
405 (__v4di)_mm256_setzero_si256());
406}
407
408static __inline__ __m128i __DEFAULT_FN_ATTRS128
409_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
410{
411 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
412 (__v2di)_mm_mul_epu32(__X, __Y),
413 (__v2di)__W);
414}
415
416static __inline__ __m128i __DEFAULT_FN_ATTRS128
417_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
418{
419 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
420 (__v2di)_mm_mul_epu32(__X, __Y),
421 (__v2di)_mm_setzero_si128());
422}
423
424static __inline__ __m256i __DEFAULT_FN_ATTRS256
425_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
426{
427 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
428 (__v8si)_mm256_mullo_epi32(__A, __B),
429 (__v8si)_mm256_setzero_si256());
430}
431
432static __inline__ __m256i __DEFAULT_FN_ATTRS256
433_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
434{
435 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
436 (__v8si)_mm256_mullo_epi32(__A, __B),
437 (__v8si)__W);
438}
439
440static __inline__ __m128i __DEFAULT_FN_ATTRS128
441_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
442{
443 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
444 (__v4si)_mm_mullo_epi32(__A, __B),
445 (__v4si)_mm_setzero_si128());
446}
447
448static __inline__ __m128i __DEFAULT_FN_ATTRS128
449_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
450{
451 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
452 (__v4si)_mm_mullo_epi32(__A, __B),
453 (__v4si)__W);
454}
455
456static __inline__ __m256i __DEFAULT_FN_ATTRS256
457_mm256_and_epi32(__m256i __a, __m256i __b)
458{
459 return (__m256i)((__v8su)__a & (__v8su)__b);
460}
461
462static __inline__ __m256i __DEFAULT_FN_ATTRS256
463_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
464{
465 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
466 (__v8si)_mm256_and_epi32(__A, __B),
467 (__v8si)__W);
468}
469
470static __inline__ __m256i __DEFAULT_FN_ATTRS256
471_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
472{
473 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
474}
475
476static __inline__ __m128i __DEFAULT_FN_ATTRS128
477_mm_and_epi32(__m128i __a, __m128i __b)
478{
479 return (__m128i)((__v4su)__a & (__v4su)__b);
480}
481
482static __inline__ __m128i __DEFAULT_FN_ATTRS128
483_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
484{
485 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
486 (__v4si)_mm_and_epi32(__A, __B),
487 (__v4si)__W);
488}
489
490static __inline__ __m128i __DEFAULT_FN_ATTRS128
491_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
492{
493 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
494}
495
496static __inline__ __m256i __DEFAULT_FN_ATTRS256
497_mm256_andnot_epi32(__m256i __A, __m256i __B)
498{
499 return (__m256i)(~(__v8su)__A & (__v8su)__B);
500}
501
502static __inline__ __m256i __DEFAULT_FN_ATTRS256
503_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
504{
505 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
506 (__v8si)_mm256_andnot_epi32(__A, __B),
507 (__v8si)__W);
508}
509
510static __inline__ __m256i __DEFAULT_FN_ATTRS256
511_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
512{
514 __U, __A, __B);
515}
516
517static __inline__ __m128i __DEFAULT_FN_ATTRS128
518_mm_andnot_epi32(__m128i __A, __m128i __B)
519{
520 return (__m128i)(~(__v4su)__A & (__v4su)__B);
521}
522
523static __inline__ __m128i __DEFAULT_FN_ATTRS128
524_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
525{
526 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
527 (__v4si)_mm_andnot_epi32(__A, __B),
528 (__v4si)__W);
529}
530
531static __inline__ __m128i __DEFAULT_FN_ATTRS128
532_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
533{
534 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
535}
536
537static __inline__ __m256i __DEFAULT_FN_ATTRS256
538_mm256_or_epi32(__m256i __a, __m256i __b)
539{
540 return (__m256i)((__v8su)__a | (__v8su)__b);
541}
542
543static __inline__ __m256i __DEFAULT_FN_ATTRS256
544_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
545{
546 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
547 (__v8si)_mm256_or_epi32(__A, __B),
548 (__v8si)__W);
549}
550
551static __inline__ __m256i __DEFAULT_FN_ATTRS256
552_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
553{
554 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
555}
556
557static __inline__ __m128i __DEFAULT_FN_ATTRS128
558_mm_or_epi32(__m128i __a, __m128i __b)
559{
560 return (__m128i)((__v4su)__a | (__v4su)__b);
561}
562
563static __inline__ __m128i __DEFAULT_FN_ATTRS128
564_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
565{
566 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
567 (__v4si)_mm_or_epi32(__A, __B),
568 (__v4si)__W);
569}
570
571static __inline__ __m128i __DEFAULT_FN_ATTRS128
572_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
573{
574 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
575}
576
577static __inline__ __m256i __DEFAULT_FN_ATTRS256
578_mm256_xor_epi32(__m256i __a, __m256i __b)
579{
580 return (__m256i)((__v8su)__a ^ (__v8su)__b);
581}
582
583static __inline__ __m256i __DEFAULT_FN_ATTRS256
584_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
585{
586 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
587 (__v8si)_mm256_xor_epi32(__A, __B),
588 (__v8si)__W);
589}
590
591static __inline__ __m256i __DEFAULT_FN_ATTRS256
592_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
593{
594 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
595}
596
597static __inline__ __m128i __DEFAULT_FN_ATTRS128
598_mm_xor_epi32(__m128i __a, __m128i __b)
599{
600 return (__m128i)((__v4su)__a ^ (__v4su)__b);
601}
602
603static __inline__ __m128i __DEFAULT_FN_ATTRS128
604_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
605{
606 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
607 (__v4si)_mm_xor_epi32(__A, __B),
608 (__v4si)__W);
609}
610
611static __inline__ __m128i __DEFAULT_FN_ATTRS128
612_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
613{
614 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
615}
616
617static __inline__ __m256i __DEFAULT_FN_ATTRS256
618_mm256_and_epi64(__m256i __a, __m256i __b)
619{
620 return (__m256i)((__v4du)__a & (__v4du)__b);
621}
622
623static __inline__ __m256i __DEFAULT_FN_ATTRS256
624_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
625{
626 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
627 (__v4di)_mm256_and_epi64(__A, __B),
628 (__v4di)__W);
629}
630
631static __inline__ __m256i __DEFAULT_FN_ATTRS256
632_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
633{
634 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
635}
636
637static __inline__ __m128i __DEFAULT_FN_ATTRS128
638_mm_and_epi64(__m128i __a, __m128i __b)
639{
640 return (__m128i)((__v2du)__a & (__v2du)__b);
641}
642
643static __inline__ __m128i __DEFAULT_FN_ATTRS128
644_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
645{
646 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
647 (__v2di)_mm_and_epi64(__A, __B),
648 (__v2di)__W);
649}
650
651static __inline__ __m128i __DEFAULT_FN_ATTRS128
652_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
653{
654 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
655}
656
657static __inline__ __m256i __DEFAULT_FN_ATTRS256
658_mm256_andnot_epi64(__m256i __A, __m256i __B)
659{
660 return (__m256i)(~(__v4du)__A & (__v4du)__B);
661}
662
663static __inline__ __m256i __DEFAULT_FN_ATTRS256
664_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
665{
666 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
667 (__v4di)_mm256_andnot_epi64(__A, __B),
668 (__v4di)__W);
669}
670
671static __inline__ __m256i __DEFAULT_FN_ATTRS256
672_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
673{
675 __U, __A, __B);
676}
677
678static __inline__ __m128i __DEFAULT_FN_ATTRS128
679_mm_andnot_epi64(__m128i __A, __m128i __B)
680{
681 return (__m128i)(~(__v2du)__A & (__v2du)__B);
682}
683
684static __inline__ __m128i __DEFAULT_FN_ATTRS128
685_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
686{
687 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
688 (__v2di)_mm_andnot_epi64(__A, __B),
689 (__v2di)__W);
690}
691
692static __inline__ __m128i __DEFAULT_FN_ATTRS128
693_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
694{
695 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
696}
697
698static __inline__ __m256i __DEFAULT_FN_ATTRS256
699_mm256_or_epi64(__m256i __a, __m256i __b)
700{
701 return (__m256i)((__v4du)__a | (__v4du)__b);
702}
703
704static __inline__ __m256i __DEFAULT_FN_ATTRS256
705_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
706{
707 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
708 (__v4di)_mm256_or_epi64(__A, __B),
709 (__v4di)__W);
710}
711
712static __inline__ __m256i __DEFAULT_FN_ATTRS256
713_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
714{
715 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
716}
717
718static __inline__ __m128i __DEFAULT_FN_ATTRS128
719_mm_or_epi64(__m128i __a, __m128i __b)
720{
721 return (__m128i)((__v2du)__a | (__v2du)__b);
722}
723
724static __inline__ __m128i __DEFAULT_FN_ATTRS128
725_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
726{
727 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
728 (__v2di)_mm_or_epi64(__A, __B),
729 (__v2di)__W);
730}
731
732static __inline__ __m128i __DEFAULT_FN_ATTRS128
733_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
734{
735 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
736}
737
738static __inline__ __m256i __DEFAULT_FN_ATTRS256
739_mm256_xor_epi64(__m256i __a, __m256i __b)
740{
741 return (__m256i)((__v4du)__a ^ (__v4du)__b);
742}
743
744static __inline__ __m256i __DEFAULT_FN_ATTRS256
745_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
746{
747 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
748 (__v4di)_mm256_xor_epi64(__A, __B),
749 (__v4di)__W);
750}
751
752static __inline__ __m256i __DEFAULT_FN_ATTRS256
753_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
754{
755 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
756}
757
758static __inline__ __m128i __DEFAULT_FN_ATTRS128
759_mm_xor_epi64(__m128i __a, __m128i __b)
760{
761 return (__m128i)((__v2du)__a ^ (__v2du)__b);
762}
763
764static __inline__ __m128i __DEFAULT_FN_ATTRS128
765_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
766 __m128i __B)
767{
768 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
769 (__v2di)_mm_xor_epi64(__A, __B),
770 (__v2di)__W);
771}
772
773static __inline__ __m128i __DEFAULT_FN_ATTRS128
774_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
775{
776 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
777}
778
/* Generic 128-bit 32-bit-element compares with predicate p. The epi32 forms
 * call the cmpd128 builtin and the epu32 forms the ucmpd128 builtin. The
 * unmasked forms pass (__mmask8)-1 as the mask, the _mask_ forms pass m. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
798
/* Generic 256-bit 32-bit-element compares with predicate p. The epi32 forms
 * call the cmpd256 builtin and the epu32 forms the ucmpd256 builtin. The
 * unmasked forms pass (__mmask8)-1 as the mask, the _mask_ forms pass m. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
818
/* Generic 128-bit 64-bit-element compares with predicate p. The epi64 forms
 * call the cmpq128 builtin and the epu64 forms the ucmpq128 builtin. The
 * unmasked forms pass (__mmask8)-1 as the mask, the _mask_ forms pass m. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
838
/* Generic 256-bit 64-bit-element compares with predicate p. The epi64 forms
 * call the cmpq256 builtin and the epu64 forms the ucmpq256 builtin. The
 * unmasked forms pass (__mmask8)-1 as the mask, the _mask_ forms pass m. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
858
/* Generic 256-bit floating-point compares with predicate p: the ps forms
 * call the cmpps256 builtin on __v8sf operands, the pd forms the cmppd256
 * builtin on __v4df operands. The unmasked forms pass (__mmask8)-1 as the
 * mask, the _mask_ forms pass m. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)(m)))
878
879#define _mm_cmp_ps_mask(a, b, p) \
880 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
881 (__v4sf)(__m128)(b), (int)(p), \
882 (__mmask8)-1))
883
884#define _mm_mask_cmp_ps_mask(m, a, b, p) \
885 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
886 (__v4sf)(__m128)(b), (int)(p), \
887 (__mmask8)(m)))
888
/* cmppd128_mask(A, B, P) on __v2df views, with (__mmask8)-1 as the mask. */
#define _mm_cmp_pd_mask(A, B, P) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(P), (__mmask8)-1))
893
/* cmppd128_mask(A, B, P) on __v2df views, with M as the mask operand. */
#define _mm_mask_cmp_pd_mask(M, A, B, P) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(P), (__mmask8)(M)))
898
899static __inline__ __m128d __DEFAULT_FN_ATTRS128
900_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
901{
902 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
903 __builtin_ia32_vfmaddpd ((__v2df) __A,
904 (__v2df) __B,
905 (__v2df) __C),
906 (__v2df) __A);
907}
908
909static __inline__ __m128d __DEFAULT_FN_ATTRS128
910_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
911{
912 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
913 __builtin_ia32_vfmaddpd ((__v2df) __A,
914 (__v2df) __B,
915 (__v2df) __C),
916 (__v2df) __C);
917}
918
919static __inline__ __m128d __DEFAULT_FN_ATTRS128
920_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
921{
922 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
923 __builtin_ia32_vfmaddpd ((__v2df) __A,
924 (__v2df) __B,
925 (__v2df) __C),
926 (__v2df)_mm_setzero_pd());
927}
928
929static __inline__ __m128d __DEFAULT_FN_ATTRS128
930_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
931{
932 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
933 __builtin_ia32_vfmaddpd ((__v2df) __A,
934 (__v2df) __B,
935 -(__v2df) __C),
936 (__v2df) __A);
937}
938
939static __inline__ __m128d __DEFAULT_FN_ATTRS128
940_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
941{
942 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
943 __builtin_ia32_vfmaddpd ((__v2df) __A,
944 (__v2df) __B,
945 -(__v2df) __C),
946 (__v2df)_mm_setzero_pd());
947}
948
949static __inline__ __m128d __DEFAULT_FN_ATTRS128
950_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
951{
952 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
953 __builtin_ia32_vfmaddpd (-(__v2df) __A,
954 (__v2df) __B,
955 (__v2df) __C),
956 (__v2df) __C);
957}
958
959static __inline__ __m128d __DEFAULT_FN_ATTRS128
960_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
961{
962 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
963 __builtin_ia32_vfmaddpd (-(__v2df) __A,
964 (__v2df) __B,
965 (__v2df) __C),
966 (__v2df)_mm_setzero_pd());
967}
968
969static __inline__ __m128d __DEFAULT_FN_ATTRS128
970_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
971{
972 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
973 __builtin_ia32_vfmaddpd (-(__v2df) __A,
974 (__v2df) __B,
975 -(__v2df) __C),
976 (__v2df)_mm_setzero_pd());
977}
978
979static __inline__ __m256d __DEFAULT_FN_ATTRS256
980_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
981{
982 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
983 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
984 (__v4df) __B,
985 (__v4df) __C),
986 (__v4df) __A);
987}
988
989static __inline__ __m256d __DEFAULT_FN_ATTRS256
990_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
991{
992 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
993 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
994 (__v4df) __B,
995 (__v4df) __C),
996 (__v4df) __C);
997}
998
999static __inline__ __m256d __DEFAULT_FN_ATTRS256
1000_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1001{
1002 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1003 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1004 (__v4df) __B,
1005 (__v4df) __C),
1006 (__v4df)_mm256_setzero_pd());
1007}
1008
1009static __inline__ __m256d __DEFAULT_FN_ATTRS256
1010_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1011{
1012 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1013 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1014 (__v4df) __B,
1015 -(__v4df) __C),
1016 (__v4df) __A);
1017}
1018
1019static __inline__ __m256d __DEFAULT_FN_ATTRS256
1020_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1021{
1022 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1023 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1024 (__v4df) __B,
1025 -(__v4df) __C),
1026 (__v4df)_mm256_setzero_pd());
1027}
1028
1029static __inline__ __m256d __DEFAULT_FN_ATTRS256
1030_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1031{
1032 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1033 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1034 (__v4df) __B,
1035 (__v4df) __C),
1036 (__v4df) __C);
1037}
1038
1039static __inline__ __m256d __DEFAULT_FN_ATTRS256
1040_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1041{
1042 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1043 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1044 (__v4df) __B,
1045 (__v4df) __C),
1046 (__v4df)_mm256_setzero_pd());
1047}
1048
1049static __inline__ __m256d __DEFAULT_FN_ATTRS256
1050_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1051{
1052 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1053 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1054 (__v4df) __B,
1055 -(__v4df) __C),
1056 (__v4df)_mm256_setzero_pd());
1057}
1058
1059static __inline__ __m128 __DEFAULT_FN_ATTRS128
1060_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1061{
1062 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1063 __builtin_ia32_vfmaddps ((__v4sf) __A,
1064 (__v4sf) __B,
1065 (__v4sf) __C),
1066 (__v4sf) __A);
1067}
1068
1069static __inline__ __m128 __DEFAULT_FN_ATTRS128
1070_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1071{
1072 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1073 __builtin_ia32_vfmaddps ((__v4sf) __A,
1074 (__v4sf) __B,
1075 (__v4sf) __C),
1076 (__v4sf) __C);
1077}
1078
1079static __inline__ __m128 __DEFAULT_FN_ATTRS128
1080_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1081{
1082 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1083 __builtin_ia32_vfmaddps ((__v4sf) __A,
1084 (__v4sf) __B,
1085 (__v4sf) __C),
1086 (__v4sf)_mm_setzero_ps());
1087}
1088
1089static __inline__ __m128 __DEFAULT_FN_ATTRS128
1090_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1091{
1092 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1093 __builtin_ia32_vfmaddps ((__v4sf) __A,
1094 (__v4sf) __B,
1095 -(__v4sf) __C),
1096 (__v4sf) __A);
1097}
1098
1099static __inline__ __m128 __DEFAULT_FN_ATTRS128
1100_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1101{
1102 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1103 __builtin_ia32_vfmaddps ((__v4sf) __A,
1104 (__v4sf) __B,
1105 -(__v4sf) __C),
1106 (__v4sf)_mm_setzero_ps());
1107}
1108
1109static __inline__ __m128 __DEFAULT_FN_ATTRS128
1110_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1111{
1112 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1113 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1114 (__v4sf) __B,
1115 (__v4sf) __C),
1116 (__v4sf) __C);
1117}
1118
1119static __inline__ __m128 __DEFAULT_FN_ATTRS128
1120_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1121{
1122 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1123 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1124 (__v4sf) __B,
1125 (__v4sf) __C),
1126 (__v4sf)_mm_setzero_ps());
1127}
1128
1129static __inline__ __m128 __DEFAULT_FN_ATTRS128
1130_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1131{
1132 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1133 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1134 (__v4sf) __B,
1135 -(__v4sf) __C),
1136 (__v4sf)_mm_setzero_ps());
1137}
1138
1139static __inline__ __m256 __DEFAULT_FN_ATTRS256
1140_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1141{
1142 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1143 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1144 (__v8sf) __B,
1145 (__v8sf) __C),
1146 (__v8sf) __A);
1147}
1148
1149static __inline__ __m256 __DEFAULT_FN_ATTRS256
1150_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1151{
1152 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1153 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1154 (__v8sf) __B,
1155 (__v8sf) __C),
1156 (__v8sf) __C);
1157}
1158
1159static __inline__ __m256 __DEFAULT_FN_ATTRS256
1160_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1161{
1162 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1163 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1164 (__v8sf) __B,
1165 (__v8sf) __C),
1166 (__v8sf)_mm256_setzero_ps());
1167}
1168
1169static __inline__ __m256 __DEFAULT_FN_ATTRS256
1170_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1171{
1172 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1173 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1174 (__v8sf) __B,
1175 -(__v8sf) __C),
1176 (__v8sf) __A);
1177}
1178
1179static __inline__ __m256 __DEFAULT_FN_ATTRS256
1180_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1181{
1182 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1183 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1184 (__v8sf) __B,
1185 -(__v8sf) __C),
1186 (__v8sf)_mm256_setzero_ps());
1187}
1188
1189static __inline__ __m256 __DEFAULT_FN_ATTRS256
1190_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1191{
1192 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1193 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1194 (__v8sf) __B,
1195 (__v8sf) __C),
1196 (__v8sf) __C);
1197}
1198
1199static __inline__ __m256 __DEFAULT_FN_ATTRS256
1200_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1201{
1202 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1203 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1204 (__v8sf) __B,
1205 (__v8sf) __C),
1206 (__v8sf)_mm256_setzero_ps());
1207}
1208
1209static __inline__ __m256 __DEFAULT_FN_ATTRS256
1210_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1211{
1212 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1213 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1214 (__v8sf) __B,
1215 -(__v8sf) __C),
1216 (__v8sf)_mm256_setzero_ps());
1217}
1218
1219static __inline__ __m128d __DEFAULT_FN_ATTRS128
1220_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1221{
1222 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1223 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1224 (__v2df) __B,
1225 (__v2df) __C),
1226 (__v2df) __A);
1227}
1228
1229static __inline__ __m128d __DEFAULT_FN_ATTRS128
1230_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1231{
1232 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1233 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1234 (__v2df) __B,
1235 (__v2df) __C),
1236 (__v2df) __C);
1237}
1238
1239static __inline__ __m128d __DEFAULT_FN_ATTRS128
1240_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1241{
1242 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1243 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1244 (__v2df) __B,
1245 (__v2df) __C),
1246 (__v2df)_mm_setzero_pd());
1247}
1248
1249static __inline__ __m128d __DEFAULT_FN_ATTRS128
1250_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1251{
1252 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1253 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1254 (__v2df) __B,
1255 -(__v2df) __C),
1256 (__v2df) __A);
1257}
1258
1259static __inline__ __m128d __DEFAULT_FN_ATTRS128
1260_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1261{
1262 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1263 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1264 (__v2df) __B,
1265 -(__v2df) __C),
1266 (__v2df)_mm_setzero_pd());
1267}
1268
1269static __inline__ __m256d __DEFAULT_FN_ATTRS256
1270_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1271{
1272 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1273 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1274 (__v4df) __B,
1275 (__v4df) __C),
1276 (__v4df) __A);
1277}
1278
1279static __inline__ __m256d __DEFAULT_FN_ATTRS256
1280_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1281{
1282 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1283 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1284 (__v4df) __B,
1285 (__v4df) __C),
1286 (__v4df) __C);
1287}
1288
1289static __inline__ __m256d __DEFAULT_FN_ATTRS256
1290_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1291{
1292 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1293 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1294 (__v4df) __B,
1295 (__v4df) __C),
1296 (__v4df)_mm256_setzero_pd());
1297}
1298
1299static __inline__ __m256d __DEFAULT_FN_ATTRS256
1300_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1301{
1302 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1303 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1304 (__v4df) __B,
1305 -(__v4df) __C),
1306 (__v4df) __A);
1307}
1308
1309static __inline__ __m256d __DEFAULT_FN_ATTRS256
1310_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1311{
1312 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1313 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1314 (__v4df) __B,
1315 -(__v4df) __C),
1316 (__v4df)_mm256_setzero_pd());
1317}
1318
1319static __inline__ __m128 __DEFAULT_FN_ATTRS128
1320_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1321{
1322 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1323 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1324 (__v4sf) __B,
1325 (__v4sf) __C),
1326 (__v4sf) __A);
1327}
1328
1329static __inline__ __m128 __DEFAULT_FN_ATTRS128
1330_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1331{
1332 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1333 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1334 (__v4sf) __B,
1335 (__v4sf) __C),
1336 (__v4sf) __C);
1337}
1338
1339static __inline__ __m128 __DEFAULT_FN_ATTRS128
1340_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1341{
1342 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1343 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1344 (__v4sf) __B,
1345 (__v4sf) __C),
1346 (__v4sf)_mm_setzero_ps());
1347}
1348
1349static __inline__ __m128 __DEFAULT_FN_ATTRS128
1350_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1351{
1352 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1353 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1354 (__v4sf) __B,
1355 -(__v4sf) __C),
1356 (__v4sf) __A);
1357}
1358
1359static __inline__ __m128 __DEFAULT_FN_ATTRS128
1360_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1361{
1362 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1363 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1364 (__v4sf) __B,
1365 -(__v4sf) __C),
1366 (__v4sf)_mm_setzero_ps());
1367}
1368
1369static __inline__ __m256 __DEFAULT_FN_ATTRS256
1370_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1371 __m256 __C)
1372{
1373 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1374 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1375 (__v8sf) __B,
1376 (__v8sf) __C),
1377 (__v8sf) __A);
1378}
1379
1380static __inline__ __m256 __DEFAULT_FN_ATTRS256
1381_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1382{
1383 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1384 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1385 (__v8sf) __B,
1386 (__v8sf) __C),
1387 (__v8sf) __C);
1388}
1389
1390static __inline__ __m256 __DEFAULT_FN_ATTRS256
1391_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1392{
1393 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1394 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1395 (__v8sf) __B,
1396 (__v8sf) __C),
1397 (__v8sf)_mm256_setzero_ps());
1398}
1399
1400static __inline__ __m256 __DEFAULT_FN_ATTRS256
1401_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1402{
1403 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1404 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1405 (__v8sf) __B,
1406 -(__v8sf) __C),
1407 (__v8sf) __A);
1408}
1409
1410static __inline__ __m256 __DEFAULT_FN_ATTRS256
1411_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1412{
1413 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1414 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1415 (__v8sf) __B,
1416 -(__v8sf) __C),
1417 (__v8sf)_mm256_setzero_ps());
1418}
1419
1420static __inline__ __m128d __DEFAULT_FN_ATTRS128
1421_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1422{
1423 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1424 __builtin_ia32_vfmaddpd ((__v2df) __A,
1425 (__v2df) __B,
1426 -(__v2df) __C),
1427 (__v2df) __C);
1428}
1429
1430static __inline__ __m256d __DEFAULT_FN_ATTRS256
1431_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1432{
1433 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1434 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1435 (__v4df) __B,
1436 -(__v4df) __C),
1437 (__v4df) __C);
1438}
1439
1440static __inline__ __m128 __DEFAULT_FN_ATTRS128
1441_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1442{
1443 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1444 __builtin_ia32_vfmaddps ((__v4sf) __A,
1445 (__v4sf) __B,
1446 -(__v4sf) __C),
1447 (__v4sf) __C);
1448}
1449
1450static __inline__ __m256 __DEFAULT_FN_ATTRS256
1451_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1452{
1453 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1454 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1455 (__v8sf) __B,
1456 -(__v8sf) __C),
1457 (__v8sf) __C);
1458}
1459
1460static __inline__ __m128d __DEFAULT_FN_ATTRS128
1461_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1462{
1463 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1464 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1465 (__v2df) __B,
1466 -(__v2df) __C),
1467 (__v2df) __C);
1468}
1469
1470static __inline__ __m256d __DEFAULT_FN_ATTRS256
1471_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1472{
1473 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1474 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1475 (__v4df) __B,
1476 -(__v4df) __C),
1477 (__v4df) __C);
1478}
1479
1480static __inline__ __m128 __DEFAULT_FN_ATTRS128
1481_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1482{
1483 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1484 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1485 (__v4sf) __B,
1486 -(__v4sf) __C),
1487 (__v4sf) __C);
1488}
1489
1490static __inline__ __m256 __DEFAULT_FN_ATTRS256
1491_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1492{
1493 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1494 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1495 (__v8sf) __B,
1496 -(__v8sf) __C),
1497 (__v8sf) __C);
1498}
1499
1500static __inline__ __m128d __DEFAULT_FN_ATTRS128
1501_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1502{
1503 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1504 __builtin_ia32_vfmaddpd ((__v2df) __A,
1505 -(__v2df) __B,
1506 (__v2df) __C),
1507 (__v2df) __A);
1508}
1509
1510static __inline__ __m256d __DEFAULT_FN_ATTRS256
1511_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1512{
1513 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1514 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1515 -(__v4df) __B,
1516 (__v4df) __C),
1517 (__v4df) __A);
1518}
1519
1520static __inline__ __m128 __DEFAULT_FN_ATTRS128
1521_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1522{
1523 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1524 __builtin_ia32_vfmaddps ((__v4sf) __A,
1525 -(__v4sf) __B,
1526 (__v4sf) __C),
1527 (__v4sf) __A);
1528}
1529
1530static __inline__ __m256 __DEFAULT_FN_ATTRS256
1531_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1532{
1533 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1534 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1535 -(__v8sf) __B,
1536 (__v8sf) __C),
1537 (__v8sf) __A);
1538}
1539
1540static __inline__ __m128d __DEFAULT_FN_ATTRS128
1541_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1542{
1543 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1544 __builtin_ia32_vfmaddpd ((__v2df) __A,
1545 -(__v2df) __B,
1546 -(__v2df) __C),
1547 (__v2df) __A);
1548}
1549
1550static __inline__ __m128d __DEFAULT_FN_ATTRS128
1551_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1552{
1553 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1554 __builtin_ia32_vfmaddpd ((__v2df) __A,
1555 -(__v2df) __B,
1556 -(__v2df) __C),
1557 (__v2df) __C);
1558}
1559
1560static __inline__ __m256d __DEFAULT_FN_ATTRS256
1561_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1562{
1563 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1564 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1565 -(__v4df) __B,
1566 -(__v4df) __C),
1567 (__v4df) __A);
1568}
1569
1570static __inline__ __m256d __DEFAULT_FN_ATTRS256
1571_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1572{
1573 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1574 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1575 -(__v4df) __B,
1576 -(__v4df) __C),
1577 (__v4df) __C);
1578}
1579
1580static __inline__ __m128 __DEFAULT_FN_ATTRS128
1581_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1582{
1583 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1584 __builtin_ia32_vfmaddps ((__v4sf) __A,
1585 -(__v4sf) __B,
1586 -(__v4sf) __C),
1587 (__v4sf) __A);
1588}
1589
1590static __inline__ __m128 __DEFAULT_FN_ATTRS128
1591_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1592{
1593 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1594 __builtin_ia32_vfmaddps ((__v4sf) __A,
1595 -(__v4sf) __B,
1596 -(__v4sf) __C),
1597 (__v4sf) __C);
1598}
1599
1600static __inline__ __m256 __DEFAULT_FN_ATTRS256
1601_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1602{
1603 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1604 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1605 -(__v8sf) __B,
1606 -(__v8sf) __C),
1607 (__v8sf) __A);
1608}
1609
1610static __inline__ __m256 __DEFAULT_FN_ATTRS256
1611_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1612{
1613 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1614 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1615 -(__v8sf) __B,
1616 -(__v8sf) __C),
1617 (__v8sf) __C);
1618}
1619
1620static __inline__ __m128d __DEFAULT_FN_ATTRS128
1621_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1622 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1623 (__v2df)_mm_add_pd(__A, __B),
1624 (__v2df)__W);
1625}
1626
1627static __inline__ __m128d __DEFAULT_FN_ATTRS128
1628_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1629 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1630 (__v2df)_mm_add_pd(__A, __B),
1631 (__v2df)_mm_setzero_pd());
1632}
1633
1634static __inline__ __m256d __DEFAULT_FN_ATTRS256
1635_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1636 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1637 (__v4df)_mm256_add_pd(__A, __B),
1638 (__v4df)__W);
1639}
1640
1641static __inline__ __m256d __DEFAULT_FN_ATTRS256
1642_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1643 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1644 (__v4df)_mm256_add_pd(__A, __B),
1645 (__v4df)_mm256_setzero_pd());
1646}
1647
1648static __inline__ __m128 __DEFAULT_FN_ATTRS128
1649_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1650 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1651 (__v4sf)_mm_add_ps(__A, __B),
1652 (__v4sf)__W);
1653}
1654
1655static __inline__ __m128 __DEFAULT_FN_ATTRS128
1656_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1657 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1658 (__v4sf)_mm_add_ps(__A, __B),
1659 (__v4sf)_mm_setzero_ps());
1660}
1661
1662static __inline__ __m256 __DEFAULT_FN_ATTRS256
1663_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1664 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1665 (__v8sf)_mm256_add_ps(__A, __B),
1666 (__v8sf)__W);
1667}
1668
1669static __inline__ __m256 __DEFAULT_FN_ATTRS256
1670_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1671 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1672 (__v8sf)_mm256_add_ps(__A, __B),
1673 (__v8sf)_mm256_setzero_ps());
1674}
1675
1676static __inline__ __m128i __DEFAULT_FN_ATTRS128
1677_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1678 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1679 (__v4si) __W,
1680 (__v4si) __A);
1681}
1682
1683static __inline__ __m256i __DEFAULT_FN_ATTRS256
1684_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1685 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1686 (__v8si) __W,
1687 (__v8si) __A);
1688}
1689
1690static __inline__ __m128d __DEFAULT_FN_ATTRS128
1691_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1692 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1693 (__v2df) __W,
1694 (__v2df) __A);
1695}
1696
1697static __inline__ __m256d __DEFAULT_FN_ATTRS256
1698_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1699 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1700 (__v4df) __W,
1701 (__v4df) __A);
1702}
1703
1704static __inline__ __m128 __DEFAULT_FN_ATTRS128
1705_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1706 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1707 (__v4sf) __W,
1708 (__v4sf) __A);
1709}
1710
1711static __inline__ __m256 __DEFAULT_FN_ATTRS256
1712_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1713 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1714 (__v8sf) __W,
1715 (__v8sf) __A);
1716}
1717
1718static __inline__ __m128i __DEFAULT_FN_ATTRS128
1719_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1720 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1721 (__v2di) __W,
1722 (__v2di) __A);
1723}
1724
1725static __inline__ __m256i __DEFAULT_FN_ATTRS256
1726_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1727 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1728 (__v4di) __W,
1729 (__v4di) __A);
1730}
1731
1732static __inline__ __m128d __DEFAULT_FN_ATTRS128
1733_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1734 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1735 (__v2df) __W,
1736 (__mmask8) __U);
1737}
1738
1739static __inline__ __m128d __DEFAULT_FN_ATTRS128
1741 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1742 (__v2df)
1743 _mm_setzero_pd (),
1744 (__mmask8) __U);
1745}
1746
1747static __inline__ __m256d __DEFAULT_FN_ATTRS256
1748_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1749 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1750 (__v4df) __W,
1751 (__mmask8) __U);
1752}
1753
1754static __inline__ __m256d __DEFAULT_FN_ATTRS256
1756 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1757 (__v4df)
1759 (__mmask8) __U);
1760}
1761
1762static __inline__ __m128i __DEFAULT_FN_ATTRS128
1763_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1764 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1765 (__v2di) __W,
1766 (__mmask8) __U);
1767}
1768
1769static __inline__ __m128i __DEFAULT_FN_ATTRS128
1771 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1772 (__v2di)
1774 (__mmask8) __U);
1775}
1776
1777static __inline__ __m256i __DEFAULT_FN_ATTRS256
1778_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1779 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1780 (__v4di) __W,
1781 (__mmask8) __U);
1782}
1783
1784static __inline__ __m256i __DEFAULT_FN_ATTRS256
1786 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1787 (__v4di)
1789 (__mmask8) __U);
1790}
1791
1792static __inline__ __m128 __DEFAULT_FN_ATTRS128
1793_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1794 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1795 (__v4sf) __W,
1796 (__mmask8) __U);
1797}
1798
1799static __inline__ __m128 __DEFAULT_FN_ATTRS128
1801 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1802 (__v4sf)
1803 _mm_setzero_ps (),
1804 (__mmask8) __U);
1805}
1806
1807static __inline__ __m256 __DEFAULT_FN_ATTRS256
1808_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1809 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1810 (__v8sf) __W,
1811 (__mmask8) __U);
1812}
1813
1814static __inline__ __m256 __DEFAULT_FN_ATTRS256
1816 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1817 (__v8sf)
1819 (__mmask8) __U);
1820}
1821
1822static __inline__ __m128i __DEFAULT_FN_ATTRS128
1823_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1824 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1825 (__v4si) __W,
1826 (__mmask8) __U);
1827}
1828
1829static __inline__ __m128i __DEFAULT_FN_ATTRS128
1831 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1832 (__v4si)
1834 (__mmask8) __U);
1835}
1836
1837static __inline__ __m256i __DEFAULT_FN_ATTRS256
1838_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1839 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1840 (__v8si) __W,
1841 (__mmask8) __U);
1842}
1843
1844static __inline__ __m256i __DEFAULT_FN_ATTRS256
1846 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1847 (__v8si)
1849 (__mmask8) __U);
1850}
1851
1852static __inline__ void __DEFAULT_FN_ATTRS128
1853_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1854 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1855 (__v2df) __A,
1856 (__mmask8) __U);
1857}
1858
1859static __inline__ void __DEFAULT_FN_ATTRS256
1860_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1861 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1862 (__v4df) __A,
1863 (__mmask8) __U);
1864}
1865
1866static __inline__ void __DEFAULT_FN_ATTRS128
1867_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1868 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1869 (__v2di) __A,
1870 (__mmask8) __U);
1871}
1872
1873static __inline__ void __DEFAULT_FN_ATTRS256
1875 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1876 (__v4di) __A,
1877 (__mmask8) __U);
1878}
1879
1880static __inline__ void __DEFAULT_FN_ATTRS128
1881_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1882 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1883 (__v4sf) __A,
1884 (__mmask8) __U);
1885}
1886
1887static __inline__ void __DEFAULT_FN_ATTRS256
1889 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1890 (__v8sf) __A,
1891 (__mmask8) __U);
1892}
1893
1894static __inline__ void __DEFAULT_FN_ATTRS128
1895_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1896 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1897 (__v4si) __A,
1898 (__mmask8) __U);
1899}
1900
1901static __inline__ void __DEFAULT_FN_ATTRS256
1903 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1904 (__v8si) __A,
1905 (__mmask8) __U);
1906}
1907
1908static __inline__ __m128d __DEFAULT_FN_ATTRS128
1909_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1910 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1911 (__v2df)_mm_cvtepi32_pd(__A),
1912 (__v2df)__W);
1913}
1914
1915static __inline__ __m128d __DEFAULT_FN_ATTRS128
1917 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1918 (__v2df)_mm_cvtepi32_pd(__A),
1919 (__v2df)_mm_setzero_pd());
1920}
1921
1922static __inline__ __m256d __DEFAULT_FN_ATTRS256
1923_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1924 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1925 (__v4df)_mm256_cvtepi32_pd(__A),
1926 (__v4df)__W);
1927}
1928
1929static __inline__ __m256d __DEFAULT_FN_ATTRS256
1931 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1932 (__v4df)_mm256_cvtepi32_pd(__A),
1933 (__v4df)_mm256_setzero_pd());
1934}
1935
1936static __inline__ __m128 __DEFAULT_FN_ATTRS128
1937_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1938 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1939 (__v4sf)_mm_cvtepi32_ps(__A),
1940 (__v4sf)__W);
1941}
1942
1943static __inline__ __m128 __DEFAULT_FN_ATTRS128
1945 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1946 (__v4sf)_mm_cvtepi32_ps(__A),
1947 (__v4sf)_mm_setzero_ps());
1948}
1949
1950static __inline__ __m256 __DEFAULT_FN_ATTRS256
1951_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1952 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1953 (__v8sf)_mm256_cvtepi32_ps(__A),
1954 (__v8sf)__W);
1955}
1956
1957static __inline__ __m256 __DEFAULT_FN_ATTRS256
1959 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1960 (__v8sf)_mm256_cvtepi32_ps(__A),
1961 (__v8sf)_mm256_setzero_ps());
1962}
1963
1964static __inline__ __m128i __DEFAULT_FN_ATTRS128
1965_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1966 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1967 (__v4si) __W,
1968 (__mmask8) __U);
1969}
1970
1971static __inline__ __m128i __DEFAULT_FN_ATTRS128
1973 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1974 (__v4si)
1976 (__mmask8) __U);
1977}
1978
1979static __inline__ __m128i __DEFAULT_FN_ATTRS256
1980_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1982 (__v4si)_mm256_cvtpd_epi32(__A),
1983 (__v4si)__W);
1984}
1985
1986static __inline__ __m128i __DEFAULT_FN_ATTRS256
1988 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1989 (__v4si)_mm256_cvtpd_epi32(__A),
1990 (__v4si)_mm_setzero_si128());
1991}
1992
1993static __inline__ __m128 __DEFAULT_FN_ATTRS128
1994_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1995 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1996 (__v4sf) __W,
1997 (__mmask8) __U);
1998}
1999
2000static __inline__ __m128 __DEFAULT_FN_ATTRS128
2001_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
2002 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2003 (__v4sf)
2004 _mm_setzero_ps (),
2005 (__mmask8) __U);
2006}
2007
2008static __inline__ __m128 __DEFAULT_FN_ATTRS256
2009_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2010 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2011 (__v4sf)_mm256_cvtpd_ps(__A),
2012 (__v4sf)__W);
2013}
2014
2015static __inline__ __m128 __DEFAULT_FN_ATTRS256
2017 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2018 (__v4sf)_mm256_cvtpd_ps(__A),
2019 (__v4sf)_mm_setzero_ps());
2020}
2021
2022static __inline__ __m128i __DEFAULT_FN_ATTRS128
2023_mm_cvtpd_epu32 (__m128d __A) {
2024 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2025 (__v4si)
2027 (__mmask8) -1);
2028}
2029
2030static __inline__ __m128i __DEFAULT_FN_ATTRS128
2031_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2032 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2033 (__v4si) __W,
2034 (__mmask8) __U);
2035}
2036
2037static __inline__ __m128i __DEFAULT_FN_ATTRS128
2039 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2040 (__v4si)
2042 (__mmask8) __U);
2043}
2044
2045static __inline__ __m128i __DEFAULT_FN_ATTRS256
2046_mm256_cvtpd_epu32 (__m256d __A) {
2047 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2048 (__v4si)
2050 (__mmask8) -1);
2051}
2052
2053static __inline__ __m128i __DEFAULT_FN_ATTRS256
2054_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2055 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2056 (__v4si) __W,
2057 (__mmask8) __U);
2058}
2059
2060static __inline__ __m128i __DEFAULT_FN_ATTRS256
2062 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2063 (__v4si)
2065 (__mmask8) __U);
2066}
2067
2068static __inline__ __m128i __DEFAULT_FN_ATTRS128
2069_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2070 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2071 (__v4si)_mm_cvtps_epi32(__A),
2072 (__v4si)__W);
2073}
2074
2075static __inline__ __m128i __DEFAULT_FN_ATTRS128
2077 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2078 (__v4si)_mm_cvtps_epi32(__A),
2079 (__v4si)_mm_setzero_si128());
2080}
2081
2082static __inline__ __m256i __DEFAULT_FN_ATTRS256
2083_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2084 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2085 (__v8si)_mm256_cvtps_epi32(__A),
2086 (__v8si)__W);
2087}
2088
2089static __inline__ __m256i __DEFAULT_FN_ATTRS256
2091 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2092 (__v8si)_mm256_cvtps_epi32(__A),
2093 (__v8si)_mm256_setzero_si256());
2094}
2095
2096static __inline__ __m128d __DEFAULT_FN_ATTRS128
2097_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2098 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2099 (__v2df)_mm_cvtps_pd(__A),
2100 (__v2df)__W);
2101}
2102
2103static __inline__ __m128d __DEFAULT_FN_ATTRS128
2104_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2105 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2106 (__v2df)_mm_cvtps_pd(__A),
2107 (__v2df)_mm_setzero_pd());
2108}
2109
2110static __inline__ __m256d __DEFAULT_FN_ATTRS256
2111_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2112 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2113 (__v4df)_mm256_cvtps_pd(__A),
2114 (__v4df)__W);
2115}
2116
2117static __inline__ __m256d __DEFAULT_FN_ATTRS256
2119 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2120 (__v4df)_mm256_cvtps_pd(__A),
2121 (__v4df)_mm256_setzero_pd());
2122}
2123
2124static __inline__ __m128i __DEFAULT_FN_ATTRS128
2125_mm_cvtps_epu32 (__m128 __A) {
2126 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2127 (__v4si)
2129 (__mmask8) -1);
2130}
2131
2132static __inline__ __m128i __DEFAULT_FN_ATTRS128
2133_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2134 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2135 (__v4si) __W,
2136 (__mmask8) __U);
2137}
2138
2139static __inline__ __m128i __DEFAULT_FN_ATTRS128
2141 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2142 (__v4si)
2144 (__mmask8) __U);
2145}
2146
2147static __inline__ __m256i __DEFAULT_FN_ATTRS256
2149 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2150 (__v8si)
2152 (__mmask8) -1);
2153}
2154
2155static __inline__ __m256i __DEFAULT_FN_ATTRS256
2156_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2157 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2158 (__v8si) __W,
2159 (__mmask8) __U);
2160}
2161
2162static __inline__ __m256i __DEFAULT_FN_ATTRS256
2164 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2165 (__v8si)
2167 (__mmask8) __U);
2168}
2169
2170static __inline__ __m128i __DEFAULT_FN_ATTRS128
2171_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2172 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2173 (__v4si) __W,
2174 (__mmask8) __U);
2175}
2176
2177static __inline__ __m128i __DEFAULT_FN_ATTRS128
2179 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2180 (__v4si)
2182 (__mmask8) __U);
2183}
2184
2185static __inline__ __m128i __DEFAULT_FN_ATTRS256
2186_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2187 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2188 (__v4si)_mm256_cvttpd_epi32(__A),
2189 (__v4si)__W);
2190}
2191
2192static __inline__ __m128i __DEFAULT_FN_ATTRS256
2194 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2195 (__v4si)_mm256_cvttpd_epi32(__A),
2196 (__v4si)_mm_setzero_si128());
2197}
2198
2199static __inline__ __m128i __DEFAULT_FN_ATTRS128
2200_mm_cvttpd_epu32 (__m128d __A) {
2201 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2202 (__v4si)
2204 (__mmask8) -1);
2205}
2206
2207static __inline__ __m128i __DEFAULT_FN_ATTRS128
2208_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2209 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2210 (__v4si) __W,
2211 (__mmask8) __U);
2212}
2213
2214static __inline__ __m128i __DEFAULT_FN_ATTRS128
2216 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2217 (__v4si)
2219 (__mmask8) __U);
2220}
2221
2222static __inline__ __m128i __DEFAULT_FN_ATTRS256
2223_mm256_cvttpd_epu32 (__m256d __A) {
2224 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2225 (__v4si)
2227 (__mmask8) -1);
2228}
2229
2230static __inline__ __m128i __DEFAULT_FN_ATTRS256
2231_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2232 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2233 (__v4si) __W,
2234 (__mmask8) __U);
2235}
2236
2237static __inline__ __m128i __DEFAULT_FN_ATTRS256
2239 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2240 (__v4si)
2242 (__mmask8) __U);
2243}
2244
2245static __inline__ __m128i __DEFAULT_FN_ATTRS128
2246_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2247 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2248 (__v4si)_mm_cvttps_epi32(__A),
2249 (__v4si)__W);
2250}
2251
2252static __inline__ __m128i __DEFAULT_FN_ATTRS128
2254 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2255 (__v4si)_mm_cvttps_epi32(__A),
2256 (__v4si)_mm_setzero_si128());
2257}
2258
2259static __inline__ __m256i __DEFAULT_FN_ATTRS256
2260_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2261 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2262 (__v8si)_mm256_cvttps_epi32(__A),
2263 (__v8si)__W);
2264}
2265
2266static __inline__ __m256i __DEFAULT_FN_ATTRS256
2268 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2269 (__v8si)_mm256_cvttps_epi32(__A),
2270 (__v8si)_mm256_setzero_si256());
2271}
2272
2273static __inline__ __m128i __DEFAULT_FN_ATTRS128
2274_mm_cvttps_epu32 (__m128 __A) {
2275 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2276 (__v4si)
2278 (__mmask8) -1);
2279}
2280
2281static __inline__ __m128i __DEFAULT_FN_ATTRS128
2282_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2283 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2284 (__v4si) __W,
2285 (__mmask8) __U);
2286}
2287
2288static __inline__ __m128i __DEFAULT_FN_ATTRS128
2290 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2291 (__v4si)
2293 (__mmask8) __U);
2294}
2295
2296static __inline__ __m256i __DEFAULT_FN_ATTRS256
2298 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2299 (__v8si)
2301 (__mmask8) -1);
2302}
2303
2304static __inline__ __m256i __DEFAULT_FN_ATTRS256
2305_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2306 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2307 (__v8si) __W,
2308 (__mmask8) __U);
2309}
2310
2311static __inline__ __m256i __DEFAULT_FN_ATTRS256
2313 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2314 (__v8si)
2316 (__mmask8) __U);
2317}
2318
2319static __inline__ __m128d __DEFAULT_FN_ATTRS128
2320_mm_cvtepu32_pd (__m128i __A) {
2321 return (__m128d) __builtin_convertvector(
2322 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2323}
2324
2325static __inline__ __m128d __DEFAULT_FN_ATTRS128
2326_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2327 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2328 (__v2df)_mm_cvtepu32_pd(__A),
2329 (__v2df)__W);
2330}
2331
2332static __inline__ __m128d __DEFAULT_FN_ATTRS128
2334 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2335 (__v2df)_mm_cvtepu32_pd(__A),
2336 (__v2df)_mm_setzero_pd());
2337}
2338
2339static __inline__ __m256d __DEFAULT_FN_ATTRS256
2340_mm256_cvtepu32_pd (__m128i __A) {
2341 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2342}
2343
2344static __inline__ __m256d __DEFAULT_FN_ATTRS256
2345_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2346 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2347 (__v4df)_mm256_cvtepu32_pd(__A),
2348 (__v4df)__W);
2349}
2350
2351static __inline__ __m256d __DEFAULT_FN_ATTRS256
2353 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2354 (__v4df)_mm256_cvtepu32_pd(__A),
2355 (__v4df)_mm256_setzero_pd());
2356}
2357
2358static __inline__ __m128 __DEFAULT_FN_ATTRS128
2359_mm_cvtepu32_ps (__m128i __A) {
2360 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2361}
2362
2363static __inline__ __m128 __DEFAULT_FN_ATTRS128
2364_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2365 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2366 (__v4sf)_mm_cvtepu32_ps(__A),
2367 (__v4sf)__W);
2368}
2369
2370static __inline__ __m128 __DEFAULT_FN_ATTRS128
2372 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2373 (__v4sf)_mm_cvtepu32_ps(__A),
2374 (__v4sf)_mm_setzero_ps());
2375}
2376
2377static __inline__ __m256 __DEFAULT_FN_ATTRS256
2378_mm256_cvtepu32_ps (__m256i __A) {
2379 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2380}
2381
2382static __inline__ __m256 __DEFAULT_FN_ATTRS256
2383_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2384 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2385 (__v8sf)_mm256_cvtepu32_ps(__A),
2386 (__v8sf)__W);
2387}
2388
2389static __inline__ __m256 __DEFAULT_FN_ATTRS256
2391 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2392 (__v8sf)_mm256_cvtepu32_ps(__A),
2393 (__v8sf)_mm256_setzero_ps());
2394}
2395
2396static __inline__ __m128d __DEFAULT_FN_ATTRS128
2397_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2398 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2399 (__v2df)_mm_div_pd(__A, __B),
2400 (__v2df)__W);
2401}
2402
2403static __inline__ __m128d __DEFAULT_FN_ATTRS128
2404_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2405 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2406 (__v2df)_mm_div_pd(__A, __B),
2407 (__v2df)_mm_setzero_pd());
2408}
2409
2410static __inline__ __m256d __DEFAULT_FN_ATTRS256
2411_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2412 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2413 (__v4df)_mm256_div_pd(__A, __B),
2414 (__v4df)__W);
2415}
2416
2417static __inline__ __m256d __DEFAULT_FN_ATTRS256
2418_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2419 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2420 (__v4df)_mm256_div_pd(__A, __B),
2421 (__v4df)_mm256_setzero_pd());
2422}
2423
2424static __inline__ __m128 __DEFAULT_FN_ATTRS128
2425_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2426 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2427 (__v4sf)_mm_div_ps(__A, __B),
2428 (__v4sf)__W);
2429}
2430
2431static __inline__ __m128 __DEFAULT_FN_ATTRS128
2432_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2433 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2434 (__v4sf)_mm_div_ps(__A, __B),
2435 (__v4sf)_mm_setzero_ps());
2436}
2437
2438static __inline__ __m256 __DEFAULT_FN_ATTRS256
2439_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2440 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2441 (__v8sf)_mm256_div_ps(__A, __B),
2442 (__v8sf)__W);
2443}
2444
2445static __inline__ __m256 __DEFAULT_FN_ATTRS256
2446_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2447 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2448 (__v8sf)_mm256_div_ps(__A, __B),
2449 (__v8sf)_mm256_setzero_ps());
2450}
2451
2452static __inline__ __m128d __DEFAULT_FN_ATTRS128
2453_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2454 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2455 (__v2df) __W,
2456 (__mmask8) __U);
2457}
2458
2459static __inline__ __m128d __DEFAULT_FN_ATTRS128
2460_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2461 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2462 (__v2df)
2463 _mm_setzero_pd (),
2464 (__mmask8) __U);
2465}
2466
2467static __inline__ __m256d __DEFAULT_FN_ATTRS256
2468_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2469 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2470 (__v4df) __W,
2471 (__mmask8) __U);
2472}
2473
2474static __inline__ __m256d __DEFAULT_FN_ATTRS256
2476 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2477 (__v4df)
2479 (__mmask8) __U);
2480}
2481
2482static __inline__ __m128i __DEFAULT_FN_ATTRS128
2483_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2484 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2485 (__v2di) __W,
2486 (__mmask8) __U);
2487}
2488
2489static __inline__ __m128i __DEFAULT_FN_ATTRS128
2491 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2492 (__v2di)
2494 (__mmask8) __U);
2495}
2496
2497static __inline__ __m256i __DEFAULT_FN_ATTRS256
2498_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2499 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2500 (__v4di) __W,
2501 (__mmask8) __U);
2502}
2503
2504static __inline__ __m256i __DEFAULT_FN_ATTRS256
2506 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2507 (__v4di)
2509 (__mmask8) __U);
2510}
2511
2512static __inline__ __m128d __DEFAULT_FN_ATTRS128
2513_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2514 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2515 (__v2df) __W,
2516 (__mmask8)
2517 __U);
2518}
2519
2520static __inline__ __m128d __DEFAULT_FN_ATTRS128
2522 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2523 (__v2df)
2524 _mm_setzero_pd (),
2525 (__mmask8)
2526 __U);
2527}
2528
2529static __inline__ __m256d __DEFAULT_FN_ATTRS256
2530_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2531 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2532 (__v4df) __W,
2533 (__mmask8)
2534 __U);
2535}
2536
2537static __inline__ __m256d __DEFAULT_FN_ATTRS256
2539 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2540 (__v4df)
2542 (__mmask8)
2543 __U);
2544}
2545
2546static __inline__ __m128i __DEFAULT_FN_ATTRS128
2547_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2548 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2549 (__v2di) __W,
2550 (__mmask8)
2551 __U);
2552}
2553
2554static __inline__ __m128i __DEFAULT_FN_ATTRS128
2556 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2557 (__v2di)
2559 (__mmask8)
2560 __U);
2561}
2562
2563static __inline__ __m256i __DEFAULT_FN_ATTRS256
2565 void const *__P) {
2566 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2567 (__v4di) __W,
2568 (__mmask8)
2569 __U);
2570}
2571
2572static __inline__ __m256i __DEFAULT_FN_ATTRS256
2574 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2575 (__v4di)
2577 (__mmask8)
2578 __U);
2579}
2580
2581static __inline__ __m128 __DEFAULT_FN_ATTRS128
2582_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2583 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2584 (__v4sf) __W,
2585 (__mmask8) __U);
2586}
2587
2588static __inline__ __m128 __DEFAULT_FN_ATTRS128
2590 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2591 (__v4sf)
2592 _mm_setzero_ps (),
2593 (__mmask8)
2594 __U);
2595}
2596
2597static __inline__ __m256 __DEFAULT_FN_ATTRS256
2598_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2599 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2600 (__v8sf) __W,
2601 (__mmask8) __U);
2602}
2603
2604static __inline__ __m256 __DEFAULT_FN_ATTRS256
2606 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2607 (__v8sf)
2609 (__mmask8)
2610 __U);
2611}
2612
2613static __inline__ __m128i __DEFAULT_FN_ATTRS128
2614_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2615 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2616 (__v4si) __W,
2617 (__mmask8)
2618 __U);
2619}
2620
2621static __inline__ __m128i __DEFAULT_FN_ATTRS128
2623 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2624 (__v4si)
2626 (__mmask8) __U);
2627}
2628
2629static __inline__ __m256i __DEFAULT_FN_ATTRS256
2631 void const *__P) {
2632 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2633 (__v8si) __W,
2634 (__mmask8)
2635 __U);
2636}
2637
2638static __inline__ __m256i __DEFAULT_FN_ATTRS256
2640 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2641 (__v8si)
2643 (__mmask8)
2644 __U);
2645}
2646
2647static __inline__ __m128 __DEFAULT_FN_ATTRS128
2648_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2649 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2650 (__v4sf) __W,
2651 (__mmask8) __U);
2652}
2653
2654static __inline__ __m128 __DEFAULT_FN_ATTRS128
2656 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2657 (__v4sf)
2658 _mm_setzero_ps (),
2659 (__mmask8) __U);
2660}
2661
2662static __inline__ __m256 __DEFAULT_FN_ATTRS256
2663_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2664 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2665 (__v8sf) __W,
2666 (__mmask8) __U);
2667}
2668
2669static __inline__ __m256 __DEFAULT_FN_ATTRS256
2671 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2672 (__v8sf)
2674 (__mmask8) __U);
2675}
2676
2677static __inline__ __m128i __DEFAULT_FN_ATTRS128
2678_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2679 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2680 (__v4si) __W,
2681 (__mmask8) __U);
2682}
2683
2684static __inline__ __m128i __DEFAULT_FN_ATTRS128
2686 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2687 (__v4si)
2689 (__mmask8) __U);
2690}
2691
2692static __inline__ __m256i __DEFAULT_FN_ATTRS256
2693_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2694 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2695 (__v8si) __W,
2696 (__mmask8) __U);
2697}
2698
2699static __inline__ __m256i __DEFAULT_FN_ATTRS256
2701 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2702 (__v8si)
2704 (__mmask8) __U);
2705}
2706
2707static __inline__ __m128d __DEFAULT_FN_ATTRS128
2708_mm_getexp_pd (__m128d __A) {
2709 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2710 (__v2df)
2711 _mm_setzero_pd (),
2712 (__mmask8) -1);
2713}
2714
2715static __inline__ __m128d __DEFAULT_FN_ATTRS128
2716_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2717 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2718 (__v2df) __W,
2719 (__mmask8) __U);
2720}
2721
2722static __inline__ __m128d __DEFAULT_FN_ATTRS128
2723_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2724 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2725 (__v2df)
2726 _mm_setzero_pd (),
2727 (__mmask8) __U);
2728}
2729
2730static __inline__ __m256d __DEFAULT_FN_ATTRS256
2731_mm256_getexp_pd (__m256d __A) {
2732 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2733 (__v4df)
2735 (__mmask8) -1);
2736}
2737
2738static __inline__ __m256d __DEFAULT_FN_ATTRS256
2739_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2740 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2741 (__v4df) __W,
2742 (__mmask8) __U);
2743}
2744
2745static __inline__ __m256d __DEFAULT_FN_ATTRS256
2747 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2748 (__v4df)
2750 (__mmask8) __U);
2751}
2752
2753static __inline__ __m128 __DEFAULT_FN_ATTRS128
2754_mm_getexp_ps (__m128 __A) {
2755 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2756 (__v4sf)
2757 _mm_setzero_ps (),
2758 (__mmask8) -1);
2759}
2760
2761static __inline__ __m128 __DEFAULT_FN_ATTRS128
2762_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2763 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2764 (__v4sf) __W,
2765 (__mmask8) __U);
2766}
2767
2768static __inline__ __m128 __DEFAULT_FN_ATTRS128
2770 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2771 (__v4sf)
2772 _mm_setzero_ps (),
2773 (__mmask8) __U);
2774}
2775
2776static __inline__ __m256 __DEFAULT_FN_ATTRS256
2777_mm256_getexp_ps (__m256 __A) {
2778 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2779 (__v8sf)
2781 (__mmask8) -1);
2782}
2783
2784static __inline__ __m256 __DEFAULT_FN_ATTRS256
2785_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2786 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2787 (__v8sf) __W,
2788 (__mmask8) __U);
2789}
2790
2791static __inline__ __m256 __DEFAULT_FN_ATTRS256
2793 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2794 (__v8sf)
2796 (__mmask8) __U);
2797}
2798
2799static __inline__ __m128d __DEFAULT_FN_ATTRS128
2800_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2801 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2802 (__v2df)_mm_max_pd(__A, __B),
2803 (__v2df)__W);
2804}
2805
2806static __inline__ __m128d __DEFAULT_FN_ATTRS128
2807_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2808 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2809 (__v2df)_mm_max_pd(__A, __B),
2810 (__v2df)_mm_setzero_pd());
2811}
2812
2813static __inline__ __m256d __DEFAULT_FN_ATTRS256
2814_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2815 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2816 (__v4df)_mm256_max_pd(__A, __B),
2817 (__v4df)__W);
2818}
2819
2820static __inline__ __m256d __DEFAULT_FN_ATTRS256
2821_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2822 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2823 (__v4df)_mm256_max_pd(__A, __B),
2824 (__v4df)_mm256_setzero_pd());
2825}
2826
2827static __inline__ __m128 __DEFAULT_FN_ATTRS128
2828_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2829 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2830 (__v4sf)_mm_max_ps(__A, __B),
2831 (__v4sf)__W);
2832}
2833
2834static __inline__ __m128 __DEFAULT_FN_ATTRS128
2835_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2836 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2837 (__v4sf)_mm_max_ps(__A, __B),
2838 (__v4sf)_mm_setzero_ps());
2839}
2840
2841static __inline__ __m256 __DEFAULT_FN_ATTRS256
2842_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2843 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2844 (__v8sf)_mm256_max_ps(__A, __B),
2845 (__v8sf)__W);
2846}
2847
2848static __inline__ __m256 __DEFAULT_FN_ATTRS256
2849_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2850 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2851 (__v8sf)_mm256_max_ps(__A, __B),
2852 (__v8sf)_mm256_setzero_ps());
2853}
2854
2855static __inline__ __m128d __DEFAULT_FN_ATTRS128
2856_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2857 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2858 (__v2df)_mm_min_pd(__A, __B),
2859 (__v2df)__W);
2860}
2861
2862static __inline__ __m128d __DEFAULT_FN_ATTRS128
2863_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2864 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2865 (__v2df)_mm_min_pd(__A, __B),
2866 (__v2df)_mm_setzero_pd());
2867}
2868
2869static __inline__ __m256d __DEFAULT_FN_ATTRS256
2870_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2871 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2872 (__v4df)_mm256_min_pd(__A, __B),
2873 (__v4df)__W);
2874}
2875
2876static __inline__ __m256d __DEFAULT_FN_ATTRS256
2877_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2878 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2879 (__v4df)_mm256_min_pd(__A, __B),
2880 (__v4df)_mm256_setzero_pd());
2881}
2882
2883static __inline__ __m128 __DEFAULT_FN_ATTRS128
2884_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2885 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2886 (__v4sf)_mm_min_ps(__A, __B),
2887 (__v4sf)__W);
2888}
2889
2890static __inline__ __m128 __DEFAULT_FN_ATTRS128
2891_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2892 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2893 (__v4sf)_mm_min_ps(__A, __B),
2894 (__v4sf)_mm_setzero_ps());
2895}
2896
2897static __inline__ __m256 __DEFAULT_FN_ATTRS256
2898_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2899 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2900 (__v8sf)_mm256_min_ps(__A, __B),
2901 (__v8sf)__W);
2902}
2903
2904static __inline__ __m256 __DEFAULT_FN_ATTRS256
2905_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2906 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2907 (__v8sf)_mm256_min_ps(__A, __B),
2908 (__v8sf)_mm256_setzero_ps());
2909}
2910
2911static __inline__ __m128d __DEFAULT_FN_ATTRS128
2912_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2913 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2914 (__v2df)_mm_mul_pd(__A, __B),
2915 (__v2df)__W);
2916}
2917
2918static __inline__ __m128d __DEFAULT_FN_ATTRS128
2919_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2920 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2921 (__v2df)_mm_mul_pd(__A, __B),
2922 (__v2df)_mm_setzero_pd());
2923}
2924
2925static __inline__ __m256d __DEFAULT_FN_ATTRS256
2926_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2927 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2928 (__v4df)_mm256_mul_pd(__A, __B),
2929 (__v4df)__W);
2930}
2931
2932static __inline__ __m256d __DEFAULT_FN_ATTRS256
2933_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2934 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2935 (__v4df)_mm256_mul_pd(__A, __B),
2936 (__v4df)_mm256_setzero_pd());
2937}
2938
2939static __inline__ __m128 __DEFAULT_FN_ATTRS128
2940_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2941 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2942 (__v4sf)_mm_mul_ps(__A, __B),
2943 (__v4sf)__W);
2944}
2945
2946static __inline__ __m128 __DEFAULT_FN_ATTRS128
2947_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2948 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2949 (__v4sf)_mm_mul_ps(__A, __B),
2950 (__v4sf)_mm_setzero_ps());
2951}
2952
2953static __inline__ __m256 __DEFAULT_FN_ATTRS256
2954_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2955 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2956 (__v8sf)_mm256_mul_ps(__A, __B),
2957 (__v8sf)__W);
2958}
2959
2960static __inline__ __m256 __DEFAULT_FN_ATTRS256
2961_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2962 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2963 (__v8sf)_mm256_mul_ps(__A, __B),
2964 (__v8sf)_mm256_setzero_ps());
2965}
2966
2967static __inline__ __m128i __DEFAULT_FN_ATTRS128
2968_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2969 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2970 (__v4si)_mm_abs_epi32(__A),
2971 (__v4si)__W);
2972}
2973
2974static __inline__ __m128i __DEFAULT_FN_ATTRS128
2976 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2977 (__v4si)_mm_abs_epi32(__A),
2978 (__v4si)_mm_setzero_si128());
2979}
2980
2981static __inline__ __m256i __DEFAULT_FN_ATTRS256
2982_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2983 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2984 (__v8si)_mm256_abs_epi32(__A),
2985 (__v8si)__W);
2986}
2987
2988static __inline__ __m256i __DEFAULT_FN_ATTRS256
2990 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2991 (__v8si)_mm256_abs_epi32(__A),
2992 (__v8si)_mm256_setzero_si256());
2993}
2994
2995static __inline__ __m128i __DEFAULT_FN_ATTRS128
2996_mm_abs_epi64 (__m128i __A) {
2997 return (__m128i)__builtin_elementwise_abs((__v2di)__A);
2998}
2999
3000static __inline__ __m128i __DEFAULT_FN_ATTRS128
3001_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
3002 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3003 (__v2di)_mm_abs_epi64(__A),
3004 (__v2di)__W);
3005}
3006
3007static __inline__ __m128i __DEFAULT_FN_ATTRS128
3008_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3009 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3010 (__v2di)_mm_abs_epi64(__A),
3011 (__v2di)_mm_setzero_si128());
3012}
3013
3014static __inline__ __m256i __DEFAULT_FN_ATTRS256
3015_mm256_abs_epi64 (__m256i __A) {
3016 return (__m256i)__builtin_elementwise_abs((__v4di)__A);
3017}
3018
3019static __inline__ __m256i __DEFAULT_FN_ATTRS256
3020_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3021 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3022 (__v4di)_mm256_abs_epi64(__A),
3023 (__v4di)__W);
3024}
3025
3026static __inline__ __m256i __DEFAULT_FN_ATTRS256
3028 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3029 (__v4di)_mm256_abs_epi64(__A),
3030 (__v4di)_mm256_setzero_si256());
3031}
3032
3033static __inline__ __m128i __DEFAULT_FN_ATTRS128
3034_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3035 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3036 (__v4si)_mm_max_epi32(__A, __B),
3037 (__v4si)_mm_setzero_si128());
3038}
3039
3040static __inline__ __m128i __DEFAULT_FN_ATTRS128
3041_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3042 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3043 (__v4si)_mm_max_epi32(__A, __B),
3044 (__v4si)__W);
3045}
3046
3047static __inline__ __m256i __DEFAULT_FN_ATTRS256
3048_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3049 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3050 (__v8si)_mm256_max_epi32(__A, __B),
3051 (__v8si)_mm256_setzero_si256());
3052}
3053
3054static __inline__ __m256i __DEFAULT_FN_ATTRS256
3055_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3056 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3057 (__v8si)_mm256_max_epi32(__A, __B),
3058 (__v8si)__W);
3059}
3060
3061static __inline__ __m128i __DEFAULT_FN_ATTRS128
3062_mm_max_epi64 (__m128i __A, __m128i __B) {
3063 return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
3064}
3065
3066static __inline__ __m128i __DEFAULT_FN_ATTRS128
3067_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3068 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3069 (__v2di)_mm_max_epi64(__A, __B),
3070 (__v2di)_mm_setzero_si128());
3071}
3072
3073static __inline__ __m128i __DEFAULT_FN_ATTRS128
3074_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3075 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3076 (__v2di)_mm_max_epi64(__A, __B),
3077 (__v2di)__W);
3078}
3079
3080static __inline__ __m256i __DEFAULT_FN_ATTRS256
3081_mm256_max_epi64 (__m256i __A, __m256i __B) {
3082 return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
3083}
3084
3085static __inline__ __m256i __DEFAULT_FN_ATTRS256
3086_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3087 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3088 (__v4di)_mm256_max_epi64(__A, __B),
3089 (__v4di)_mm256_setzero_si256());
3090}
3091
3092static __inline__ __m256i __DEFAULT_FN_ATTRS256
3093_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3094 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3095 (__v4di)_mm256_max_epi64(__A, __B),
3096 (__v4di)__W);
3097}
3098
3099static __inline__ __m128i __DEFAULT_FN_ATTRS128
3100_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3101 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3102 (__v4si)_mm_max_epu32(__A, __B),
3103 (__v4si)_mm_setzero_si128());
3104}
3105
3106static __inline__ __m128i __DEFAULT_FN_ATTRS128
3107_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3108 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3109 (__v4si)_mm_max_epu32(__A, __B),
3110 (__v4si)__W);
3111}
3112
3113static __inline__ __m256i __DEFAULT_FN_ATTRS256
3114_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3115 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3116 (__v8si)_mm256_max_epu32(__A, __B),
3117 (__v8si)_mm256_setzero_si256());
3118}
3119
3120static __inline__ __m256i __DEFAULT_FN_ATTRS256
3121_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3122 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3123 (__v8si)_mm256_max_epu32(__A, __B),
3124 (__v8si)__W);
3125}
3126
3127static __inline__ __m128i __DEFAULT_FN_ATTRS128
3128_mm_max_epu64 (__m128i __A, __m128i __B) {
3129 return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
3130}
3131
3132static __inline__ __m128i __DEFAULT_FN_ATTRS128
3133_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3134 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3135 (__v2di)_mm_max_epu64(__A, __B),
3136 (__v2di)_mm_setzero_si128());
3137}
3138
3139static __inline__ __m128i __DEFAULT_FN_ATTRS128
3140_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3141 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3142 (__v2di)_mm_max_epu64(__A, __B),
3143 (__v2di)__W);
3144}
3145
3146static __inline__ __m256i __DEFAULT_FN_ATTRS256
3147_mm256_max_epu64 (__m256i __A, __m256i __B) {
3148 return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
3149}
3150
3151static __inline__ __m256i __DEFAULT_FN_ATTRS256
3152_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3153 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3154 (__v4di)_mm256_max_epu64(__A, __B),
3155 (__v4di)_mm256_setzero_si256());
3156}
3157
3158static __inline__ __m256i __DEFAULT_FN_ATTRS256
3159_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3160 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3161 (__v4di)_mm256_max_epu64(__A, __B),
3162 (__v4di)__W);
3163}
3164
3165static __inline__ __m128i __DEFAULT_FN_ATTRS128
3166_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3167 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3168 (__v4si)_mm_min_epi32(__A, __B),
3169 (__v4si)_mm_setzero_si128());
3170}
3171
3172static __inline__ __m128i __DEFAULT_FN_ATTRS128
3173_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3174 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3175 (__v4si)_mm_min_epi32(__A, __B),
3176 (__v4si)__W);
3177}
3178
3179static __inline__ __m256i __DEFAULT_FN_ATTRS256
3180_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3181 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3182 (__v8si)_mm256_min_epi32(__A, __B),
3183 (__v8si)_mm256_setzero_si256());
3184}
3185
3186static __inline__ __m256i __DEFAULT_FN_ATTRS256
3187_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3188 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3189 (__v8si)_mm256_min_epi32(__A, __B),
3190 (__v8si)__W);
3191}
3192
3193static __inline__ __m128i __DEFAULT_FN_ATTRS128
3194_mm_min_epi64 (__m128i __A, __m128i __B) {
3195 return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
3196}
3197
3198static __inline__ __m128i __DEFAULT_FN_ATTRS128
3199_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3200 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3201 (__v2di)_mm_min_epi64(__A, __B),
3202 (__v2di)__W);
3203}
3204
3205static __inline__ __m128i __DEFAULT_FN_ATTRS128
3206_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3207 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3208 (__v2di)_mm_min_epi64(__A, __B),
3209 (__v2di)_mm_setzero_si128());
3210}
3211
3212static __inline__ __m256i __DEFAULT_FN_ATTRS256
3213_mm256_min_epi64 (__m256i __A, __m256i __B) {
3214 return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
3215}
3216
3217static __inline__ __m256i __DEFAULT_FN_ATTRS256
3218_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3219 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3220 (__v4di)_mm256_min_epi64(__A, __B),
3221 (__v4di)__W);
3222}
3223
3224static __inline__ __m256i __DEFAULT_FN_ATTRS256
3225_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3226 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3227 (__v4di)_mm256_min_epi64(__A, __B),
3228 (__v4di)_mm256_setzero_si256());
3229}
3230
3231static __inline__ __m128i __DEFAULT_FN_ATTRS128
3232_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3233 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3234 (__v4si)_mm_min_epu32(__A, __B),
3235 (__v4si)_mm_setzero_si128());
3236}
3237
3238static __inline__ __m128i __DEFAULT_FN_ATTRS128
3239_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3240 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3241 (__v4si)_mm_min_epu32(__A, __B),
3242 (__v4si)__W);
3243}
3244
3245static __inline__ __m256i __DEFAULT_FN_ATTRS256
3246_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3247 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3248 (__v8si)_mm256_min_epu32(__A, __B),
3249 (__v8si)_mm256_setzero_si256());
3250}
3251
3252static __inline__ __m256i __DEFAULT_FN_ATTRS256
3253_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3254 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3255 (__v8si)_mm256_min_epu32(__A, __B),
3256 (__v8si)__W);
3257}
3258
3259static __inline__ __m128i __DEFAULT_FN_ATTRS128
3260_mm_min_epu64 (__m128i __A, __m128i __B) {
3261 return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
3262}
3263
3264static __inline__ __m128i __DEFAULT_FN_ATTRS128
3265_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3266 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3267 (__v2di)_mm_min_epu64(__A, __B),
3268 (__v2di)__W);
3269}
3270
3271static __inline__ __m128i __DEFAULT_FN_ATTRS128
3272_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3273 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3274 (__v2di)_mm_min_epu64(__A, __B),
3275 (__v2di)_mm_setzero_si128());
3276}
3277
3278static __inline__ __m256i __DEFAULT_FN_ATTRS256
3279_mm256_min_epu64 (__m256i __A, __m256i __B) {
3280 return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
3281}
3282
3283static __inline__ __m256i __DEFAULT_FN_ATTRS256
3284_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3285 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3286 (__v4di)_mm256_min_epu64(__A, __B),
3287 (__v4di)__W);
3288}
3289
3290static __inline__ __m256i __DEFAULT_FN_ATTRS256
3291_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3292 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3293 (__v4di)_mm256_min_epu64(__A, __B),
3294 (__v4di)_mm256_setzero_si256());
3295}
3296
/* Unmasked roundscale on packed doubles: same expansion as
   _mm_mask_roundscale_pd with a zero source vector and an all-ones mask.
   Stays a macro because imm is forwarded as an immediate operand. */
#define _mm_roundscale_pd(A, imm) \
  _mm_mask_roundscale_pd(_mm_setzero_pd(), (__mmask8)-1, (A), (imm))
3302
3303
/* Masked roundscale on packed doubles: forwards A, the immediate imm, the
   source vector W and the mask U to the rndscalepd_128 mask builtin. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)(__m128d)(W), (__mmask8)(U)))
3309
3310
/* Zero-masked roundscale on packed doubles: like _mm_mask_roundscale_pd but
   with a zero vector as the source operand. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)))
3316
3317
/* Unmasked 256-bit roundscale on packed doubles: same expansion as
   _mm256_mask_roundscale_pd with a zero source vector and an all-ones mask. */
#define _mm256_roundscale_pd(A, imm) \
  _mm256_mask_roundscale_pd(_mm256_setzero_pd(), (__mmask8)-1, (A), (imm))
3323
3324
/* Masked 256-bit roundscale on packed doubles: forwards A, the immediate imm,
   the source vector W and the mask U to the rndscalepd_256 mask builtin. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)(__m256d)(W), (__mmask8)(U)))
3330
3331
/* Zero-masked 256-bit roundscale on packed doubles: like
   _mm256_mask_roundscale_pd but with a zero vector as the source operand. */
#define _mm256_maskz_roundscale_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)))
3337
/* Unmasked roundscale on packed floats: same expansion as
   _mm_mask_roundscale_ps with a zero source vector and an all-ones mask. */
#define _mm_roundscale_ps(A, imm) \
  _mm_mask_roundscale_ps(_mm_setzero_ps(), (__mmask8)-1, (A), (imm))
3342
3343
/* Masked roundscale on packed floats: forwards A, the immediate imm, the
   source vector W and the mask U to the rndscaleps_128 mask builtin. */
#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))
3348
3349
/* Zero-masked roundscale on packed floats: like _mm_mask_roundscale_ps but
   with a zero vector as the source operand. */
#define _mm_maskz_roundscale_ps(U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
3354
/* Unmasked 256-bit roundscale on packed floats: same expansion as
   _mm256_mask_roundscale_ps with a zero source vector and an all-ones mask. */
#define _mm256_roundscale_ps(A, imm) \
  _mm256_mask_roundscale_ps(_mm256_setzero_ps(), (__mmask8)-1, (A), (imm))
3359
/* Masked 256-bit roundscale on packed floats: forwards A, the immediate imm,
   the source vector W and the mask U to the rndscaleps_256 mask builtin. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(imm), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))
3364
3365
/* Zero-masked 256-bit roundscale on packed floats: like
   _mm256_mask_roundscale_ps but with a zero vector as the source operand. */
#define _mm256_maskz_roundscale_ps(U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)))
3370
3371static __inline__ __m128d __DEFAULT_FN_ATTRS128
3372_mm_scalef_pd (__m128d __A, __m128d __B) {
3373 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3374 (__v2df) __B,
3375 (__v2df)
3376 _mm_setzero_pd (),
3377 (__mmask8) -1);
3378}
3379
3380static __inline__ __m128d __DEFAULT_FN_ATTRS128
3381_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3382 __m128d __B) {
3383 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3384 (__v2df) __B,
3385 (__v2df) __W,
3386 (__mmask8) __U);
3387}
3388
3389static __inline__ __m128d __DEFAULT_FN_ATTRS128
3390_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3391 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3392 (__v2df) __B,
3393 (__v2df)
3394 _mm_setzero_pd (),
3395 (__mmask8) __U);
3396}
3397
3398static __inline__ __m256d __DEFAULT_FN_ATTRS256
3399_mm256_scalef_pd (__m256d __A, __m256d __B) {
3400 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3401 (__v4df) __B,
3402 (__v4df)
3404 (__mmask8) -1);
3405}
3406
3407static __inline__ __m256d __DEFAULT_FN_ATTRS256
3408_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3409 __m256d __B) {
3410 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3411 (__v4df) __B,
3412 (__v4df) __W,
3413 (__mmask8) __U);
3414}
3415
3416static __inline__ __m256d __DEFAULT_FN_ATTRS256
3417_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3418 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3419 (__v4df) __B,
3420 (__v4df)
3422 (__mmask8) __U);
3423}
3424
3425static __inline__ __m128 __DEFAULT_FN_ATTRS128
3426_mm_scalef_ps (__m128 __A, __m128 __B) {
3427 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3428 (__v4sf) __B,
3429 (__v4sf)
3430 _mm_setzero_ps (),
3431 (__mmask8) -1);
3432}
3433
3434static __inline__ __m128 __DEFAULT_FN_ATTRS128
3435_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3436 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3437 (__v4sf) __B,
3438 (__v4sf) __W,
3439 (__mmask8) __U);
3440}
3441
3442static __inline__ __m128 __DEFAULT_FN_ATTRS128
3443_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3444 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3445 (__v4sf) __B,
3446 (__v4sf)
3447 _mm_setzero_ps (),
3448 (__mmask8) __U);
3449}
3450
3451static __inline__ __m256 __DEFAULT_FN_ATTRS256
3452_mm256_scalef_ps (__m256 __A, __m256 __B) {
3453 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3454 (__v8sf) __B,
3455 (__v8sf)
3457 (__mmask8) -1);
3458}
3459
3460static __inline__ __m256 __DEFAULT_FN_ATTRS256
3461_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3462 __m256 __B) {
3463 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3464 (__v8sf) __B,
3465 (__v8sf) __W,
3466 (__mmask8) __U);
3467}
3468
3469static __inline__ __m256 __DEFAULT_FN_ATTRS256
3470_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3471 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3472 (__v8sf) __B,
3473 (__v8sf)
3475 (__mmask8) __U);
3476}
3477
/* Unmasked form of _mm_mask_i64scatter_pd: same expansion with an all-ones
   mask. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  _mm_mask_i64scatter_pd((addr), (__mmask8)-1, (index), (v1), (scale))
3482
/* Masked scatter of the doubles in v1 (__m128d) with 64-bit indices
   (__m128i): forwards to the scatterdiv2df builtin. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), \
      (int)(scale))
3487
/* Unmasked form of _mm_mask_i64scatter_epi64: same expansion with an all-ones
   mask. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  _mm_mask_i64scatter_epi64((addr), (__mmask8)-1, (index), (v1), (scale))
3492
/* Masked scatter of the 64-bit integers in v1 (__m128i) with 64-bit indices
   (__m128i): forwards to the scatterdiv2di builtin. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), \
      (int)(scale))
3497
/* Unmasked form of _mm256_mask_i64scatter_pd: same expansion with an all-ones
   mask. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  _mm256_mask_i64scatter_pd((addr), (__mmask8)-1, (index), (v1), (scale))
3502
/* Masked scatter of the doubles in v1 (__m256d) with 64-bit indices
   (__m256i): forwards to the scatterdiv4df builtin. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), \
      (int)(scale))
3507
/* Unmasked form of _mm256_mask_i64scatter_epi64: same expansion with an
   all-ones mask. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  _mm256_mask_i64scatter_epi64((addr), (__mmask8)-1, (index), (v1), (scale))
3512
/* Masked scatter of the 64-bit integers in v1 (__m256i) with 64-bit indices
   (__m256i): forwards to the scatterdiv4di builtin. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), \
      (int)(scale))
3517
/* Unmasked form of _mm_mask_i64scatter_ps: same expansion with an all-ones
   mask. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  _mm_mask_i64scatter_ps((addr), (__mmask8)-1, (index), (v1), (scale))
3522
/* Masked scatter of the floats in v1 (__m128) with 64-bit indices (__m128i):
   forwards to the scatterdiv4sf builtin. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3527
/* Unmasked form of _mm_mask_i64scatter_epi32: same expansion with an all-ones
   mask. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  _mm_mask_i64scatter_epi32((addr), (__mmask8)-1, (index), (v1), (scale))
3532
/* Masked scatter of the 32-bit integers in v1 (__m128i) with 64-bit indices
   (__m128i): forwards to the scatterdiv4si builtin. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3537
/* Unmasked form of _mm256_mask_i64scatter_ps: same expansion with an all-ones
   mask. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  _mm256_mask_i64scatter_ps((addr), (__mmask8)-1, (index), (v1), (scale))
3542
/* Masked scatter of the floats in v1 (a 128-bit __m128) with 64-bit indices
   (__m256i): forwards to the scatterdiv8sf builtin. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3547
/* Unmasked form of _mm256_mask_i64scatter_epi32: same expansion with an
   all-ones mask. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  _mm256_mask_i64scatter_epi32((addr), (__mmask8)-1, (index), (v1), (scale))
3552
/* Masked scatter of the 32-bit integers in v1 (a 128-bit __m128i) with 64-bit
   indices (__m256i): forwards to the scatterdiv8si builtin. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3557
/* Unmasked form of _mm_mask_i32scatter_pd: same expansion with an all-ones
   mask. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  _mm_mask_i32scatter_pd((addr), (__mmask8)-1, (index), (v1), (scale))
3562
/* Masked scatter of the doubles in v1 (__m128d) with 32-bit indices
   (__m128i): forwards to the scattersiv2df builtin. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), \
      (int)(scale))
3567
/* Unmasked form of _mm_mask_i32scatter_epi64: same expansion with an all-ones
   mask. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  _mm_mask_i32scatter_epi64((addr), (__mmask8)-1, (index), (v1), (scale))
3572
/* Masked scatter of the 64-bit integers in v1 (__m128i) with 32-bit indices
   (__m128i): forwards to the scattersiv2di builtin. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), \
      (int)(scale))
3577
/* Unmasked form of _mm256_mask_i32scatter_pd: same expansion with an all-ones
   mask. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  _mm256_mask_i32scatter_pd((addr), (__mmask8)-1, (index), (v1), (scale))
3582
/* Masked scatter of the doubles in v1 (__m256d) with 32-bit indices (a
   128-bit __m128i): forwards to the scattersiv4df builtin. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), \
      (int)(scale))
3587
/* Unmasked form of _mm256_mask_i32scatter_epi64: same expansion with an
   all-ones mask. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  _mm256_mask_i32scatter_epi64((addr), (__mmask8)-1, (index), (v1), (scale))
3592
/* Masked scatter of the 64-bit integers in v1 (__m256i) with 32-bit indices
   (a 128-bit __m128i): forwards to the scattersiv4di builtin. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), \
      (int)(scale))
3597
/* Unmasked form of _mm_mask_i32scatter_ps: same expansion with an all-ones
   mask. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  _mm_mask_i32scatter_ps((addr), (__mmask8)-1, (index), (v1), (scale))
3602
/* Masked scatter of the floats in v1 (__m128) with 32-bit indices (__m128i):
   forwards to the scattersiv4sf builtin. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), \
      (int)(scale))
3607
/* Unmasked form of _mm_mask_i32scatter_epi32: same expansion with an all-ones
   mask. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  _mm_mask_i32scatter_epi32((addr), (__mmask8)-1, (index), (v1), (scale))
3612
/* Masked scatter of the 32-bit integers in v1 (__m128i) with 32-bit indices
   (__m128i): forwards to the scattersiv4si builtin. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), \
      (int)(scale))
3617
/* Unmasked form of _mm256_mask_i32scatter_ps: same expansion with an all-ones
   mask. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  _mm256_mask_i32scatter_ps((addr), (__mmask8)-1, (index), (v1), (scale))
3622
/* Masked scatter of the floats in v1 (__m256) with 32-bit indices (__m256i):
   forwards to the scattersiv8sf builtin. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale))
3627
/* Unmasked form of _mm256_mask_i32scatter_epi32: same expansion with an
   all-ones mask. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  _mm256_mask_i32scatter_epi32((addr), (__mmask8)-1, (index), (v1), (scale))
3632
/* Masked scatter of the 32-bit integers in v1 (__m256i) with 32-bit indices
   (__m256i): forwards to the scattersiv8si builtin. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale))
3637
3638 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3639 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3640 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3641 (__v2df)_mm_sqrt_pd(__A),
3642 (__v2df)__W);
3643 }
3644
3645 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3646 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3647 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3648 (__v2df)_mm_sqrt_pd(__A),
3649 (__v2df)_mm_setzero_pd());
3650 }
3651
3652 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3653 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3654 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3655 (__v4df)_mm256_sqrt_pd(__A),
3656 (__v4df)__W);
3657 }
3658
3659 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3660 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3661 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3662 (__v4df)_mm256_sqrt_pd(__A),
3663 (__v4df)_mm256_setzero_pd());
3664 }
3665
3666 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3667 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3668 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3669 (__v4sf)_mm_sqrt_ps(__A),
3670 (__v4sf)__W);
3671 }
3672
3673 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3674 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3675 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3676 (__v4sf)_mm_sqrt_ps(__A),
3677 (__v4sf)_mm_setzero_ps());
3678 }
3679
3680 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3681 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3682 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3683 (__v8sf)_mm256_sqrt_ps(__A),
3684 (__v8sf)__W);
3685 }
3686
3687 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3689 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3690 (__v8sf)_mm256_sqrt_ps(__A),
3691 (__v8sf)_mm256_setzero_ps());
3692 }
3693
3694 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3695 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3696 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3697 (__v2df)_mm_sub_pd(__A, __B),
3698 (__v2df)__W);
3699 }
3700
3701 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3702 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3703 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3704 (__v2df)_mm_sub_pd(__A, __B),
3705 (__v2df)_mm_setzero_pd());
3706 }
3707
3708 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3709 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3710 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3711 (__v4df)_mm256_sub_pd(__A, __B),
3712 (__v4df)__W);
3713 }
3714
3715 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3716 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3717 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3718 (__v4df)_mm256_sub_pd(__A, __B),
3719 (__v4df)_mm256_setzero_pd());
3720 }
3721
3722 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3723 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3724 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3725 (__v4sf)_mm_sub_ps(__A, __B),
3726 (__v4sf)__W);
3727 }
3728
3729 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3730 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3731 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3732 (__v4sf)_mm_sub_ps(__A, __B),
3733 (__v4sf)_mm_setzero_ps());
3734 }
3735
3736 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3737 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3738 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3739 (__v8sf)_mm256_sub_ps(__A, __B),
3740 (__v8sf)__W);
3741 }
3742
3743 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3744 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3745 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3746 (__v8sf)_mm256_sub_ps(__A, __B),
3747 (__v8sf)_mm256_setzero_ps());
3748 }
3749
3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3751 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3752 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3753 (__v4si)__B);
3754 }
3755
3756 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3757 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3758 __m128i __B) {
3759 return (__m128i)__builtin_ia32_selectd_128(__U,
3760 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3761 (__v4si)__A);
3762 }
3763
3764 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3765 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3766 __m128i __B) {
3767 return (__m128i)__builtin_ia32_selectd_128(__U,
3768 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3769 (__v4si)__I);
3770 }
3771
3772 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3773 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3774 __m128i __B) {
3775 return (__m128i)__builtin_ia32_selectd_128(__U,
3776 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3777 (__v4si)_mm_setzero_si128());
3778 }
3779
3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3781 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3782 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3783 (__v8si) __B);
3784 }
3785
3786 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3787 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3788 __m256i __B) {
3789 return (__m256i)__builtin_ia32_selectd_256(__U,
3790 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3791 (__v8si)__A);
3792 }
3793
3794 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3795 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3796 __m256i __B) {
3797 return (__m256i)__builtin_ia32_selectd_256(__U,
3798 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3799 (__v8si)__I);
3800 }
3801
3802 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3803 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3804 __m256i __B) {
3805 return (__m256i)__builtin_ia32_selectd_256(__U,
3806 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3807 (__v8si)_mm256_setzero_si256());
3808 }
3809
3810 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3811 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3812 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3813 (__v2df)__B);
3814 }
3815
3816 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3817 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3818 return (__m128d)__builtin_ia32_selectpd_128(__U,
3819 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3820 (__v2df)__A);
3821 }
3822
3823 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3824 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3825 return (__m128d)__builtin_ia32_selectpd_128(__U,
3826 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3827 (__v2df)(__m128d)__I);
3828 }
3829
3830 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3831 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3832 return (__m128d)__builtin_ia32_selectpd_128(__U,
3833 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3834 (__v2df)_mm_setzero_pd());
3835 }
3836
3837 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3838 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3839 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3840 (__v4df)__B);
3841 }
3842
3843 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3844 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3845 __m256d __B) {
3846 return (__m256d)__builtin_ia32_selectpd_256(__U,
3847 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3848 (__v4df)__A);
3849 }
3850
3851 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3852 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3853 __m256d __B) {
3854 return (__m256d)__builtin_ia32_selectpd_256(__U,
3855 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3856 (__v4df)(__m256d)__I);
3857 }
3858
3859 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3860 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3861 __m256d __B) {
3862 return (__m256d)__builtin_ia32_selectpd_256(__U,
3863 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3864 (__v4df)_mm256_setzero_pd());
3865 }
3866
3867 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3868 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3869 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3870 (__v4sf)__B);
3871 }
3872
3873 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3874 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3875 return (__m128)__builtin_ia32_selectps_128(__U,
3876 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3877 (__v4sf)__A);
3878 }
3879
3880 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3881 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3882 return (__m128)__builtin_ia32_selectps_128(__U,
3883 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3884 (__v4sf)(__m128)__I);
3885 }
3886
3887 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3888 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3889 return (__m128)__builtin_ia32_selectps_128(__U,
3890 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3891 (__v4sf)_mm_setzero_ps());
3892 }
3893
3894 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3895 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3896 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3897 (__v8sf) __B);
3898 }
3899
3900 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3901 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3902 return (__m256)__builtin_ia32_selectps_256(__U,
3903 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3904 (__v8sf)__A);
3905 }
3906
3907 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3908 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3909 __m256 __B) {
3910 return (__m256)__builtin_ia32_selectps_256(__U,
3911 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3912 (__v8sf)(__m256)__I);
3913 }
3914
3915 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3916 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3917 __m256 __B) {
3918 return (__m256)__builtin_ia32_selectps_256(__U,
3919 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3920 (__v8sf)_mm256_setzero_ps());
3921 }
3922
3923 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3924 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3925 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3926 (__v2di)__B);
3927 }
3928
3929 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3930 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3931 __m128i __B) {
3932 return (__m128i)__builtin_ia32_selectq_128(__U,
3933 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3934 (__v2di)__A);
3935 }
3936
3937 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3938 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3939 __m128i __B) {
3940 return (__m128i)__builtin_ia32_selectq_128(__U,
3941 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3942 (__v2di)__I);
3943 }
3944
3945 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3946 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3947 __m128i __B) {
3948 return (__m128i)__builtin_ia32_selectq_128(__U,
3949 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3950 (__v2di)_mm_setzero_si128());
3951 }
3952
3953
3954 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3955 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3956 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3957 (__v4di) __B);
3958 }
3959
3960 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3961 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3962 __m256i __B) {
3963 return (__m256i)__builtin_ia32_selectq_256(__U,
3964 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3965 (__v4di)__A);
3966 }
3967
3968 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3969 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3970 __m256i __B) {
3971 return (__m256i)__builtin_ia32_selectq_256(__U,
3972 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3973 (__v4di)__I);
3974 }
3975
3976 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3977 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3978 __m256i __B) {
3979 return (__m256i)__builtin_ia32_selectq_256(__U,
3980 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3981 (__v4di)_mm256_setzero_si256());
3982 }
3983
3984 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3985 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3986 {
3987 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3988 (__v4si)_mm_cvtepi8_epi32(__A),
3989 (__v4si)__W);
3990 }
3991
3992 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3994 {
3995 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3996 (__v4si)_mm_cvtepi8_epi32(__A),
3997 (__v4si)_mm_setzero_si128());
3998 }
3999
4000 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4001 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4002 {
4003 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4004 (__v8si)_mm256_cvtepi8_epi32(__A),
4005 (__v8si)__W);
4006 }
4007
4008 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4010 {
4011 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4012 (__v8si)_mm256_cvtepi8_epi32(__A),
4013 (__v8si)_mm256_setzero_si256());
4014 }
4015
4016 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4017 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4018 {
4019 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4020 (__v2di)_mm_cvtepi8_epi64(__A),
4021 (__v2di)__W);
4022 }
4023
4024 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4026 {
4027 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4028 (__v2di)_mm_cvtepi8_epi64(__A),
4029 (__v2di)_mm_setzero_si128());
4030 }
4031
4032 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4033 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4034 {
4035 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4036 (__v4di)_mm256_cvtepi8_epi64(__A),
4037 (__v4di)__W);
4038 }
4039
4040 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4042 {
4043 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4044 (__v4di)_mm256_cvtepi8_epi64(__A),
4045 (__v4di)_mm256_setzero_si256());
4046 }
4047
4048 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4049 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4050 {
4051 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4052 (__v2di)_mm_cvtepi32_epi64(__X),
4053 (__v2di)__W);
4054 }
4055
4056 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4058 {
4059 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4060 (__v2di)_mm_cvtepi32_epi64(__X),
4061 (__v2di)_mm_setzero_si128());
4062 }
4063
4064 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4065 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4066 {
4067 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4068 (__v4di)_mm256_cvtepi32_epi64(__X),
4069 (__v4di)__W);
4070 }
4071
4072 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4074 {
4075 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4076 (__v4di)_mm256_cvtepi32_epi64(__X),
4077 (__v4di)_mm256_setzero_si256());
4078 }
4079
4080 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4081 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4082 {
4083 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4084 (__v4si)_mm_cvtepi16_epi32(__A),
4085 (__v4si)__W);
4086 }
4087
4088 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4090 {
4091 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4092 (__v4si)_mm_cvtepi16_epi32(__A),
4093 (__v4si)_mm_setzero_si128());
4094 }
4095
4096 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4097 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4098 {
4099 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4100 (__v8si)_mm256_cvtepi16_epi32(__A),
4101 (__v8si)__W);
4102 }
4103
4104 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4106 {
4107 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4108 (__v8si)_mm256_cvtepi16_epi32(__A),
4109 (__v8si)_mm256_setzero_si256());
4110 }
4111
4112 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4113 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4114 {
4115 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4116 (__v2di)_mm_cvtepi16_epi64(__A),
4117 (__v2di)__W);
4118 }
4119
4120 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4122 {
4123 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4124 (__v2di)_mm_cvtepi16_epi64(__A),
4125 (__v2di)_mm_setzero_si128());
4126 }
4127
4128 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4129 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4130 {
4131 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4132 (__v4di)_mm256_cvtepi16_epi64(__A),
4133 (__v4di)__W);
4134 }
4135
4136 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4138 {
4139 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4140 (__v4di)_mm256_cvtepi16_epi64(__A),
4141 (__v4di)_mm256_setzero_si256());
4142 }
4143
4144
4145 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4146 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4147 {
4148 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4149 (__v4si)_mm_cvtepu8_epi32(__A),
4150 (__v4si)__W);
4151 }
4152
4153 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4155 {
4156 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4157 (__v4si)_mm_cvtepu8_epi32(__A),
4158 (__v4si)_mm_setzero_si128());
4159 }
4160
4161 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4162 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4163 {
4164 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4165 (__v8si)_mm256_cvtepu8_epi32(__A),
4166 (__v8si)__W);
4167 }
4168
4169 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4171 {
4172 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4173 (__v8si)_mm256_cvtepu8_epi32(__A),
4174 (__v8si)_mm256_setzero_si256());
4175 }
4176
4177 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4178 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4179 {
4180 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4181 (__v2di)_mm_cvtepu8_epi64(__A),
4182 (__v2di)__W);
4183 }
4184
4185 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4187 {
4188 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4189 (__v2di)_mm_cvtepu8_epi64(__A),
4190 (__v2di)_mm_setzero_si128());
4191 }
4192
4193 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4194 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4195 {
4196 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4197 (__v4di)_mm256_cvtepu8_epi64(__A),
4198 (__v4di)__W);
4199 }
4200
4201 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4203 {
4204 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4205 (__v4di)_mm256_cvtepu8_epi64(__A),
4206 (__v4di)_mm256_setzero_si256());
4207 }
4208
4209 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4210 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4211 {
4212 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4213 (__v2di)_mm_cvtepu32_epi64(__X),
4214 (__v2di)__W);
4215 }
4216
4217 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4219 {
4220 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4221 (__v2di)_mm_cvtepu32_epi64(__X),
4222 (__v2di)_mm_setzero_si128());
4223 }
4224
4225 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4226 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4227 {
4228 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4229 (__v4di)_mm256_cvtepu32_epi64(__X),
4230 (__v4di)__W);
4231 }
4232
4233 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4235 {
4236 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4237 (__v4di)_mm256_cvtepu32_epi64(__X),
4238 (__v4di)_mm256_setzero_si256());
4239 }
4240
4241 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4242 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4243 {
4244 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4245 (__v4si)_mm_cvtepu16_epi32(__A),
4246 (__v4si)__W);
4247 }
4248
4249 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4251 {
4252 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4253 (__v4si)_mm_cvtepu16_epi32(__A),
4254 (__v4si)_mm_setzero_si128());
4255 }
4256
4257 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4258 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4259 {
4260 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4261 (__v8si)_mm256_cvtepu16_epi32(__A),
4262 (__v8si)__W);
4263 }
4264
4265 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4267 {
4268 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4269 (__v8si)_mm256_cvtepu16_epi32(__A),
4270 (__v8si)_mm256_setzero_si256());
4271 }
4272
4273 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4274 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4275 {
4276 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4277 (__v2di)_mm_cvtepu16_epi64(__A),
4278 (__v2di)__W);
4279 }
4280
4281 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4283 {
4284 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4285 (__v2di)_mm_cvtepu16_epi64(__A),
4286 (__v2di)_mm_setzero_si128());
4287 }
4288
4289 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4290 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4291 {
4292 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4293 (__v4di)_mm256_cvtepu16_epi64(__A),
4294 (__v4di)__W);
4295 }
4296
4297 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4299 {
4300 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4301 (__v4di)_mm256_cvtepu16_epi64(__A),
4302 (__v4di)_mm256_setzero_si256());
4303 }
4304
4305
/* Expands to __builtin_ia32_prold128 applied to A (as __v4si) and B (as int). */
#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(A), (int)(B)))
4308
/* Expands to __builtin_ia32_selectd_128 over mask U, _mm_rol_epi32(A, B) and W. */
#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_rol_epi32((A), (B)), (__v4si)(__m128i)(W)))
4313
/* Expands to __builtin_ia32_selectd_128 over mask U, _mm_rol_epi32(A, B) and
   _mm_setzero_si128(). */
#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_rol_epi32((A), (B)), \
      (__v4si)_mm_setzero_si128()))
4318
/* Expands to __builtin_ia32_prold256 applied to A (as __v8si) and B (as int). */
#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(A), (int)(B)))
4321
/* Expands to __builtin_ia32_selectd_256 over mask U, _mm256_rol_epi32(A, B)
   and W. */
#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_rol_epi32((A), (B)), \
      (__v8si)(__m256i)(W)))
4326
/* Expands to __builtin_ia32_selectd_256 over mask U, _mm256_rol_epi32(A, B)
   and _mm256_setzero_si256(). */
#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_rol_epi32((A), (B)), \
      (__v8si)_mm256_setzero_si256()))
4331
/* Expands to __builtin_ia32_prolq128 applied to A (as __v2di) and B (as int). */
#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(A), (int)(B)))
4334
/* Expands to __builtin_ia32_selectq_128 over mask U, _mm_rol_epi64(A, B) and W. */
#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), (__v2di)_mm_rol_epi64((A), (B)), (__v2di)(__m128i)(W)))
4339
/* Expands to __builtin_ia32_selectq_128 over mask U, _mm_rol_epi64(A, B) and
   _mm_setzero_si128(). */
#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), (__v2di)_mm_rol_epi64((A), (B)), \
      (__v2di)_mm_setzero_si128()))
4344
/* Expands to __builtin_ia32_prolq256 applied to A (as __v4di) and B (as int). */
#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(A), (int)(B)))
4347
/* Expands to __builtin_ia32_selectq_256 over mask U, _mm256_rol_epi64(A, B)
   and W. */
#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_rol_epi64((A), (B)), \
      (__v4di)(__m256i)(W)))
4352
/* Expands to __builtin_ia32_selectq_256 over mask U, _mm256_rol_epi64(A, B)
   and _mm256_setzero_si256(). */
#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_rol_epi64((A), (B)), \
      (__v4di)_mm256_setzero_si256()))
4357
4358static __inline__ __m128i __DEFAULT_FN_ATTRS128
4359_mm_rolv_epi32 (__m128i __A, __m128i __B)
4360{
4361 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4362}
4363
4364static __inline__ __m128i __DEFAULT_FN_ATTRS128
4365_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4366{
4367 return (__m128i)__builtin_ia32_selectd_128(__U,
4368 (__v4si)_mm_rolv_epi32(__A, __B),
4369 (__v4si)__W);
4370}
4371
4372static __inline__ __m128i __DEFAULT_FN_ATTRS128
4373_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4374{
4375 return (__m128i)__builtin_ia32_selectd_128(__U,
4376 (__v4si)_mm_rolv_epi32(__A, __B),
4377 (__v4si)_mm_setzero_si128());
4378}
4379
4380static __inline__ __m256i __DEFAULT_FN_ATTRS256
4381_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4382{
4383 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4384}
4385
4386static __inline__ __m256i __DEFAULT_FN_ATTRS256
4387_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4388{
4389 return (__m256i)__builtin_ia32_selectd_256(__U,
4390 (__v8si)_mm256_rolv_epi32(__A, __B),
4391 (__v8si)__W);
4392}
4393
4394static __inline__ __m256i __DEFAULT_FN_ATTRS256
4395_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4396{
4397 return (__m256i)__builtin_ia32_selectd_256(__U,
4398 (__v8si)_mm256_rolv_epi32(__A, __B),
4399 (__v8si)_mm256_setzero_si256());
4400}
4401
4402static __inline__ __m128i __DEFAULT_FN_ATTRS128
4403_mm_rolv_epi64 (__m128i __A, __m128i __B)
4404{
4405 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4406}
4407
4408static __inline__ __m128i __DEFAULT_FN_ATTRS128
4409_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4410{
4411 return (__m128i)__builtin_ia32_selectq_128(__U,
4412 (__v2di)_mm_rolv_epi64(__A, __B),
4413 (__v2di)__W);
4414}
4415
4416static __inline__ __m128i __DEFAULT_FN_ATTRS128
4417_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4418{
4419 return (__m128i)__builtin_ia32_selectq_128(__U,
4420 (__v2di)_mm_rolv_epi64(__A, __B),
4421 (__v2di)_mm_setzero_si128());
4422}
4423
4424static __inline__ __m256i __DEFAULT_FN_ATTRS256
4425_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4426{
4427 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4428}
4429
4430static __inline__ __m256i __DEFAULT_FN_ATTRS256
4431_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4432{
4433 return (__m256i)__builtin_ia32_selectq_256(__U,
4434 (__v4di)_mm256_rolv_epi64(__A, __B),
4435 (__v4di)__W);
4436}
4437
4438static __inline__ __m256i __DEFAULT_FN_ATTRS256
4439_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4440{
4441 return (__m256i)__builtin_ia32_selectq_256(__U,
4442 (__v4di)_mm256_rolv_epi64(__A, __B),
4443 (__v4di)_mm256_setzero_si256());
4444}
4445
/* Expands to __builtin_ia32_prord128 applied to A (as __v4si) and B (as int). */
#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(A), (int)(B)))
4448
/* Expands to __builtin_ia32_selectd_128 over mask U, _mm_ror_epi32(A, B) and W. */
#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_ror_epi32((A), (B)), (__v4si)(__m128i)(W)))
4453
/* Expands to __builtin_ia32_selectd_128 over mask U, _mm_ror_epi32(A, B) and
   _mm_setzero_si128(). */
#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_ror_epi32((A), (B)), \
      (__v4si)_mm_setzero_si128()))
4458
/* Expands to __builtin_ia32_prord256 applied to A (as __v8si) and B (as int). */
#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(A), (int)(B)))
4461
/* Expands to __builtin_ia32_selectd_256 over mask U, _mm256_ror_epi32(A, B)
   and W. */
#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_ror_epi32((A), (B)), \
      (__v8si)(__m256i)(W)))
4466
/* Expands to __builtin_ia32_selectd_256 over mask U, _mm256_ror_epi32(A, B)
   and _mm256_setzero_si256(). */
#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_ror_epi32((A), (B)), \
      (__v8si)_mm256_setzero_si256()))
4471
/* Expands to __builtin_ia32_prorq128 applied to A (as __v2di) and B (as int). */
#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(A), (int)(B)))
4474
/* Expands to __builtin_ia32_selectq_128 over mask U, _mm_ror_epi64(A, B) and W. */
#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), (__v2di)_mm_ror_epi64((A), (B)), (__v2di)(__m128i)(W)))
4479
/* Expands to __builtin_ia32_selectq_128 over mask U, _mm_ror_epi64(A, B) and
   _mm_setzero_si128(). */
#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), (__v2di)_mm_ror_epi64((A), (B)), \
      (__v2di)_mm_setzero_si128()))
4484
/* Expands to __builtin_ia32_prorq256 applied to A (as __v4di) and B (as int). */
#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(A), (int)(B)))
4487
/* Expands to __builtin_ia32_selectq_256 over mask U, _mm256_ror_epi64(A, B)
   and W. */
#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_ror_epi64((A), (B)), \
      (__v4di)(__m256i)(W)))
4492
/* Expands to __builtin_ia32_selectq_256 over mask U, _mm256_ror_epi64(A, B)
   and _mm256_setzero_si256(). */
#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_ror_epi64((A), (B)), \
      (__v4di)_mm256_setzero_si256()))
4497
4498static __inline__ __m128i __DEFAULT_FN_ATTRS128
4499_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4500{
4501 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4502 (__v4si)_mm_sll_epi32(__A, __B),
4503 (__v4si)__W);
4504}
4505
4506static __inline__ __m128i __DEFAULT_FN_ATTRS128
4507_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4508{
4509 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4510 (__v4si)_mm_sll_epi32(__A, __B),
4511 (__v4si)_mm_setzero_si128());
4512}
4513
4514static __inline__ __m256i __DEFAULT_FN_ATTRS256
4515_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4516{
4517 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4518 (__v8si)_mm256_sll_epi32(__A, __B),
4519 (__v8si)__W);
4520}
4521
4522static __inline__ __m256i __DEFAULT_FN_ATTRS256
4523_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4524{
4525 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4526 (__v8si)_mm256_sll_epi32(__A, __B),
4527 (__v8si)_mm256_setzero_si256());
4528}
4529
4530static __inline__ __m128i __DEFAULT_FN_ATTRS128
4531_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4532{
4533 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4534 (__v4si)_mm_slli_epi32(__A, (int)__B),
4535 (__v4si)__W);
4536}
4537
4538static __inline__ __m128i __DEFAULT_FN_ATTRS128
4539_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4540{
4541 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4542 (__v4si)_mm_slli_epi32(__A, (int)__B),
4543 (__v4si)_mm_setzero_si128());
4544}
4545
4546static __inline__ __m256i __DEFAULT_FN_ATTRS256
4547_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4548{
4549 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4550 (__v8si)_mm256_slli_epi32(__A, (int)__B),
4551 (__v8si)__W);
4552}
4553
4554static __inline__ __m256i __DEFAULT_FN_ATTRS256
4555_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
4556{
4557 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4558 (__v8si)_mm256_slli_epi32(__A, (int)__B),
4559 (__v8si)_mm256_setzero_si256());
4560}
4561
4562static __inline__ __m128i __DEFAULT_FN_ATTRS128
4563_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4564{
4565 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4566 (__v2di)_mm_sll_epi64(__A, __B),
4567 (__v2di)__W);
4568}
4569
4570static __inline__ __m128i __DEFAULT_FN_ATTRS128
4571_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4572{
4573 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4574 (__v2di)_mm_sll_epi64(__A, __B),
4575 (__v2di)_mm_setzero_si128());
4576}
4577
4578static __inline__ __m256i __DEFAULT_FN_ATTRS256
4579_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4580{
4581 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4582 (__v4di)_mm256_sll_epi64(__A, __B),
4583 (__v4di)__W);
4584}
4585
4586static __inline__ __m256i __DEFAULT_FN_ATTRS256
4587_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4588{
4589 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4590 (__v4di)_mm256_sll_epi64(__A, __B),
4591 (__v4di)_mm256_setzero_si256());
4592}
4593
4594static __inline__ __m128i __DEFAULT_FN_ATTRS128
4595_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4596{
4597 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4598 (__v2di)_mm_slli_epi64(__A, (int)__B),
4599 (__v2di)__W);
4600}
4601
4602static __inline__ __m128i __DEFAULT_FN_ATTRS128
4603_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4604{
4605 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4606 (__v2di)_mm_slli_epi64(__A, (int)__B),
4607 (__v2di)_mm_setzero_si128());
4608}
4609
4610static __inline__ __m256i __DEFAULT_FN_ATTRS256
4611_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4612{
4613 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4614 (__v4di)_mm256_slli_epi64(__A, (int)__B),
4615 (__v4di)__W);
4616}
4617
4618static __inline__ __m256i __DEFAULT_FN_ATTRS256
4619_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
4620{
4621 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4622 (__v4di)_mm256_slli_epi64(__A, (int)__B),
4623 (__v4di)_mm256_setzero_si256());
4624}
4625
4626static __inline__ __m128i __DEFAULT_FN_ATTRS128
4627_mm_rorv_epi32 (__m128i __A, __m128i __B)
4628{
4629 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4630}
4631
4632static __inline__ __m128i __DEFAULT_FN_ATTRS128
4633_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4634{
4635 return (__m128i)__builtin_ia32_selectd_128(__U,
4636 (__v4si)_mm_rorv_epi32(__A, __B),
4637 (__v4si)__W);
4638}
4639
4640static __inline__ __m128i __DEFAULT_FN_ATTRS128
4641_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4642{
4643 return (__m128i)__builtin_ia32_selectd_128(__U,
4644 (__v4si)_mm_rorv_epi32(__A, __B),
4645 (__v4si)_mm_setzero_si128());
4646}
4647
4648static __inline__ __m256i __DEFAULT_FN_ATTRS256
4649_mm256_rorv_epi32 (__m256i __A, __m256i __B)
4650{
4651 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4652}
4653
4654static __inline__ __m256i __DEFAULT_FN_ATTRS256
4655_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4656{
4657 return (__m256i)__builtin_ia32_selectd_256(__U,
4658 (__v8si)_mm256_rorv_epi32(__A, __B),
4659 (__v8si)__W);
4660}
4661
4662static __inline__ __m256i __DEFAULT_FN_ATTRS256
4663_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4664{
4665 return (__m256i)__builtin_ia32_selectd_256(__U,
4666 (__v8si)_mm256_rorv_epi32(__A, __B),
4667 (__v8si)_mm256_setzero_si256());
4668}
4669
4670static __inline__ __m128i __DEFAULT_FN_ATTRS128
4671_mm_rorv_epi64 (__m128i __A, __m128i __B)
4672{
4673 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4674}
4675
4676static __inline__ __m128i __DEFAULT_FN_ATTRS128
4677_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4678{
4679 return (__m128i)__builtin_ia32_selectq_128(__U,
4680 (__v2di)_mm_rorv_epi64(__A, __B),
4681 (__v2di)__W);
4682}
4683
4684static __inline__ __m128i __DEFAULT_FN_ATTRS128
4685_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4686{
4687 return (__m128i)__builtin_ia32_selectq_128(__U,
4688 (__v2di)_mm_rorv_epi64(__A, __B),
4689 (__v2di)_mm_setzero_si128());
4690}
4691
4692static __inline__ __m256i __DEFAULT_FN_ATTRS256
4693_mm256_rorv_epi64 (__m256i __A, __m256i __B)
4694{
4695 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4696}
4697
4698static __inline__ __m256i __DEFAULT_FN_ATTRS256
4699_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4700{
4701 return (__m256i)__builtin_ia32_selectq_256(__U,
4702 (__v4di)_mm256_rorv_epi64(__A, __B),
4703 (__v4di)__W);
4704}
4705
4706static __inline__ __m256i __DEFAULT_FN_ATTRS256
4707_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4708{
4709 return (__m256i)__builtin_ia32_selectq_256(__U,
4710 (__v4di)_mm256_rorv_epi64(__A, __B),
4711 (__v4di)_mm256_setzero_si256());
4712}
4713
4714static __inline__ __m128i __DEFAULT_FN_ATTRS128
4715_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4716{
4717 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4718 (__v2di)_mm_sllv_epi64(__X, __Y),
4719 (__v2di)__W);
4720}
4721
4722static __inline__ __m128i __DEFAULT_FN_ATTRS128
4723_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4724{
4725 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4726 (__v2di)_mm_sllv_epi64(__X, __Y),
4727 (__v2di)_mm_setzero_si128());
4728}
4729
4730static __inline__ __m256i __DEFAULT_FN_ATTRS256
4731_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4732{
4733 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4734 (__v4di)_mm256_sllv_epi64(__X, __Y),
4735 (__v4di)__W);
4736}
4737
4738static __inline__ __m256i __DEFAULT_FN_ATTRS256
4739_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4740{
4741 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4742 (__v4di)_mm256_sllv_epi64(__X, __Y),
4743 (__v4di)_mm256_setzero_si256());
4744}
4745
4746static __inline__ __m128i __DEFAULT_FN_ATTRS128
4747_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4748{
4749 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4750 (__v4si)_mm_sllv_epi32(__X, __Y),
4751 (__v4si)__W);
4752}
4753
4754static __inline__ __m128i __DEFAULT_FN_ATTRS128
4755_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4756{
4757 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4758 (__v4si)_mm_sllv_epi32(__X, __Y),
4759 (__v4si)_mm_setzero_si128());
4760}
4761
4762static __inline__ __m256i __DEFAULT_FN_ATTRS256
4763_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4764{
4765 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4766 (__v8si)_mm256_sllv_epi32(__X, __Y),
4767 (__v8si)__W);
4768}
4769
4770static __inline__ __m256i __DEFAULT_FN_ATTRS256
4771_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4772{
4773 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4774 (__v8si)_mm256_sllv_epi32(__X, __Y),
4775 (__v8si)_mm256_setzero_si256());
4776}
4777
4778static __inline__ __m128i __DEFAULT_FN_ATTRS128
4779_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4780{
4781 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4782 (__v2di)_mm_srlv_epi64(__X, __Y),
4783 (__v2di)__W);
4784}
4785
4786static __inline__ __m128i __DEFAULT_FN_ATTRS128
4787_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4788{
4789 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4790 (__v2di)_mm_srlv_epi64(__X, __Y),
4791 (__v2di)_mm_setzero_si128());
4792}
4793
4794static __inline__ __m256i __DEFAULT_FN_ATTRS256
4795_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4796{
4797 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4798 (__v4di)_mm256_srlv_epi64(__X, __Y),
4799 (__v4di)__W);
4800}
4801
4802static __inline__ __m256i __DEFAULT_FN_ATTRS256
4803_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4804{
4805 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4806 (__v4di)_mm256_srlv_epi64(__X, __Y),
4807 (__v4di)_mm256_setzero_si256());
4808}
4809
4810static __inline__ __m128i __DEFAULT_FN_ATTRS128
4811_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4812{
4813 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4814 (__v4si)_mm_srlv_epi32(__X, __Y),
4815 (__v4si)__W);
4816}
4817
4818static __inline__ __m128i __DEFAULT_FN_ATTRS128
4819_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4820{
4821 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4822 (__v4si)_mm_srlv_epi32(__X, __Y),
4823 (__v4si)_mm_setzero_si128());
4824}
4825
4826static __inline__ __m256i __DEFAULT_FN_ATTRS256
4827_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4828{
4829 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4830 (__v8si)_mm256_srlv_epi32(__X, __Y),
4831 (__v8si)__W);
4832}
4833
4834static __inline__ __m256i __DEFAULT_FN_ATTRS256
4835_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4836{
4837 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4838 (__v8si)_mm256_srlv_epi32(__X, __Y),
4839 (__v8si)_mm256_setzero_si256());
4840}
4841
4842static __inline__ __m128i __DEFAULT_FN_ATTRS128
4843_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4844{
4845 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4846 (__v4si)_mm_srl_epi32(__A, __B),
4847 (__v4si)__W);
4848}
4849
4850static __inline__ __m128i __DEFAULT_FN_ATTRS128
4851_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4852{
4853 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4854 (__v4si)_mm_srl_epi32(__A, __B),
4855 (__v4si)_mm_setzero_si128());
4856}
4857
4858static __inline__ __m256i __DEFAULT_FN_ATTRS256
4859_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4860{
4861 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4862 (__v8si)_mm256_srl_epi32(__A, __B),
4863 (__v8si)__W);
4864}
4865
4866static __inline__ __m256i __DEFAULT_FN_ATTRS256
4867_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4868{
4869 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4870 (__v8si)_mm256_srl_epi32(__A, __B),
4871 (__v8si)_mm256_setzero_si256());
4872}
4873
4874static __inline__ __m128i __DEFAULT_FN_ATTRS128
4875_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4876{
4877 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4878 (__v4si)_mm_srli_epi32(__A, (int)__B),
4879 (__v4si)__W);
4880}
4881
4882static __inline__ __m128i __DEFAULT_FN_ATTRS128
4883_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4884{
4885 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4886 (__v4si)_mm_srli_epi32(__A, (int)__B),
4887 (__v4si)_mm_setzero_si128());
4888}
4889
4890static __inline__ __m256i __DEFAULT_FN_ATTRS256
4891_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4892{
4893 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4894 (__v8si)_mm256_srli_epi32(__A, (int)__B),
4895 (__v8si)__W);
4896}
4897
4898static __inline__ __m256i __DEFAULT_FN_ATTRS256
4899_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
4900{
4901 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4902 (__v8si)_mm256_srli_epi32(__A, (int)__B),
4903 (__v8si)_mm256_setzero_si256());
4904}
4905
4906static __inline__ __m128i __DEFAULT_FN_ATTRS128
4907_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4908{
4909 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4910 (__v2di)_mm_srl_epi64(__A, __B),
4911 (__v2di)__W);
4912}
4913
4914static __inline__ __m128i __DEFAULT_FN_ATTRS128
4915_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4916{
4917 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4918 (__v2di)_mm_srl_epi64(__A, __B),
4919 (__v2di)_mm_setzero_si128());
4920}
4921
4922static __inline__ __m256i __DEFAULT_FN_ATTRS256
4923_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4924{
4925 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4926 (__v4di)_mm256_srl_epi64(__A, __B),
4927 (__v4di)__W);
4928}
4929
4930static __inline__ __m256i __DEFAULT_FN_ATTRS256
4931_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4932{
4933 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4934 (__v4di)_mm256_srl_epi64(__A, __B),
4935 (__v4di)_mm256_setzero_si256());
4936}
4937
4938static __inline__ __m128i __DEFAULT_FN_ATTRS128
4939_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4940{
4941 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4942 (__v2di)_mm_srli_epi64(__A, (int)__B),
4943 (__v2di)__W);
4944}
4945
4946static __inline__ __m128i __DEFAULT_FN_ATTRS128
4947_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4948{
4949 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4950 (__v2di)_mm_srli_epi64(__A, (int)__B),
4951 (__v2di)_mm_setzero_si128());
4952}
4953
4954static __inline__ __m256i __DEFAULT_FN_ATTRS256
4955_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4956{
4957 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4958 (__v4di)_mm256_srli_epi64(__A, (int)__B),
4959 (__v4di)__W);
4960}
4961
4962static __inline__ __m256i __DEFAULT_FN_ATTRS256
4963_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
4964{
4965 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4966 (__v4di)_mm256_srli_epi64(__A, (int)__B),
4967 (__v4di)_mm256_setzero_si256());
4968}
4969
4970static __inline__ __m128i __DEFAULT_FN_ATTRS128
4971_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4972{
4973 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4974 (__v4si)_mm_srav_epi32(__X, __Y),
4975 (__v4si)__W);
4976}
4977
4978static __inline__ __m128i __DEFAULT_FN_ATTRS128
4979_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4980{
4981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4982 (__v4si)_mm_srav_epi32(__X, __Y),
4983 (__v4si)_mm_setzero_si128());
4984}
4985
4986static __inline__ __m256i __DEFAULT_FN_ATTRS256
4987_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4988{
4989 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4990 (__v8si)_mm256_srav_epi32(__X, __Y),
4991 (__v8si)__W);
4992}
4993
4994static __inline__ __m256i __DEFAULT_FN_ATTRS256
4995_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4996{
4997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4998 (__v8si)_mm256_srav_epi32(__X, __Y),
4999 (__v8si)_mm256_setzero_si256());
5000}
5001
5002static __inline__ __m128i __DEFAULT_FN_ATTRS128
5003_mm_srav_epi64(__m128i __X, __m128i __Y)
5004{
5005 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5006}
5007
5008static __inline__ __m128i __DEFAULT_FN_ATTRS128
5009_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5010{
5011 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5012 (__v2di)_mm_srav_epi64(__X, __Y),
5013 (__v2di)__W);
5014}
5015
5016static __inline__ __m128i __DEFAULT_FN_ATTRS128
5017_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5018{
5019 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5020 (__v2di)_mm_srav_epi64(__X, __Y),
5021 (__v2di)_mm_setzero_si128());
5022}
5023
5024static __inline__ __m256i __DEFAULT_FN_ATTRS256
5025_mm256_srav_epi64(__m256i __X, __m256i __Y)
5026{
5027 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5028}
5029
5030static __inline__ __m256i __DEFAULT_FN_ATTRS256
5031_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5032{
5033 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5034 (__v4