clang 22.0.0git
avx512vlintrin.h
Go to the documentation of this file.
1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLINTRIN_H
15#define __AVX512VLINTRIN_H
16
/* Attribute sets applied to the intrinsics in this header: always inline, no
 * debug info, require the "avx512vl" target feature, and record the vector
 * width in bits (128 or 256) via __min_vector_width__. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl"), __min_vector_width__(256)))
23
/* constexpr-capable variants: in C++11 and later they append `constexpr` to
 * the plain attribute set; in C and older C++ they are the plain set. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif
31
/* Narrow helper vector types: the __vector_size__ argument is in bytes. */
typedef short __v2hi __attribute__((__vector_size__(4))); /* 2 x short */
typedef char __v4qi __attribute__((__vector_size__(4)));  /* 4 x char */
typedef char __v2qi __attribute__((__vector_size__(2)));  /* 2 x char */
35
36/* Integer compare */
37
/* 128-bit epi32 compare shorthands. Each macro forwards to
 * _mm_cmp_epi32_mask / _mm_mask_cmp_epi32_mask with a fixed _MM_CMPINT_*
 * predicate; the _mask_ forms additionally pass the mask k. */
#define _mm_cmpeq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
62
/* 256-bit epi32 compare shorthands. Each macro forwards to
 * _mm256_cmp_epi32_mask / _mm256_mask_cmp_epi32_mask with a fixed
 * _MM_CMPINT_* predicate; the _mask_ forms additionally pass the mask k. */
#define _mm256_cmpeq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
87
/* 128-bit epu32 compare shorthands. Each macro forwards to
 * _mm_cmp_epu32_mask / _mm_mask_cmp_epu32_mask with a fixed _MM_CMPINT_*
 * predicate; the _mask_ forms additionally pass the mask k. */
#define _mm_cmpeq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
112
/* 256-bit epu32 compare shorthands. Each macro forwards to
 * _mm256_cmp_epu32_mask / _mm256_mask_cmp_epu32_mask with a fixed
 * _MM_CMPINT_* predicate; the _mask_ forms additionally pass the mask k. */
#define _mm256_cmpeq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
137
/* 128-bit epi64 compare shorthands. Each macro forwards to
 * _mm_cmp_epi64_mask / _mm_mask_cmp_epi64_mask with a fixed _MM_CMPINT_*
 * predicate; the _mask_ forms additionally pass the mask k. */
#define _mm_cmpeq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
162
/* 256-bit epi64 compare shorthands. Each macro forwards to
 * _mm256_cmp_epi64_mask / _mm256_mask_cmp_epi64_mask with a fixed
 * _MM_CMPINT_* predicate; the _mask_ forms additionally pass the mask k. */
#define _mm256_cmpeq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
187
/* 128-bit epu64 compare shorthands. Each macro forwards to
 * _mm_cmp_epu64_mask / _mm_mask_cmp_epu64_mask with a fixed _MM_CMPINT_*
 * predicate; the _mask_ forms additionally pass the mask k. */
#define _mm_cmpeq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
212
/* 256-bit epu64 compare shorthands. Each macro forwards to
 * _mm256_cmp_epu64_mask / _mm256_mask_cmp_epu64_mask with a fixed
 * _MM_CMPINT_* predicate; the _mask_ forms additionally pass the mask k. */
#define _mm256_cmpeq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
237
238static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
239_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
240 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
241 (__v8si)_mm256_add_epi32(__A, __B),
242 (__v8si)__W);
243}
244
245static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
246_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
247 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
248 (__v8si)_mm256_add_epi32(__A, __B),
249 (__v8si)_mm256_setzero_si256());
250}
251
252static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
253_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
254 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
255 (__v4di)_mm256_add_epi64(__A, __B),
256 (__v4di)__W);
257}
258
259static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
260_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
261 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
262 (__v4di)_mm256_add_epi64(__A, __B),
263 (__v4di)_mm256_setzero_si256());
264}
265
266static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
267_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
268 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
269 (__v8si)_mm256_sub_epi32(__A, __B),
270 (__v8si)__W);
271}
272
273static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
274_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
275 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
276 (__v8si)_mm256_sub_epi32(__A, __B),
277 (__v8si)_mm256_setzero_si256());
278}
279
280static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
281_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
282 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
283 (__v4di)_mm256_sub_epi64(__A, __B),
284 (__v4di)__W);
285}
286
287static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
288_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
289 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
290 (__v4di)_mm256_sub_epi64(__A, __B),
291 (__v4di)_mm256_setzero_si256());
292}
293
294static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
295_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
296 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
297 (__v4si)_mm_add_epi32(__A, __B),
298 (__v4si)__W);
299}
300
301static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
302_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
303 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
304 (__v4si)_mm_add_epi32(__A, __B),
305 (__v4si)_mm_setzero_si128());
306}
307
308static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
309_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
310 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
311 (__v2di)_mm_add_epi64(__A, __B),
312 (__v2di)__W);
313}
314
315static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
316_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
317 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
318 (__v2di)_mm_add_epi64(__A, __B),
319 (__v2di)_mm_setzero_si128());
320}
321
322static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
323_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
324 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
325 (__v4si)_mm_sub_epi32(__A, __B),
326 (__v4si)__W);
327}
328
329static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
330_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
331 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
332 (__v4si)_mm_sub_epi32(__A, __B),
333 (__v4si)_mm_setzero_si128());
334}
335
336static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
337_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
338 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
339 (__v2di)_mm_sub_epi64(__A, __B),
340 (__v2di)__W);
341}
342
343static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
344_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
345 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
346 (__v2di)_mm_sub_epi64(__A, __B),
347 (__v2di)_mm_setzero_si128());
348}
349
350static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
351_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
352 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
353 (__v4di)_mm256_mul_epi32(__X, __Y),
354 (__v4di)__W);
355}
356
357static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
358_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) {
359 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
360 (__v4di)_mm256_mul_epi32(__X, __Y),
361 (__v4di)_mm256_setzero_si256());
362}
363
364static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
365_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
366 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
367 (__v2di)_mm_mul_epi32(__X, __Y),
368 (__v2di)__W);
369}
370
371static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
372_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) {
373 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
374 (__v2di)_mm_mul_epi32(__X, __Y),
375 (__v2di)_mm_setzero_si128());
376}
377
378static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
379_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
380 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
381 (__v4di)_mm256_mul_epu32(__X, __Y),
382 (__v4di)__W);
383}
384
385static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
386_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) {
387 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
388 (__v4di)_mm256_mul_epu32(__X, __Y),
389 (__v4di)_mm256_setzero_si256());
390}
391
392static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
393_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
394 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
395 (__v2di)_mm_mul_epu32(__X, __Y),
396 (__v2di)__W);
397}
398
399static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
400_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) {
401 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
402 (__v2di)_mm_mul_epu32(__X, __Y),
403 (__v2di)_mm_setzero_si128());
404}
405
406static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
407_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
408 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
409 (__v8si)_mm256_mullo_epi32(__A, __B),
410 (__v8si)_mm256_setzero_si256());
411}
412
413static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
414_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
415 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
416 (__v8si)_mm256_mullo_epi32(__A, __B),
417 (__v8si)__W);
418}
419
420static __inline__ __m128i __DEFAULT_FN_ATTRS128
421_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
422{
423 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
424 (__v4si)_mm_mullo_epi32(__A, __B),
425 (__v4si)_mm_setzero_si128());
426}
427
428static __inline__ __m128i __DEFAULT_FN_ATTRS128
429_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
430{
431 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
432 (__v4si)_mm_mullo_epi32(__A, __B),
433 (__v4si)__W);
434}
435
436static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
437_mm256_and_epi32(__m256i __a, __m256i __b) {
438 return (__m256i)((__v8su)__a & (__v8su)__b);
439}
440
441static __inline__ __m256i __DEFAULT_FN_ATTRS256
442_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
443{
444 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
445 (__v8si)_mm256_and_epi32(__A, __B),
446 (__v8si)__W);
447}
448
449static __inline__ __m256i __DEFAULT_FN_ATTRS256
450_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
451{
452 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
453}
454
455static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
456_mm_and_epi32(__m128i __a, __m128i __b) {
457 return (__m128i)((__v4su)__a & (__v4su)__b);
458}
459
460static __inline__ __m128i __DEFAULT_FN_ATTRS128
461_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
462{
463 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
464 (__v4si)_mm_and_epi32(__A, __B),
465 (__v4si)__W);
466}
467
468static __inline__ __m128i __DEFAULT_FN_ATTRS128
469_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
470{
471 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
472}
473
474static __inline__ __m256i __DEFAULT_FN_ATTRS256
475_mm256_andnot_epi32(__m256i __A, __m256i __B)
476{
477 return (__m256i)(~(__v8su)__A & (__v8su)__B);
478}
479
480static __inline__ __m256i __DEFAULT_FN_ATTRS256
481_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
482{
483 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
484 (__v8si)_mm256_andnot_epi32(__A, __B),
485 (__v8si)__W);
486}
487
488static __inline__ __m256i __DEFAULT_FN_ATTRS256
489_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
490{
492 __U, __A, __B);
493}
494
495static __inline__ __m128i __DEFAULT_FN_ATTRS128
496_mm_andnot_epi32(__m128i __A, __m128i __B)
497{
498 return (__m128i)(~(__v4su)__A & (__v4su)__B);
499}
500
501static __inline__ __m128i __DEFAULT_FN_ATTRS128
502_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
503{
504 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
505 (__v4si)_mm_andnot_epi32(__A, __B),
506 (__v4si)__W);
507}
508
509static __inline__ __m128i __DEFAULT_FN_ATTRS128
510_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
511{
512 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
513}
514
515static __inline__ __m256i __DEFAULT_FN_ATTRS256
516_mm256_or_epi32(__m256i __a, __m256i __b)
517{
518 return (__m256i)((__v8su)__a | (__v8su)__b);
519}
520
521static __inline__ __m256i __DEFAULT_FN_ATTRS256
522_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
523{
524 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
525 (__v8si)_mm256_or_epi32(__A, __B),
526 (__v8si)__W);
527}
528
529static __inline__ __m256i __DEFAULT_FN_ATTRS256
530_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
531{
532 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
533}
534
535static __inline__ __m128i __DEFAULT_FN_ATTRS128
536_mm_or_epi32(__m128i __a, __m128i __b)
537{
538 return (__m128i)((__v4su)__a | (__v4su)__b);
539}
540
541static __inline__ __m128i __DEFAULT_FN_ATTRS128
542_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
543{
544 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
545 (__v4si)_mm_or_epi32(__A, __B),
546 (__v4si)__W);
547}
548
549static __inline__ __m128i __DEFAULT_FN_ATTRS128
550_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
551{
552 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
553}
554
555static __inline__ __m256i __DEFAULT_FN_ATTRS256
556_mm256_xor_epi32(__m256i __a, __m256i __b)
557{
558 return (__m256i)((__v8su)__a ^ (__v8su)__b);
559}
560
561static __inline__ __m256i __DEFAULT_FN_ATTRS256
562_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
563{
564 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
565 (__v8si)_mm256_xor_epi32(__A, __B),
566 (__v8si)__W);
567}
568
569static __inline__ __m256i __DEFAULT_FN_ATTRS256
570_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
571{
572 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
573}
574
575static __inline__ __m128i __DEFAULT_FN_ATTRS128
576_mm_xor_epi32(__m128i __a, __m128i __b)
577{
578 return (__m128i)((__v4su)__a ^ (__v4su)__b);
579}
580
581static __inline__ __m128i __DEFAULT_FN_ATTRS128
582_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
583{
584 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
585 (__v4si)_mm_xor_epi32(__A, __B),
586 (__v4si)__W);
587}
588
589static __inline__ __m128i __DEFAULT_FN_ATTRS128
590_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
591{
592 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
593}
594
595static __inline__ __m256i __DEFAULT_FN_ATTRS256
596_mm256_and_epi64(__m256i __a, __m256i __b)
597{
598 return (__m256i)((__v4du)__a & (__v4du)__b);
599}
600
601static __inline__ __m256i __DEFAULT_FN_ATTRS256
602_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
603{
604 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
605 (__v4di)_mm256_and_epi64(__A, __B),
606 (__v4di)__W);
607}
608
609static __inline__ __m256i __DEFAULT_FN_ATTRS256
610_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
611{
612 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
613}
614
615static __inline__ __m128i __DEFAULT_FN_ATTRS128
616_mm_and_epi64(__m128i __a, __m128i __b)
617{
618 return (__m128i)((__v2du)__a & (__v2du)__b);
619}
620
621static __inline__ __m128i __DEFAULT_FN_ATTRS128
622_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
623{
624 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
625 (__v2di)_mm_and_epi64(__A, __B),
626 (__v2di)__W);
627}
628
629static __inline__ __m128i __DEFAULT_FN_ATTRS128
630_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
631{
632 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
633}
634
635static __inline__ __m256i __DEFAULT_FN_ATTRS256
636_mm256_andnot_epi64(__m256i __A, __m256i __B)
637{
638 return (__m256i)(~(__v4du)__A & (__v4du)__B);
639}
640
641static __inline__ __m256i __DEFAULT_FN_ATTRS256
642_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
643{
644 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
645 (__v4di)_mm256_andnot_epi64(__A, __B),
646 (__v4di)__W);
647}
648
649static __inline__ __m256i __DEFAULT_FN_ATTRS256
650_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
651{
653 __U, __A, __B);
654}
655
656static __inline__ __m128i __DEFAULT_FN_ATTRS128
657_mm_andnot_epi64(__m128i __A, __m128i __B)
658{
659 return (__m128i)(~(__v2du)__A & (__v2du)__B);
660}
661
662static __inline__ __m128i __DEFAULT_FN_ATTRS128
663_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
664{
665 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
666 (__v2di)_mm_andnot_epi64(__A, __B),
667 (__v2di)__W);
668}
669
670static __inline__ __m128i __DEFAULT_FN_ATTRS128
671_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
672{
673 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
674}
675
676static __inline__ __m256i __DEFAULT_FN_ATTRS256
677_mm256_or_epi64(__m256i __a, __m256i __b)
678{
679 return (__m256i)((__v4du)__a | (__v4du)__b);
680}
681
682static __inline__ __m256i __DEFAULT_FN_ATTRS256
683_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
684{
685 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
686 (__v4di)_mm256_or_epi64(__A, __B),
687 (__v4di)__W);
688}
689
690static __inline__ __m256i __DEFAULT_FN_ATTRS256
691_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
692{
693 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
694}
695
696static __inline__ __m128i __DEFAULT_FN_ATTRS128
697_mm_or_epi64(__m128i __a, __m128i __b)
698{
699 return (__m128i)((__v2du)__a | (__v2du)__b);
700}
701
702static __inline__ __m128i __DEFAULT_FN_ATTRS128
703_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
704{
705 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
706 (__v2di)_mm_or_epi64(__A, __B),
707 (__v2di)__W);
708}
709
710static __inline__ __m128i __DEFAULT_FN_ATTRS128
711_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
712{
713 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
714}
715
716static __inline__ __m256i __DEFAULT_FN_ATTRS256
717_mm256_xor_epi64(__m256i __a, __m256i __b)
718{
719 return (__m256i)((__v4du)__a ^ (__v4du)__b);
720}
721
722static __inline__ __m256i __DEFAULT_FN_ATTRS256
723_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
724{
725 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
726 (__v4di)_mm256_xor_epi64(__A, __B),
727 (__v4di)__W);
728}
729
730static __inline__ __m256i __DEFAULT_FN_ATTRS256
731_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
732{
733 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
734}
735
736static __inline__ __m128i __DEFAULT_FN_ATTRS128
737_mm_xor_epi64(__m128i __a, __m128i __b)
738{
739 return (__m128i)((__v2du)__a ^ (__v2du)__b);
740}
741
742static __inline__ __m128i __DEFAULT_FN_ATTRS128
743_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
744 __m128i __B)
745{
746 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
747 (__v2di)_mm_xor_epi64(__A, __B),
748 (__v2di)__W);
749}
750
751static __inline__ __m128i __DEFAULT_FN_ATTRS128
752_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
753{
754 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
755}
756
/* Generic 32-bit compares on 128-bit vectors with predicate p. The epi32
 * forms call the cmpd128 builtin and the epu32 forms the ucmpd128 builtin.
 * Unmasked forms pass an all-ones mask; _mask_ forms pass the caller's m. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)(m)))

#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)(m)))
776
/* Generic 32-bit compares on 256-bit vectors with predicate p. The epi32
 * forms call the cmpd256 builtin and the epu32 forms the ucmpd256 builtin.
 * Unmasked forms pass an all-ones mask; _mask_ forms pass the caller's m. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)(m)))

#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)(m)))
796
/* Generic 64-bit compares on 128-bit vectors with predicate p. The epi64
 * forms call the cmpq128 builtin and the epu64 forms the ucmpq128 builtin.
 * Unmasked forms pass an all-ones mask; _mask_ forms pass the caller's m. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)(m)))

#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)(m)))
816
/* Generic 64-bit compares on 256-bit vectors with predicate p. The epi64
 * forms call the cmpq256 builtin and the epu64 forms the ucmpq256 builtin.
 * Unmasked forms pass an all-ones mask; _mask_ forms pass the caller's m. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)(m)))

#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)(m)))
836
/* Generic floating-point compares on 256-bit vectors with predicate p. The
 * ps forms call the cmpps256 builtin on __v8sf operands and the pd forms the
 * cmppd256 builtin on __v4df operands. Unmasked forms pass an all-ones mask;
 * _mask_ forms pass the caller's m. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), (int)(p), (__mmask8)(m)))

#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), (int)(p), (__mmask8)(m)))
856
/* ps compare on __m128 operands yielding an __mmask8: forwards a, b and the
 * predicate p to __builtin_ia32_cmpps128_mask with (__mmask8)-1 as the mask
 * argument. */
#define _mm_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask( \
      (__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (int)(p), (__mmask8)-1))
861
/* ps compare on __m128 operands yielding an __mmask8: forwards a, b and the
 * predicate p to __builtin_ia32_cmpps128_mask with m as the mask argument. */
#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask( \
      (__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (int)(p), (__mmask8)(m)))
866
/* pd compare on __m128d operands yielding an __mmask8: forwards a, b and the
 * predicate p to __builtin_ia32_cmppd128_mask with (__mmask8)-1 as the mask
 * argument. */
#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), (int)(p), (__mmask8)-1))
871
/* pd compare on __m128d operands yielding an __mmask8: forwards a, b and the
 * predicate p to __builtin_ia32_cmppd128_mask with m as the mask argument. */
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), (int)(p), (__mmask8)(m)))
876
877static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
878_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
879 return (__m128d)__builtin_ia32_selectpd_128(
880 (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C), (__v2df)__A);
881}
882
883static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
884_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
885 return (__m128d)__builtin_ia32_selectpd_128(
886 (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C), (__v2df)__C);
887}
888
889static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
890_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
891 return (__m128d)__builtin_ia32_selectpd_128(
892 (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C),
893 (__v2df)_mm_setzero_pd());
894}
895
896static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
897_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
898 return (__m128d)__builtin_ia32_selectpd_128(
899 (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__A);
900}
901
902static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
903_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
904 return (__m128d)__builtin_ia32_selectpd_128(
905 (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__C);
906}
907
908static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
909_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
910 return (__m128d)__builtin_ia32_selectpd_128(
911 (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C),
912 (__v2df)_mm_setzero_pd());
913}
914
915static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
916_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
917 return (__m128d)__builtin_ia32_selectpd_128(
918 (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__A);
919}
920
921static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
922_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
923 return (__m128d)__builtin_ia32_selectpd_128(
924 (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__C);
925}
926
927static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
928_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
929 return (__m128d)__builtin_ia32_selectpd_128(
930 (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C),
931 (__v2df)_mm_setzero_pd());
932}
933
934static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
935_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
936 return (__m128d)__builtin_ia32_selectpd_128(
937 (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__A);
938}
939
940static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
941_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
942 return (__m128d)__builtin_ia32_selectpd_128(
943 (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__C);
944}
945
946static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
947_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
948 return (__m128d)__builtin_ia32_selectpd_128(
949 (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C),
950 (__v2df)_mm_setzero_pd());
951}
952
953static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
954_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
955 return (__m256d)__builtin_ia32_selectpd_256(
956 (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C), (__v4df)__A);
957}
958
959static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
960_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
961 return (__m256d)__builtin_ia32_selectpd_256(
962 (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C), (__v4df)__C);
963}
964
965static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
966_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
967 return (__m256d)__builtin_ia32_selectpd_256(
968 (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C),
969 (__v4df)_mm256_setzero_pd());
970}
971
972static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
973_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
974 return (__m256d)__builtin_ia32_selectpd_256(
975 (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__A);
976}
977
978static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
979_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
980 return (__m256d)__builtin_ia32_selectpd_256(
981 (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__C);
982}
983
984static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
985_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
986 return (__m256d)__builtin_ia32_selectpd_256(
987 (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C),
988 (__v4df)_mm256_setzero_pd());
989}
990
991static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
992_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
993 return (__m256d)__builtin_ia32_selectpd_256(
994 (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__A);
995}
996
997static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
998_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
999 return (__m256d)__builtin_ia32_selectpd_256(
1000 (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__C);
1001}
1002
1003static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1004_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
1005 return (__m256d)__builtin_ia32_selectpd_256(
1006 (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C),
1007 (__v4df)_mm256_setzero_pd());
1008}
1009
1010static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1011_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
1012 return (__m256d)__builtin_ia32_selectpd_256(
1013 (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__A);
1014}
1015
1016static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1017_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
1018 return (__m256d)__builtin_ia32_selectpd_256(
1019 (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__C);
1020}
1021
1022static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1023_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
1024 return (__m256d)__builtin_ia32_selectpd_256(
1025 (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C),
1026 (__v4df)_mm256_setzero_pd());
1027}
1028
1029static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1030_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1031 return (__m128)__builtin_ia32_selectps_128(
1032 (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C), (__v4sf)__A);
1033}
1034
1035static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1036_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1037 return (__m128)__builtin_ia32_selectps_128(
1038 (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C), (__v4sf)__C);
1039}
1040
1041static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1042_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
1043 return (__m128)__builtin_ia32_selectps_128(
1044 (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C),
1045 (__v4sf)_mm_setzero_ps());
1046}
1047
1048static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1049_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1050 return (__m128)__builtin_ia32_selectps_128(
1051 (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__A);
1052}
1053
1054static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1055_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1056 return (__m128)__builtin_ia32_selectps_128(
1057 (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__C);
1058}
1059
1060static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1061_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
1062 return (__m128)__builtin_ia32_selectps_128(
1063 (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C),
1064 (__v4sf)_mm_setzero_ps());
1065}
1066
1067static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1068_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1069 return (__m128)__builtin_ia32_selectps_128(
1070 (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__A);
1071}
1072
1073static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1074_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1075 return (__m128)__builtin_ia32_selectps_128(
1076 (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__C);
1077}
1078
1079static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1080_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
1081 return (__m128)__builtin_ia32_selectps_128(
1082 (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C),
1083 (__v4sf)_mm_setzero_ps());
1084}
1085
1086static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1087_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1088 return (__m128)__builtin_ia32_selectps_128(
1089 (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__A);
1090}
1091
1092static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1093_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1094 return (__m128)__builtin_ia32_selectps_128(
1095 (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__C);
1096}
1097
1098static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1099_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
1100 return (__m128)__builtin_ia32_selectps_128(
1101 (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C),
1102 (__v4sf)_mm_setzero_ps());
1103}
1104
1105static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1106_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1107 return (__m256)__builtin_ia32_selectps_256(
1108 (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C), (__v8sf)__A);
1109}
1110
1111static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1112_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1113 return (__m256)__builtin_ia32_selectps_256(
1114 (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C), (__v8sf)__C);
1115}
1116
1117static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1118_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
1119 return (__m256)__builtin_ia32_selectps_256(
1120 (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C),
1121 (__v8sf)_mm256_setzero_ps());
1122}
1123
1124static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1125_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1126 return (__m256)__builtin_ia32_selectps_256(
1127 (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__A);
1128}
1129
1130static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1131_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1132 return (__m256)__builtin_ia32_selectps_256(
1133 (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__C);
1134}
1135
1136static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1137_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
1138 return (__m256)__builtin_ia32_selectps_256(
1139 (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C),
1140 (__v8sf)_mm256_setzero_ps());
1141}
1142
1143static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1144_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1145 return (__m256)__builtin_ia32_selectps_256(
1146 (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__A);
1147}
1148
1149static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1150_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1151 return (__m256)__builtin_ia32_selectps_256(
1152 (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__C);
1153}
1154
1155static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1156_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
1157 return (__m256)__builtin_ia32_selectps_256(
1158 (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C),
1159 (__v8sf)_mm256_setzero_ps());
1160}
1161
1162static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1163_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1164 return (__m256)__builtin_ia32_selectps_256(
1165 (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__A);
1166}
1167
1168static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1169_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1170 return (__m256)__builtin_ia32_selectps_256(
1171 (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__C);
1172}
1173
1174static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1175_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
1176 return (__m256)__builtin_ia32_selectps_256(
1177 (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C),
1178 (__v8sf)_mm256_setzero_ps());
1179}
1180
1181static __inline__ __m128d __DEFAULT_FN_ATTRS128
1182_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
1183 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1184 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1185 (__v2df) __B,
1186 (__v2df) __C),
1187 (__v2df) __A);
1188}
1189
1190static __inline__ __m128d __DEFAULT_FN_ATTRS128
1191_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1192{
1193 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1194 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1195 (__v2df) __B,
1196 (__v2df) __C),
1197 (__v2df) __C);
1198}
1199
1200static __inline__ __m128d __DEFAULT_FN_ATTRS128
1201_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1202{
1203 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1204 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1205 (__v2df) __B,
1206 (__v2df) __C),
1207 (__v2df)_mm_setzero_pd());
1208}
1209
1210static __inline__ __m128d __DEFAULT_FN_ATTRS128
1211_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1212{
1213 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1214 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1215 (__v2df) __B,
1216 -(__v2df) __C),
1217 (__v2df) __A);
1218}
1219
1220static __inline__ __m128d __DEFAULT_FN_ATTRS128
1221_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1222{
1223 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1224 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1225 (__v2df) __B,
1226 -(__v2df) __C),
1227 (__v2df)_mm_setzero_pd());
1228}
1229
1230static __inline__ __m256d __DEFAULT_FN_ATTRS256
1231_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1232{
1233 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1234 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1235 (__v4df) __B,
1236 (__v4df) __C),
1237 (__v4df) __A);
1238}
1239
1240static __inline__ __m256d __DEFAULT_FN_ATTRS256
1241_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1242{
1243 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1244 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1245 (__v4df) __B,
1246 (__v4df) __C),
1247 (__v4df) __C);
1248}
1249
1250static __inline__ __m256d __DEFAULT_FN_ATTRS256
1251_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1252{
1253 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1254 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1255 (__v4df) __B,
1256 (__v4df) __C),
1257 (__v4df)_mm256_setzero_pd());
1258}
1259
1260static __inline__ __m256d __DEFAULT_FN_ATTRS256
1261_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1262{
1263 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1264 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1265 (__v4df) __B,
1266 -(__v4df) __C),
1267 (__v4df) __A);
1268}
1269
1270static __inline__ __m256d __DEFAULT_FN_ATTRS256
1271_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1272{
1273 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1274 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1275 (__v4df) __B,
1276 -(__v4df) __C),
1277 (__v4df)_mm256_setzero_pd());
1278}
1279
1280static __inline__ __m128 __DEFAULT_FN_ATTRS128
1281_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1282{
1283 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1284 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1285 (__v4sf) __B,
1286 (__v4sf) __C),
1287 (__v4sf) __A);
1288}
1289
1290static __inline__ __m128 __DEFAULT_FN_ATTRS128
1291_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1292{
1293 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1294 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1295 (__v4sf) __B,
1296 (__v4sf) __C),
1297 (__v4sf) __C);
1298}
1299
1300static __inline__ __m128 __DEFAULT_FN_ATTRS128
1301_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1302{
1303 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1304 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1305 (__v4sf) __B,
1306 (__v4sf) __C),
1307 (__v4sf)_mm_setzero_ps());
1308}
1309
1310static __inline__ __m128 __DEFAULT_FN_ATTRS128
1311_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1312{
1313 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1314 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1315 (__v4sf) __B,
1316 -(__v4sf) __C),
1317 (__v4sf) __A);
1318}
1319
1320static __inline__ __m128 __DEFAULT_FN_ATTRS128
1321_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1322{
1323 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1324 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1325 (__v4sf) __B,
1326 -(__v4sf) __C),
1327 (__v4sf)_mm_setzero_ps());
1328}
1329
1330static __inline__ __m256 __DEFAULT_FN_ATTRS256
1331_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1332 __m256 __C)
1333{
1334 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1335 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1336 (__v8sf) __B,
1337 (__v8sf) __C),
1338 (__v8sf) __A);
1339}
1340
1341static __inline__ __m256 __DEFAULT_FN_ATTRS256
1342_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1343{
1344 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1345 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1346 (__v8sf) __B,
1347 (__v8sf) __C),
1348 (__v8sf) __C);
1349}
1350
1351static __inline__ __m256 __DEFAULT_FN_ATTRS256
1352_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1353{
1354 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1355 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1356 (__v8sf) __B,
1357 (__v8sf) __C),
1358 (__v8sf)_mm256_setzero_ps());
1359}
1360
1361static __inline__ __m256 __DEFAULT_FN_ATTRS256
1362_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1363{
1364 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1365 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1366 (__v8sf) __B,
1367 -(__v8sf) __C),
1368 (__v8sf) __A);
1369}
1370
1371static __inline__ __m256 __DEFAULT_FN_ATTRS256
1372_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1373{
1374 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1375 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1376 (__v8sf) __B,
1377 -(__v8sf) __C),
1378 (__v8sf)_mm256_setzero_ps());
1379}
1380
1381static __inline__ __m128d __DEFAULT_FN_ATTRS128
1382_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1383{
1384 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1385 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1386 (__v2df) __B,
1387 -(__v2df) __C),
1388 (__v2df) __C);
1389}
1390
1391static __inline__ __m256d __DEFAULT_FN_ATTRS256
1392_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1393{
1394 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1395 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1396 (__v4df) __B,
1397 -(__v4df) __C),
1398 (__v4df) __C);
1399}
1400
1401static __inline__ __m128 __DEFAULT_FN_ATTRS128
1402_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1403{
1404 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1405 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1406 (__v4sf) __B,
1407 -(__v4sf) __C),
1408 (__v4sf) __C);
1409}
1410
1411static __inline__ __m256 __DEFAULT_FN_ATTRS256
1412_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1413{
1414 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1415 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1416 (__v8sf) __B,
1417 -(__v8sf) __C),
1418 (__v8sf) __C);
1419}
1420
1421static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1422_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1423 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1424 (__v2df)_mm_add_pd(__A, __B),
1425 (__v2df)__W);
1426}
1427
1428static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1429_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1430 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1431 (__v2df)_mm_add_pd(__A, __B),
1432 (__v2df)_mm_setzero_pd());
1433}
1434
1435static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1436_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1437 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1438 (__v4df)_mm256_add_pd(__A, __B),
1439 (__v4df)__W);
1440}
1441
1442static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1443_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1444 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1445 (__v4df)_mm256_add_pd(__A, __B),
1446 (__v4df)_mm256_setzero_pd());
1447}
1448
1449static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1450_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1451 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1452 (__v4sf)_mm_add_ps(__A, __B),
1453 (__v4sf)__W);
1454}
1455
1456static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1457_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1458 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1459 (__v4sf)_mm_add_ps(__A, __B),
1460 (__v4sf)_mm_setzero_ps());
1461}
1462
1463static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1464_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1465 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1466 (__v8sf)_mm256_add_ps(__A, __B),
1467 (__v8sf)__W);
1468}
1469
1470static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1471_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1472 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1473 (__v8sf)_mm256_add_ps(__A, __B),
1474 (__v8sf)_mm256_setzero_ps());
1475}
1476
1477static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
1478_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
1479 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1480 (__v4si) __W,
1481 (__v4si) __A);
1482}
1483
1484static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1485_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
1486 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1487 (__v8si) __W,
1488 (__v8si) __A);
1489}
1490
1491static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1492_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
1493 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1494 (__v2df) __W,
1495 (__v2df) __A);
1496}
1497
1498static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1499_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
1500 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1501 (__v4df) __W,
1502 (__v4df) __A);
1503}
1504
1505static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1506_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
1507 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1508 (__v4sf) __W,
1509 (__v4sf) __A);
1510}
1511
1512static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1513_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
1514 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1515 (__v8sf) __W,
1516 (__v8sf) __A);
1517}
1518
1519static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
1520_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
1521 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1522 (__v2di) __W,
1523 (__v2di) __A);
1524}
1525
1526static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1527_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
1528 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1529 (__v4di) __W,
1530 (__v4di) __A);
1531}
1532
1533static __inline__ __m128d __DEFAULT_FN_ATTRS128
1534_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1535 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1536 (__v2df) __W,
1537 (__mmask8) __U);
1538}
1539
1540static __inline__ __m128d __DEFAULT_FN_ATTRS128
1542 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1543 (__v2df)
1544 _mm_setzero_pd (),
1545 (__mmask8) __U);
1546}
1547
1548static __inline__ __m256d __DEFAULT_FN_ATTRS256
1549_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1550 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1551 (__v4df) __W,
1552 (__mmask8) __U);
1553}
1554
1555static __inline__ __m256d __DEFAULT_FN_ATTRS256
1557 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1558 (__v4df)
1560 (__mmask8) __U);
1561}
1562
1563static __inline__ __m128i __DEFAULT_FN_ATTRS128
1564_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1565 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1566 (__v2di) __W,
1567 (__mmask8) __U);
1568}
1569
1570static __inline__ __m128i __DEFAULT_FN_ATTRS128
1572 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1573 (__v2di)
1575 (__mmask8) __U);
1576}
1577
1578static __inline__ __m256i __DEFAULT_FN_ATTRS256
1579_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1580 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1581 (__v4di) __W,
1582 (__mmask8) __U);
1583}
1584
1585static __inline__ __m256i __DEFAULT_FN_ATTRS256
1587 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1588 (__v4di)
1590 (__mmask8) __U);
1591}
1592
1593static __inline__ __m128 __DEFAULT_FN_ATTRS128
1594_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1595 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1596 (__v4sf) __W,
1597 (__mmask8) __U);
1598}
1599
1600static __inline__ __m128 __DEFAULT_FN_ATTRS128
1602 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1603 (__v4sf)
1604 _mm_setzero_ps (),
1605 (__mmask8) __U);
1606}
1607
1608static __inline__ __m256 __DEFAULT_FN_ATTRS256
1609_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1610 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1611 (__v8sf) __W,
1612 (__mmask8) __U);
1613}
1614
1615static __inline__ __m256 __DEFAULT_FN_ATTRS256
1617 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1618 (__v8sf)
1620 (__mmask8) __U);
1621}
1622
1623static __inline__ __m128i __DEFAULT_FN_ATTRS128
1624_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1625 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1626 (__v4si) __W,
1627 (__mmask8) __U);
1628}
1629
1630static __inline__ __m128i __DEFAULT_FN_ATTRS128
1632 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1633 (__v4si)
1635 (__mmask8) __U);
1636}
1637
1638static __inline__ __m256i __DEFAULT_FN_ATTRS256
1639_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1640 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1641 (__v8si) __W,
1642 (__mmask8) __U);
1643}
1644
1645static __inline__ __m256i __DEFAULT_FN_ATTRS256
1647 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1648 (__v8si)
1650 (__mmask8) __U);
1651}
1652
1653static __inline__ void __DEFAULT_FN_ATTRS128
1654_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1655 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1656 (__v2df) __A,
1657 (__mmask8) __U);
1658}
1659
1660static __inline__ void __DEFAULT_FN_ATTRS256
1661_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1662 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1663 (__v4df) __A,
1664 (__mmask8) __U);
1665}
1666
1667static __inline__ void __DEFAULT_FN_ATTRS128
1668_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1669 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1670 (__v2di) __A,
1671 (__mmask8) __U);
1672}
1673
1674static __inline__ void __DEFAULT_FN_ATTRS256
1676 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1677 (__v4di) __A,
1678 (__mmask8) __U);
1679}
1680
1681static __inline__ void __DEFAULT_FN_ATTRS128
1682_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1683 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1684 (__v4sf) __A,
1685 (__mmask8) __U);
1686}
1687
1688static __inline__ void __DEFAULT_FN_ATTRS256
1690 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1691 (__v8sf) __A,
1692 (__mmask8) __U);
1693}
1694
1695static __inline__ void __DEFAULT_FN_ATTRS128
1696_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1697 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1698 (__v4si) __A,
1699 (__mmask8) __U);
1700}
1701
1702static __inline__ void __DEFAULT_FN_ATTRS256
1704 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1705 (__v8si) __A,
1706 (__mmask8) __U);
1707}
1708
1709static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1710_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
1711 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1712 (__v2df)_mm_cvtepi32_pd(__A),
1713 (__v2df)__W);
1714}
1715
1716static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1718 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1719 (__v2df)_mm_cvtepi32_pd(__A),
1720 (__v2df)_mm_setzero_pd());
1721}
1722
1723static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1724_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
1725 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1726 (__v4df)_mm256_cvtepi32_pd(__A),
1727 (__v4df)__W);
1728}
1729
1730static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1732 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1733 (__v4df)_mm256_cvtepi32_pd(__A),
1734 (__v4df)_mm256_setzero_pd());
1735}
1736
1737static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1738_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
1739 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1740 (__v4sf)_mm_cvtepi32_ps(__A),
1741 (__v4sf)__W);
1742}
1743
1744static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1746 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1747 (__v4sf)_mm_cvtepi32_ps(__A),
1748 (__v4sf)_mm_setzero_ps());
1749}
1750
1751static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1752_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
1753 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1754 (__v8sf)_mm256_cvtepi32_ps(__A),
1755 (__v8sf)__W);
1756}
1757
1758static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1760 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1761 (__v8sf)_mm256_cvtepi32_ps(__A),
1762 (__v8sf)_mm256_setzero_ps());
1763}
1764
1765static __inline__ __m128i __DEFAULT_FN_ATTRS128
1766_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1767 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1768 (__v4si) __W,
1769 (__mmask8) __U);
1770}
1771
1772static __inline__ __m128i __DEFAULT_FN_ATTRS128
1774 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1775 (__v4si)
1777 (__mmask8) __U);
1778}
1779
1780static __inline__ __m128i __DEFAULT_FN_ATTRS256
1781_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1782 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1783 (__v4si)_mm256_cvtpd_epi32(__A),
1784 (__v4si)__W);
1785}
1786
1787static __inline__ __m128i __DEFAULT_FN_ATTRS256
1789 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1790 (__v4si)_mm256_cvtpd_epi32(__A),
1791 (__v4si)_mm_setzero_si128());
1792}
1793
1794static __inline__ __m128 __DEFAULT_FN_ATTRS128
1795_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1796 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1797 (__v4sf) __W,
1798 (__mmask8) __U);
1799}
1800
1801static __inline__ __m128 __DEFAULT_FN_ATTRS128
1802_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
1803 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1804 (__v4sf)
1805 _mm_setzero_ps (),
1806 (__mmask8) __U);
1807}
1808
1809static __inline__ __m128 __DEFAULT_FN_ATTRS256
1810_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
1811 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1812 (__v4sf)_mm256_cvtpd_ps(__A),
1813 (__v4sf)__W);
1814}
1815
1816static __inline__ __m128 __DEFAULT_FN_ATTRS256
1818 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1819 (__v4sf)_mm256_cvtpd_ps(__A),
1820 (__v4sf)_mm_setzero_ps());
1821}
1822
1823static __inline__ __m128i __DEFAULT_FN_ATTRS128
1824_mm_cvtpd_epu32 (__m128d __A) {
1825 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1826 (__v4si)
1828 (__mmask8) -1);
1829}
1830
1831static __inline__ __m128i __DEFAULT_FN_ATTRS128
1832_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
1833 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1834 (__v4si) __W,
1835 (__mmask8) __U);
1836}
1837
1838static __inline__ __m128i __DEFAULT_FN_ATTRS128
1840 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1841 (__v4si)
1843 (__mmask8) __U);
1844}
1845
1846static __inline__ __m128i __DEFAULT_FN_ATTRS256
1847_mm256_cvtpd_epu32 (__m256d __A) {
1848 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1849 (__v4si)
1851 (__mmask8) -1);
1852}
1853
1854static __inline__ __m128i __DEFAULT_FN_ATTRS256
1855_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
1856 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1857 (__v4si) __W,
1858 (__mmask8) __U);
1859}
1860
1861static __inline__ __m128i __DEFAULT_FN_ATTRS256
1863 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1864 (__v4si)
1866 (__mmask8) __U);
1867}
1868
1869static __inline__ __m128i __DEFAULT_FN_ATTRS128
1870_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
1871 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1872 (__v4si)_mm_cvtps_epi32(__A),
1873 (__v4si)__W);
1874}
1875
1876static __inline__ __m128i __DEFAULT_FN_ATTRS128
1878 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1879 (__v4si)_mm_cvtps_epi32(__A),
1880 (__v4si)_mm_setzero_si128());
1881}
1882
1883static __inline__ __m256i __DEFAULT_FN_ATTRS256
1884_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
1885 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1886 (__v8si)_mm256_cvtps_epi32(__A),
1887 (__v8si)__W);
1888}
1889
1890static __inline__ __m256i __DEFAULT_FN_ATTRS256
1892 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1893 (__v8si)_mm256_cvtps_epi32(__A),
1894 (__v8si)_mm256_setzero_si256());
1895}
1896
1897static __inline__ __m128d __DEFAULT_FN_ATTRS128
1898_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
1899 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1900 (__v2df)_mm_cvtps_pd(__A),
1901 (__v2df)__W);
1902}
1903
1904static __inline__ __m128d __DEFAULT_FN_ATTRS128
1905_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
1906 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1907 (__v2df)_mm_cvtps_pd(__A),
1908 (__v2df)_mm_setzero_pd());
1909}
1910
1911static __inline__ __m256d __DEFAULT_FN_ATTRS256
1912_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
1913 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1914 (__v4df)_mm256_cvtps_pd(__A),
1915 (__v4df)__W);
1916}
1917
1918static __inline__ __m256d __DEFAULT_FN_ATTRS256
1920 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1921 (__v4df)_mm256_cvtps_pd(__A),
1922 (__v4df)_mm256_setzero_pd());
1923}
1924
1925static __inline__ __m128i __DEFAULT_FN_ATTRS128
1926_mm_cvtps_epu32 (__m128 __A) {
1927 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
1928 (__v4si)
1930 (__mmask8) -1);
1931}
1932
1933static __inline__ __m128i __DEFAULT_FN_ATTRS128
1934_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
1935 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
1936 (__v4si) __W,
1937 (__mmask8) __U);
1938}
1939
1940static __inline__ __m128i __DEFAULT_FN_ATTRS128
1942 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
1943 (__v4si)
1945 (__mmask8) __U);
1946}
1947
1948static __inline__ __m256i __DEFAULT_FN_ATTRS256
1950 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
1951 (__v8si)
1953 (__mmask8) -1);
1954}
1955
1956static __inline__ __m256i __DEFAULT_FN_ATTRS256
1957_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
1958 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
1959 (__v8si) __W,
1960 (__mmask8) __U);
1961}
1962
1963static __inline__ __m256i __DEFAULT_FN_ATTRS256
1965 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
1966 (__v8si)
1968 (__mmask8) __U);
1969}
1970
1971static __inline__ __m128i __DEFAULT_FN_ATTRS128
1972_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1973 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1974 (__v4si) __W,
1975 (__mmask8) __U);
1976}
1977
1978static __inline__ __m128i __DEFAULT_FN_ATTRS128
1980 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1981 (__v4si)
1983 (__mmask8) __U);
1984}
1985
1986static __inline__ __m128i __DEFAULT_FN_ATTRS256
1987_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1988 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1989 (__v4si)_mm256_cvttpd_epi32(__A),
1990 (__v4si)__W);
1991}
1992
1993static __inline__ __m128i __DEFAULT_FN_ATTRS256
1995 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1996 (__v4si)_mm256_cvttpd_epi32(__A),
1997 (__v4si)_mm_setzero_si128());
1998}
1999
2000static __inline__ __m128i __DEFAULT_FN_ATTRS128
2001_mm_cvttpd_epu32 (__m128d __A) {
2002 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2003 (__v4si)
2005 (__mmask8) -1);
2006}
2007
2008static __inline__ __m128i __DEFAULT_FN_ATTRS128
2009_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2010 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2011 (__v4si) __W,
2012 (__mmask8) __U);
2013}
2014
2015static __inline__ __m128i __DEFAULT_FN_ATTRS128
2017 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2018 (__v4si)
2020 (__mmask8) __U);
2021}
2022
2023static __inline__ __m128i __DEFAULT_FN_ATTRS256
2024_mm256_cvttpd_epu32 (__m256d __A) {
2025 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2026 (__v4si)
2028 (__mmask8) -1);
2029}
2030
2031static __inline__ __m128i __DEFAULT_FN_ATTRS256
2032_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2033 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2034 (__v4si) __W,
2035 (__mmask8) __U);
2036}
2037
2038static __inline__ __m128i __DEFAULT_FN_ATTRS256
2040 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2041 (__v4si)
2043 (__mmask8) __U);
2044}
2045
2046static __inline__ __m128i __DEFAULT_FN_ATTRS128
2047_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2048 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2049 (__v4si)_mm_cvttps_epi32(__A),
2050 (__v4si)__W);
2051}
2052
2053static __inline__ __m128i __DEFAULT_FN_ATTRS128
2055 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2056 (__v4si)_mm_cvttps_epi32(__A),
2057 (__v4si)_mm_setzero_si128());
2058}
2059
2060static __inline__ __m256i __DEFAULT_FN_ATTRS256
2061_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2062 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2063 (__v8si)_mm256_cvttps_epi32(__A),
2064 (__v8si)__W);
2065}
2066
2067static __inline__ __m256i __DEFAULT_FN_ATTRS256
2069 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2070 (__v8si)_mm256_cvttps_epi32(__A),
2071 (__v8si)_mm256_setzero_si256());
2072}
2073
2074static __inline__ __m128i __DEFAULT_FN_ATTRS128
2075_mm_cvttps_epu32 (__m128 __A) {
2076 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2077 (__v4si)
2079 (__mmask8) -1);
2080}
2081
2082static __inline__ __m128i __DEFAULT_FN_ATTRS128
2083_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2084 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2085 (__v4si) __W,
2086 (__mmask8) __U);
2087}
2088
2089static __inline__ __m128i __DEFAULT_FN_ATTRS128
2091 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2092 (__v4si)
2094 (__mmask8) __U);
2095}
2096
2097static __inline__ __m256i __DEFAULT_FN_ATTRS256
2099 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2100 (__v8si)
2102 (__mmask8) -1);
2103}
2104
2105static __inline__ __m256i __DEFAULT_FN_ATTRS256
2106_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2107 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2108 (__v8si) __W,
2109 (__mmask8) __U);
2110}
2111
2112static __inline__ __m256i __DEFAULT_FN_ATTRS256
2114 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2115 (__v8si)
2117 (__mmask8) __U);
2118}
2119
2120static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2121_mm_cvtepu32_pd(__m128i __A) {
2122 return (__m128d) __builtin_convertvector(
2123 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2124}
2125
2126static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2127_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
2128 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2129 (__v2df)_mm_cvtepu32_pd(__A),
2130 (__v2df)__W);
2131}
2132
2133static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2135 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2136 (__v2df)_mm_cvtepu32_pd(__A),
2137 (__v2df)_mm_setzero_pd());
2138}
2139
2140static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2142 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2143}
2144
2145static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2146_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
2147 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2148 (__v4df)_mm256_cvtepu32_pd(__A),
2149 (__v4df)__W);
2150}
2151
2152static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2154 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2155 (__v4df)_mm256_cvtepu32_pd(__A),
2156 (__v4df)_mm256_setzero_pd());
2157}
2158
2159static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2160_mm_cvtepu32_ps(__m128i __A) {
2161 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2162}
2163
2164static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2165_mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
2166 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2167 (__v4sf)_mm_cvtepu32_ps(__A),
2168 (__v4sf)__W);
2169}
2170
2171static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2173 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2174 (__v4sf)_mm_cvtepu32_ps(__A),
2175 (__v4sf)_mm_setzero_ps());
2176}
2177
2178static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2180 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2181}
2182
2183static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2184_mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
2185 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2186 (__v8sf)_mm256_cvtepu32_ps(__A),
2187 (__v8sf)__W);
2188}
2189
2190static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2192 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2193 (__v8sf)_mm256_cvtepu32_ps(__A),
2194 (__v8sf)_mm256_setzero_ps());
2195}
2196
2197static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2198_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2199 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2200 (__v2df)_mm_div_pd(__A, __B),
2201 (__v2df)__W);
2202}
2203
2204static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2205_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2206 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2207 (__v2df)_mm_div_pd(__A, __B),
2208 (__v2df)_mm_setzero_pd());
2209}
2210
2211static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2212_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2213 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2214 (__v4df)_mm256_div_pd(__A, __B),
2215 (__v4df)__W);
2216}
2217
2218static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2219_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2220 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2221 (__v4df)_mm256_div_pd(__A, __B),
2222 (__v4df)_mm256_setzero_pd());
2223}
2224
2225static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2226_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2227 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2228 (__v4sf)_mm_div_ps(__A, __B),
2229 (__v4sf)__W);
2230}
2231
2232static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2233_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2234 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2235 (__v4sf)_mm_div_ps(__A, __B),
2236 (__v4sf)_mm_setzero_ps());
2237}
2238
2239static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2240_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2241 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2242 (__v8sf)_mm256_div_ps(__A, __B),
2243 (__v8sf)__W);
2244}
2245
2246static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2247_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2248 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2249 (__v8sf)_mm256_div_ps(__A, __B),
2250 (__v8sf)_mm256_setzero_ps());
2251}
2252
2253static __inline__ __m128d __DEFAULT_FN_ATTRS128
2254_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2255 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2256 (__v2df) __W,
2257 (__mmask8) __U);
2258}
2259
2260static __inline__ __m128d __DEFAULT_FN_ATTRS128
2261_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2262 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2263 (__v2df)
2264 _mm_setzero_pd (),
2265 (__mmask8) __U);
2266}
2267
2268static __inline__ __m256d __DEFAULT_FN_ATTRS256
2269_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2270 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2271 (__v4df) __W,
2272 (__mmask8) __U);
2273}
2274
2275static __inline__ __m256d __DEFAULT_FN_ATTRS256
2277 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2278 (__v4df)
2280 (__mmask8) __U);
2281}
2282
2283static __inline__ __m128i __DEFAULT_FN_ATTRS128
2284_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2285 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2286 (__v2di) __W,
2287 (__mmask8) __U);
2288}
2289
2290static __inline__ __m128i __DEFAULT_FN_ATTRS128
2292 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2293 (__v2di)
2295 (__mmask8) __U);
2296}
2297
2298static __inline__ __m256i __DEFAULT_FN_ATTRS256
2299_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2300 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2301 (__v4di) __W,
2302 (__mmask8) __U);
2303}
2304
2305static __inline__ __m256i __DEFAULT_FN_ATTRS256
2307 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2308 (__v4di)
2310 (__mmask8) __U);
2311}
2312
2313static __inline__ __m128d __DEFAULT_FN_ATTRS128
2314_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2315 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2316 (__v2df) __W,
2317 (__mmask8)
2318 __U);
2319}
2320
2321static __inline__ __m128d __DEFAULT_FN_ATTRS128
2323 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2324 (__v2df)
2325 _mm_setzero_pd (),
2326 (__mmask8)
2327 __U);
2328}
2329
2330static __inline__ __m256d __DEFAULT_FN_ATTRS256
2331_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2332 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2333 (__v4df) __W,
2334 (__mmask8)
2335 __U);
2336}
2337
2338static __inline__ __m256d __DEFAULT_FN_ATTRS256
2340 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2341 (__v4df)
2343 (__mmask8)
2344 __U);
2345}
2346
2347static __inline__ __m128i __DEFAULT_FN_ATTRS128
2348_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2349 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2350 (__v2di) __W,
2351 (__mmask8)
2352 __U);
2353}
2354
2355static __inline__ __m128i __DEFAULT_FN_ATTRS128
2357 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2358 (__v2di)
2360 (__mmask8)
2361 __U);
2362}
2363
2364static __inline__ __m256i __DEFAULT_FN_ATTRS256
2366 void const *__P) {
2367 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2368 (__v4di) __W,
2369 (__mmask8)
2370 __U);
2371}
2372
2373static __inline__ __m256i __DEFAULT_FN_ATTRS256
2375 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2376 (__v4di)
2378 (__mmask8)
2379 __U);
2380}
2381
2382static __inline__ __m128 __DEFAULT_FN_ATTRS128
2383_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2384 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2385 (__v4sf) __W,
2386 (__mmask8) __U);
2387}
2388
2389static __inline__ __m128 __DEFAULT_FN_ATTRS128
2391 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2392 (__v4sf)
2393 _mm_setzero_ps (),
2394 (__mmask8)
2395 __U);
2396}
2397
2398static __inline__ __m256 __DEFAULT_FN_ATTRS256
2399_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2400 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2401 (__v8sf) __W,
2402 (__mmask8) __U);
2403}
2404
2405static __inline__ __m256 __DEFAULT_FN_ATTRS256
2407 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2408 (__v8sf)
2410 (__mmask8)
2411 __U);
2412}
2413
2414static __inline__ __m128i __DEFAULT_FN_ATTRS128
2415_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2416 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2417 (__v4si) __W,
2418 (__mmask8)
2419 __U);
2420}
2421
2422static __inline__ __m128i __DEFAULT_FN_ATTRS128
2424 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2425 (__v4si)
2427 (__mmask8) __U);
2428}
2429
2430static __inline__ __m256i __DEFAULT_FN_ATTRS256
2432 void const *__P) {
2433 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2434 (__v8si) __W,
2435 (__mmask8)
2436 __U);
2437}
2438
2439static __inline__ __m256i __DEFAULT_FN_ATTRS256
2441 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2442 (__v8si)
2444 (__mmask8)
2445 __U);
2446}
2447
2448static __inline__ __m128 __DEFAULT_FN_ATTRS128
2449_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2450 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2451 (__v4sf) __W,
2452 (__mmask8) __U);
2453}
2454
2455static __inline__ __m128 __DEFAULT_FN_ATTRS128
2457 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2458 (__v4sf)
2459 _mm_setzero_ps (),
2460 (__mmask8) __U);
2461}
2462
2463static __inline__ __m256 __DEFAULT_FN_ATTRS256
2464_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2465 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2466 (__v8sf) __W,
2467 (__mmask8) __U);
2468}
2469
2470static __inline__ __m256 __DEFAULT_FN_ATTRS256
2472 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2473 (__v8sf)
2475 (__mmask8) __U);
2476}
2477
2478static __inline__ __m128i __DEFAULT_FN_ATTRS128
2479_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2480 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2481 (__v4si) __W,
2482 (__mmask8) __U);
2483}
2484
2485static __inline__ __m128i __DEFAULT_FN_ATTRS128
2487 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2488 (__v4si)
2490 (__mmask8) __U);
2491}
2492
2493static __inline__ __m256i __DEFAULT_FN_ATTRS256
2494_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2495 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2496 (__v8si) __W,
2497 (__mmask8) __U);
2498}
2499
2500static __inline__ __m256i __DEFAULT_FN_ATTRS256
2502 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2503 (__v8si)
2505 (__mmask8) __U);
2506}
2507
2508static __inline__ __m128d __DEFAULT_FN_ATTRS128
2509_mm_getexp_pd (__m128d __A) {
2510 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2511 (__v2df)
2512 _mm_setzero_pd (),
2513 (__mmask8) -1);
2514}
2515
2516static __inline__ __m128d __DEFAULT_FN_ATTRS128
2517_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2518 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2519 (__v2df) __W,
2520 (__mmask8) __U);
2521}
2522
2523static __inline__ __m128d __DEFAULT_FN_ATTRS128
2524_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2525 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2526 (__v2df)
2527 _mm_setzero_pd (),
2528 (__mmask8) __U);
2529}
2530
2531static __inline__ __m256d __DEFAULT_FN_ATTRS256
2532_mm256_getexp_pd (__m256d __A) {
2533 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2534 (__v4df)
2536 (__mmask8) -1);
2537}
2538
2539static __inline__ __m256d __DEFAULT_FN_ATTRS256
2540_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2541 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2542 (__v4df) __W,
2543 (__mmask8) __U);
2544}
2545
2546static __inline__ __m256d __DEFAULT_FN_ATTRS256
2548 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2549 (__v4df)
2551 (__mmask8) __U);
2552}
2553
2554static __inline__ __m128 __DEFAULT_FN_ATTRS128
2555_mm_getexp_ps (__m128 __A) {
2556 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2557 (__v4sf)
2558 _mm_setzero_ps (),
2559 (__mmask8) -1);
2560}
2561
2562static __inline__ __m128 __DEFAULT_FN_ATTRS128
2563_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2564 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2565 (__v4sf) __W,
2566 (__mmask8) __U);
2567}
2568
2569static __inline__ __m128 __DEFAULT_FN_ATTRS128
2571 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2572 (__v4sf)
2573 _mm_setzero_ps (),
2574 (__mmask8) __U);
2575}
2576
2577static __inline__ __m256 __DEFAULT_FN_ATTRS256
2578_mm256_getexp_ps (__m256 __A) {
2579 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2580 (__v8sf)
2582 (__mmask8) -1);
2583}
2584
2585static __inline__ __m256 __DEFAULT_FN_ATTRS256
2586_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2587 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2588 (__v8sf) __W,
2589 (__mmask8) __U);
2590}
2591
2592static __inline__ __m256 __DEFAULT_FN_ATTRS256
2594 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2595 (__v8sf)
2597 (__mmask8) __U);
2598}
2599
2600static __inline__ __m128d __DEFAULT_FN_ATTRS128
2601_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2602 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2603 (__v2df)_mm_max_pd(__A, __B),
2604 (__v2df)__W);
2605}
2606
2607static __inline__ __m128d __DEFAULT_FN_ATTRS128
2608_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2609 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2610 (__v2df)_mm_max_pd(__A, __B),
2611 (__v2df)_mm_setzero_pd());
2612}
2613
2614static __inline__ __m256d __DEFAULT_FN_ATTRS256
2615_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2616 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2617 (__v4df)_mm256_max_pd(__A, __B),
2618 (__v4df)__W);
2619}
2620
2621static __inline__ __m256d __DEFAULT_FN_ATTRS256
2622_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2623 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2624 (__v4df)_mm256_max_pd(__A, __B),
2625 (__v4df)_mm256_setzero_pd());
2626}
2627
2628static __inline__ __m128 __DEFAULT_FN_ATTRS128
2629_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2630 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2631 (__v4sf)_mm_max_ps(__A, __B),
2632 (__v4sf)__W);
2633}
2634
2635static __inline__ __m128 __DEFAULT_FN_ATTRS128
2636_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2637 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2638 (__v4sf)_mm_max_ps(__A, __B),
2639 (__v4sf)_mm_setzero_ps());
2640}
2641
2642static __inline__ __m256 __DEFAULT_FN_ATTRS256
2643_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2644 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2645 (__v8sf)_mm256_max_ps(__A, __B),
2646 (__v8sf)__W);
2647}
2648
2649static __inline__ __m256 __DEFAULT_FN_ATTRS256
2650_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2651 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2652 (__v8sf)_mm256_max_ps(__A, __B),
2653 (__v8sf)_mm256_setzero_ps());
2654}
2655
2656static __inline__ __m128d __DEFAULT_FN_ATTRS128
2657_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2658 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2659 (__v2df)_mm_min_pd(__A, __B),
2660 (__v2df)__W);
2661}
2662
2663static __inline__ __m128d __DEFAULT_FN_ATTRS128
2664_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2665 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2666 (__v2df)_mm_min_pd(__A, __B),
2667 (__v2df)_mm_setzero_pd());
2668}
2669
2670static __inline__ __m256d __DEFAULT_FN_ATTRS256
2671_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2672 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2673 (__v4df)_mm256_min_pd(__A, __B),
2674 (__v4df)__W);
2675}
2676
2677static __inline__ __m256d __DEFAULT_FN_ATTRS256
2678_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2679 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2680 (__v4df)_mm256_min_pd(__A, __B),
2681 (__v4df)_mm256_setzero_pd());
2682}
2683
2684static __inline__ __m128 __DEFAULT_FN_ATTRS128
2685_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2686 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2687 (__v4sf)_mm_min_ps(__A, __B),
2688 (__v4sf)__W);
2689}
2690
2691static __inline__ __m128 __DEFAULT_FN_ATTRS128
2692_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2693 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2694 (__v4sf)_mm_min_ps(__A, __B),
2695 (__v4sf)_mm_setzero_ps());
2696}
2697
2698static __inline__ __m256 __DEFAULT_FN_ATTRS256
2699_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2700 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2701 (__v8sf)_mm256_min_ps(__A, __B),
2702 (__v8sf)__W);
2703}
2704
2705static __inline__ __m256 __DEFAULT_FN_ATTRS256
2706_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2707 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2708 (__v8sf)_mm256_min_ps(__A, __B),
2709 (__v8sf)_mm256_setzero_ps());
2710}
2711
2712static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2713_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2714 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2715 (__v2df)_mm_mul_pd(__A, __B),
2716 (__v2df)__W);
2717}
2718
2719static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2720_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2721 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2722 (__v2df)_mm_mul_pd(__A, __B),
2723 (__v2df)_mm_setzero_pd());
2724}
2725
2726static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2727_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2728 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2729 (__v4df)_mm256_mul_pd(__A, __B),
2730 (__v4df)__W);
2731}
2732
2733static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2734_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2735 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2736 (__v4df)_mm256_mul_pd(__A, __B),
2737 (__v4df)_mm256_setzero_pd());
2738}
2739
2740static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2741_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2742 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2743 (__v4sf)_mm_mul_ps(__A, __B),
2744 (__v4sf)__W);
2745}
2746
2747static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2748_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2749 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2750 (__v4sf)_mm_mul_ps(__A, __B),
2751 (__v4sf)_mm_setzero_ps());
2752}
2753
2754static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2755_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2756 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2757 (__v8sf)_mm256_mul_ps(__A, __B),
2758 (__v8sf)__W);
2759}
2760
2761static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2762_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2763 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2764 (__v8sf)_mm256_mul_ps(__A, __B),
2765 (__v8sf)_mm256_setzero_ps());
2766}
2767
2768static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2769_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2770 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2771 (__v4si)_mm_abs_epi32(__A),
2772 (__v4si)__W);
2773}
2774
2775static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2777 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2778 (__v4si)_mm_abs_epi32(__A),
2779 (__v4si)_mm_setzero_si128());
2780}
2781
2782static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2783_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2784 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2785 (__v8si)_mm256_abs_epi32(__A),
2786 (__v8si)__W);
2787}
2788
2789static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2791 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2792 (__v8si)_mm256_abs_epi32(__A),
2793 (__v8si)_mm256_setzero_si256());
2794}
2795
2796static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2797_mm_abs_epi64(__m128i __A) {
2798 return (__m128i)__builtin_elementwise_abs((__v2di)__A);
2799}
2800
2801static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2802_mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
2803 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2804 (__v2di)_mm_abs_epi64(__A),
2805 (__v2di)__W);
2806}
2807
2808static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2810 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2811 (__v2di)_mm_abs_epi64(__A),
2812 (__v2di)_mm_setzero_si128());
2813}
2814
2815static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2816_mm256_abs_epi64(__m256i __A) {
2817 return (__m256i)__builtin_elementwise_abs((__v4di)__A);
2818}
2819
2820static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2821_mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
2822 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2823 (__v4di)_mm256_abs_epi64(__A),
2824 (__v4di)__W);
2825}
2826
2827static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2829 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2830 (__v4di)_mm256_abs_epi64(__A),
2831 (__v4di)_mm256_setzero_si256());
2832}
2833
2834static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2835_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
2836 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2837 (__v4si)_mm_max_epi32(__A, __B),
2838 (__v4si)_mm_setzero_si128());
2839}
2840
2841static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2842_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2843 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2844 (__v4si)_mm_max_epi32(__A, __B),
2845 (__v4si)__W);
2846}
2847
2848static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2849_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
2850 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2851 (__v8si)_mm256_max_epi32(__A, __B),
2852 (__v8si)_mm256_setzero_si256());
2853}
2854
2855static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2856_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2857 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2858 (__v8si)_mm256_max_epi32(__A, __B),
2859 (__v8si)__W);
2860}
2861
2862static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2863_mm_max_epi64(__m128i __A, __m128i __B) {
2864 return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
2865}
2866
2867static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2868_mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B) {
2869 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2870 (__v2di)_mm_max_epi64(__A, __B),
2871 (__v2di)_mm_setzero_si128());
2872}
2873
2874static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2875_mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2876 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2877 (__v2di)_mm_max_epi64(__A, __B),
2878 (__v2di)__W);
2879}
2880
2881static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2882_mm256_max_epi64(__m256i __A, __m256i __B) {
2883 return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
2884}
2885
2886static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2887_mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B) {
2888 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
2889 (__v4di)_mm256_max_epi64(__A, __B),
2890 (__v4di)_mm256_setzero_si256());
2891}
2892
2893static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2894_mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2895 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
2896 (__v4di)_mm256_max_epi64(__A, __B),
2897 (__v4di)__W);
2898}
2899
2900static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2901_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
2902 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2903 (__v4si)_mm_max_epu32(__A, __B),
2904 (__v4si)_mm_setzero_si128());
2905}
2906
2907static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2908_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2909 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2910 (__v4si)_mm_max_epu32(__A, __B),
2911 (__v4si)__W);
2912}
2913
2914static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2915_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
2916 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2917 (__v8si)_mm256_max_epu32(__A, __B),
2918 (__v8si)_mm256_setzero_si256());
2919}
2920
2921static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2922_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2923 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2924 (__v8si)_mm256_max_epu32(__A, __B),
2925 (__v8si)__W);
2926}
2927
2928static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2929_mm_max_epu64(__m128i __A, __m128i __B) {
2930 return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
2931}
2932
2933static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2934_mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B) {
2935 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2936 (__v2di)_mm_max_epu64(__A, __B),
2937 (__v2di)_mm_setzero_si128());
2938}
2939
2940static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2941_mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2942 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2943 (__v2di)_mm_max_epu64(__A, __B),
2944 (__v2di)__W);
2945}
2946
2947static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2948_mm256_max_epu64(__m256i __A, __m256i __B) {
2949 return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
2950}
2951
2952static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2953_mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B) {
2954 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
2955 (__v4di)_mm256_max_epu64(__A, __B),
2956 (__v4di)_mm256_setzero_si256());
2957}
2958
2959static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2960_mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2961 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
2962 (__v4di)_mm256_max_epu64(__A, __B),
2963 (__v4di)__W);
2964}
2965
2966static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2967_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
2968 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2969 (__v4si)_mm_min_epi32(__A, __B),
2970 (__v4si)_mm_setzero_si128());
2971}
2972
2973static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2974_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2976 (__v4si)_mm_min_epi32(__A, __B),
2977 (__v4si)__W);
2978}
2979
2980static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2981_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
2982 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2983 (__v8si)_mm256_min_epi32(__A, __B),
2984 (__v8si)_mm256_setzero_si256());
2985}
2986
2987static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2988_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2989 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2990 (__v8si)_mm256_min_epi32(__A, __B),
2991 (__v8si)__W);
2992}
2993
2994static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2995_mm_min_epi64(__m128i __A, __m128i __B) {
2996 return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
2997}
2998
2999static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3000_mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3001 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3002 (__v2di)_mm_min_epi64(__A, __B),
3003 (__v2di)__W);
3004}
3005
3006static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3007_mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B) {
3008 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3009 (__v2di)_mm_min_epi64(__A, __B),
3010 (__v2di)_mm_setzero_si128());
3011}
3012
3013static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3014_mm256_min_epi64(__m256i __A, __m256i __B) {
3015 return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
3016}
3017
3018static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3019_mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3020 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3021 (__v4di)_mm256_min_epi64(__A, __B),
3022 (__v4di)__W);
3023}
3024
3025static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3026_mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B) {
3027 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3028 (__v4di)_mm256_min_epi64(__A, __B),
3029 (__v4di)_mm256_setzero_si256());
3030}
3031
3032static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3033_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3034 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3035 (__v4si)_mm_min_epu32(__A, __B),
3036 (__v4si)_mm_setzero_si128());
3037}
3038
3039static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3040_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3041 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3042 (__v4si)_mm_min_epu32(__A, __B),
3043 (__v4si)__W);
3044}
3045
3046static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3047_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3048 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3049 (__v8si)_mm256_min_epu32(__A, __B),
3050 (__v8si)_mm256_setzero_si256());
3051}
3052
3053static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3054_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3055 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3056 (__v8si)_mm256_min_epu32(__A, __B),
3057 (__v8si)__W);
3058}
3059
3060static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3061_mm_min_epu64(__m128i __A, __m128i __B) {
3062 return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
3063}
3064
3065static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3066_mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3067 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3068 (__v2di)_mm_min_epu64(__A, __B),
3069 (__v2di)__W);
3070}
3071
3072static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3073_mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B) {
3074 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3075 (__v2di)_mm_min_epu64(__A, __B),
3076 (__v2di)_mm_setzero_si128());
3077}
3078
3079static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3080_mm256_min_epu64(__m256i __A, __m256i __B) {
3081 return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
3082}
3083
3084static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3085_mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3086 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3087 (__v4di)_mm256_min_epu64(__A, __B),
3088 (__v4di)__W);
3089}
3090
3091static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3092_mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B) {
3093 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3094 (__v4di)_mm256_min_epu64(__A, __B),
3095 (__v4di)_mm256_setzero_si256());
3096}
3097
/* 128-bit double roundscale family. All three expand to
 * __builtin_ia32_rndscalepd_128_mask(A, imm, src, mask):
 *   plain  - src = zero vector, mask = (__mmask8)-1
 *   mask   - src = W,           mask = U
 *   maskz  - src = zero vector, mask = U */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1))

#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U)))
3117
3118
/* 256-bit double roundscale family. All three expand to
 * __builtin_ia32_rndscalepd_256_mask(A, imm, src, mask):
 *   plain  - src = zero vector, mask = (__mmask8)-1
 *   mask   - src = W,           mask = U
 *   maskz  - src = zero vector, mask = U */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)-1))

#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)(__m256d)(W), \
                                               (__mmask8)(U)))

#define _mm256_maskz_roundscale_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)(U)))
3138
/* 128-bit float roundscale family. All three expand to
 * __builtin_ia32_rndscaleps_128_mask(A, imm, src, mask):
 *   plain  - src = zero vector, mask = (__mmask8)-1
 *   mask   - src = W,           mask = U
 *   maskz  - src = zero vector, mask = U */
#define _mm_roundscale_ps(A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1))

#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U)))

#define _mm_maskz_roundscale_ps(U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U)))
3155
/* 256-bit float roundscale family. All three expand to
 * __builtin_ia32_rndscaleps_256_mask(A, imm, src, mask):
 *   plain  - src = zero vector, mask = (__mmask8)-1
 *   mask   - src = W,           mask = U
 *   maskz  - src = zero vector, mask = U */
#define _mm256_roundscale_ps(A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)-1))

#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)(__m256)(W), \
                                              (__mmask8)(U)))

#define _mm256_maskz_roundscale_ps(U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)(U)))
3171
3172static __inline__ __m128d __DEFAULT_FN_ATTRS128
3173_mm_scalef_pd (__m128d __A, __m128d __B) {
3174 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3175 (__v2df) __B,
3176 (__v2df)
3177 _mm_setzero_pd (),
3178 (__mmask8) -1);
3179}
3180
3181static __inline__ __m128d __DEFAULT_FN_ATTRS128
3182_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3183 __m128d __B) {
3184 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3185 (__v2df) __B,
3186 (__v2df) __W,
3187 (__mmask8) __U);
3188}
3189
3190static __inline__ __m128d __DEFAULT_FN_ATTRS128
3191_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3192 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3193 (__v2df) __B,
3194 (__v2df)
3195 _mm_setzero_pd (),
3196 (__mmask8) __U);
3197}
3198
3199static __inline__ __m256d __DEFAULT_FN_ATTRS256
3200_mm256_scalef_pd (__m256d __A, __m256d __B) {
3201 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3202 (__v4df) __B,
3203 (__v4df)
3205 (__mmask8) -1);
3206}
3207
3208static __inline__ __m256d __DEFAULT_FN_ATTRS256
3209_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3210 __m256d __B) {
3211 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3212 (__v4df) __B,
3213 (__v4df) __W,
3214 (__mmask8) __U);
3215}
3216
3217static __inline__ __m256d __DEFAULT_FN_ATTRS256
3218_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3219 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3220 (__v4df) __B,
3221 (__v4df)
3223 (__mmask8) __U);
3224}
3225
3226static __inline__ __m128 __DEFAULT_FN_ATTRS128
3227_mm_scalef_ps (__m128 __A, __m128 __B) {
3228 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3229 (__v4sf) __B,
3230 (__v4sf)
3231 _mm_setzero_ps (),
3232 (__mmask8) -1);
3233}
3234
3235static __inline__ __m128 __DEFAULT_FN_ATTRS128
3236_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3237 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3238 (__v4sf) __B,
3239 (__v4sf) __W,
3240 (__mmask8) __U);
3241}
3242
3243static __inline__ __m128 __DEFAULT_FN_ATTRS128
3244_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3245 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3246 (__v4sf) __B,
3247 (__v4sf)
3248 _mm_setzero_ps (),
3249 (__mmask8) __U);
3250}
3251
3252static __inline__ __m256 __DEFAULT_FN_ATTRS256
3253_mm256_scalef_ps (__m256 __A, __m256 __B) {
3254 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3255 (__v8sf) __B,
3256 (__v8sf)
3258 (__mmask8) -1);
3259}
3260
3261static __inline__ __m256 __DEFAULT_FN_ATTRS256
3262_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3263 __m256 __B) {
3264 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3265 (__v8sf) __B,
3266 (__v8sf) __W,
3267 (__mmask8) __U);
3268}
3269
3270static __inline__ __m256 __DEFAULT_FN_ATTRS256
3271_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3272 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3273 (__v8sf) __B,
3274 (__v8sf)
3276 (__mmask8) __U);
3277}
3278
/* Scatter wrappers over __builtin_ia32_scatterdiv2df: index is cast to
 * __v2di, v1 to __v2df. The plain form passes (__mmask8)-1 as the mask; the
 * mask form passes the caller's mask. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))
3288
/* Scatter wrappers over __builtin_ia32_scatterdiv2di: index and v1 are both
 * cast to __v2di. The plain form passes (__mmask8)-1 as the mask; the mask
 * form passes the caller's mask. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))
3298
/* Scatter wrappers over __builtin_ia32_scatterdiv4df: index is cast to
 * __v4di, v1 to __v4df. The plain form passes (__mmask8)-1 as the mask; the
 * mask form passes the caller's mask. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))
3308
/* Scatter wrappers over __builtin_ia32_scatterdiv4di: index and v1 are both
 * cast to __v4di. The plain form passes (__mmask8)-1 as the mask; the mask
 * form passes the caller's mask. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
3318
/* Scatter wrappers over __builtin_ia32_scatterdiv4sf: index is cast to
 * __v2di, v1 to __v4sf. The plain form passes (__mmask8)-1 as the mask; the
 * mask form passes the caller's mask. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))

#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))
3328
/* Scatter wrappers over __builtin_ia32_scatterdiv4si: index is cast to
 * __v2di, v1 to __v4si. The plain form passes (__mmask8)-1 as the mask; the
 * mask form passes the caller's mask. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
3338
/* Scatter wrappers over __builtin_ia32_scatterdiv8sf: index is cast to
 * __v4di (from __m256i) while v1 is cast to __v4sf (from __m128). The plain
 * form passes (__mmask8)-1 as the mask; the mask form passes the caller's
 * mask. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))

#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))
3348
/* Scatter wrappers over __builtin_ia32_scatterdiv8si: index is cast to
 * __v4di (from __m256i) while v1 is cast to __v4si (from __m128i). The plain
 * form passes (__mmask8)-1 as the mask; the mask form passes the caller's
 * mask. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
3358
/* Scatter wrappers over __builtin_ia32_scattersiv2df: index is cast to
 * __v4si, v1 to __v2df. The plain form passes (__mmask8)-1 as the mask; the
 * mask form passes the caller's mask. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))
3368
/* Scatter wrappers over __builtin_ia32_scattersiv2di: index is cast to
 * __v4si, v1 to __v2di. The plain form passes (__mmask8)-1 as the mask; the
 * mask form passes the caller's mask. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))
3378
/* Scatter wrappers over __builtin_ia32_scattersiv4df: index is cast to
 * __v4si (from __m128i), v1 to __v4df. The plain form passes (__mmask8)-1 as
 * the mask; the mask form passes the caller's mask. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))
3388
/* Scatter wrappers over __builtin_ia32_scattersiv4di: index is cast to
 * __v4si (from __m128i), v1 to __v4di. The plain form passes (__mmask8)-1 as
 * the mask; the mask form passes the caller's mask. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
3398
/* Scatter wrappers over __builtin_ia32_scattersiv4sf: index is cast to
 * __v4si, v1 to __v4sf. The plain form passes (__mmask8)-1 as the mask; the
 * mask form passes the caller's mask. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))

#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))
3408
/* Scatter wrappers over __builtin_ia32_scattersiv4si: index and v1 are both
 * cast to __v4si. The plain form passes (__mmask8)-1 as the mask; the mask
 * form passes the caller's mask. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
3418
/* Scatter wrappers over __builtin_ia32_scattersiv8sf: index is cast to
 * __v8si, v1 to __v8sf. The plain form passes (__mmask8)-1 as the mask; the
 * mask form passes the caller's mask. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
                               (int)(scale))

#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
                               (int)(scale))
3428
/* Scatter wrappers over __builtin_ia32_scattersiv8si: index and v1 are both
 * cast to __v8si. The plain form passes (__mmask8)-1 as the mask; the mask
 * form passes the caller's mask. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale))
3438
3439 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3440 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3441 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3442 (__v2df)_mm_sqrt_pd(__A),
3443 (__v2df)__W);
3444 }
3445
3446 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3447 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3448 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3449 (__v2df)_mm_sqrt_pd(__A),
3450 (__v2df)_mm_setzero_pd());
3451 }
3452
3453 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3454 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3455 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3456 (__v4df)_mm256_sqrt_pd(__A),
3457 (__v4df)__W);
3458 }
3459
3460 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3461 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3462 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3463 (__v4df)_mm256_sqrt_pd(__A),
3464 (__v4df)_mm256_setzero_pd());
3465 }
3466
3467 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3468 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3469 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3470 (__v4sf)_mm_sqrt_ps(__A),
3471 (__v4sf)__W);
3472 }
3473
3474 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3475 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3476 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3477 (__v4sf)_mm_sqrt_ps(__A),
3478 (__v4sf)_mm_setzero_ps());
3479 }
3480
3481 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3482 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3483 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3484 (__v8sf)_mm256_sqrt_ps(__A),
3485 (__v8sf)__W);
3486 }
3487
3488 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3490 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3491 (__v8sf)_mm256_sqrt_ps(__A),
3492 (__v8sf)_mm256_setzero_ps());
3493 }
3494
3495 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3496 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3497 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3498 (__v2df)_mm_sub_pd(__A, __B),
3499 (__v2df)__W);
3500 }
3501
3502 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3503 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3504 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3505 (__v2df)_mm_sub_pd(__A, __B),
3506 (__v2df)_mm_setzero_pd());
3507 }
3508
3509 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3510 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3511 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3512 (__v4df)_mm256_sub_pd(__A, __B),
3513 (__v4df)__W);
3514 }
3515
3516 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3517 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3518 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3519 (__v4df)_mm256_sub_pd(__A, __B),
3520 (__v4df)_mm256_setzero_pd());
3521 }
3522
3523 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3524 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3525 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3526 (__v4sf)_mm_sub_ps(__A, __B),
3527 (__v4sf)__W);
3528 }
3529
3530 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3531 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3532 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3533 (__v4sf)_mm_sub_ps(__A, __B),
3534 (__v4sf)_mm_setzero_ps());
3535 }
3536
3537 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3538 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3539 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3540 (__v8sf)_mm256_sub_ps(__A, __B),
3541 (__v8sf)__W);
3542 }
3543
3544 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3545 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3546 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3547 (__v8sf)_mm256_sub_ps(__A, __B),
3548 (__v8sf)_mm256_setzero_ps());
3549 }
3550
3551 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3552 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3553 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3554 (__v4si)__B);
3555 }
3556
3557 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3558 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3559 __m128i __B) {
3560 return (__m128i)__builtin_ia32_selectd_128(__U,
3561 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3562 (__v4si)__A);
3563 }
3564
3565 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3566 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3567 __m128i __B) {
3568 return (__m128i)__builtin_ia32_selectd_128(__U,
3569 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3570 (__v4si)__I);
3571 }
3572
3573 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3574 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3575 __m128i __B) {
3576 return (__m128i)__builtin_ia32_selectd_128(__U,
3577 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3578 (__v4si)_mm_setzero_si128());
3579 }
3580
3581 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3582 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3583 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3584 (__v8si) __B);
3585 }
3586
3587 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3588 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3589 __m256i __B) {
3590 return (__m256i)__builtin_ia32_selectd_256(__U,
3591 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3592 (__v8si)__A);
3593 }
3594
3595 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3596 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3597 __m256i __B) {
3598 return (__m256i)__builtin_ia32_selectd_256(__U,
3599 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3600 (__v8si)__I);
3601 }
3602
3603 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3604 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3605 __m256i __B) {
3606 return (__m256i)__builtin_ia32_selectd_256(__U,
3607 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3608 (__v8si)_mm256_setzero_si256());
3609 }
3610
3611 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3612 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3613 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3614 (__v2df)__B);
3615 }
3616
3617 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3618 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I,
3619 __m128d __B) {
3620 return (__m128d)__builtin_ia32_selectpd_128(__U,
3621 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3622 (__v2df)__A);
3623 }
3624
3625 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3626 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U,
3627 __m128d __B) {
3628 return (__m128d)__builtin_ia32_selectpd_128(__U,
3629 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3630 (__v2df)(__m128d)__I);
3631 }
3632
3633 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3634 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I,
3635 __m128d __B) {
3636 return (__m128d)__builtin_ia32_selectpd_128(__U,
3637 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3638 (__v2df)_mm_setzero_pd());
3639 }
3640
3641 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3642 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3643 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3644 (__v4df)__B);
3645 }
3646
3647 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3648 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3649 __m256d __B) {
3650 return (__m256d)__builtin_ia32_selectpd_256(__U,
3651 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3652 (__v4df)__A);
3653 }
3654
3655 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3656 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3657 __m256d __B) {
3658 return (__m256d)__builtin_ia32_selectpd_256(__U,
3659 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3660 (__v4df)(__m256d)__I);
3661 }
3662
3663 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3664 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3665 __m256d __B) {
3666 return (__m256d)__builtin_ia32_selectpd_256(__U,
3667 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3668 (__v4df)_mm256_setzero_pd());
3669 }
3670
3671 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3672 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3673 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3674 (__v4sf)__B);
3675 }
3676
3677 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3678 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3679 return (__m128)__builtin_ia32_selectps_128(__U,
3680 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3681 (__v4sf)__A);
3682 }
3683
3684 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3685 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3686 return (__m128)__builtin_ia32_selectps_128(__U,
3687 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3688 (__v4sf)(__m128)__I);
3689 }
3690
3691 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3692 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3693 return (__m128)__builtin_ia32_selectps_128(__U,
3694 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3695 (__v4sf)_mm_setzero_ps());
3696 }
3697
3698 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3699 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3700 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3701 (__v8sf) __B);
3702 }
3703
3704 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3705 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I,
3706 __m256 __B) {
3707 return (__m256)__builtin_ia32_selectps_256(__U,
3708 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3709 (__v8sf)__A);
3710 }
3711
3712 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3713 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3714 __m256 __B) {
3715 return (__m256)__builtin_ia32_selectps_256(__U,
3716 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3717 (__v8sf)(__m256)__I);
3718 }
3719
3720 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3721 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3722 __m256 __B) {
3723 return (__m256)__builtin_ia32_selectps_256(__U,
3724 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3725 (__v8sf)_mm256_setzero_ps());
3726 }
3727
3728 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3729 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3730 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3731 (__v2di)__B);
3732 }
3733
3734 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3735 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3736 __m128i __B) {
3737 return (__m128i)__builtin_ia32_selectq_128(__U,
3738 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3739 (__v2di)__A);
3740 }
3741
3742 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3743 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3744 __m128i __B) {
3745 return (__m128i)__builtin_ia32_selectq_128(__U,
3746 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3747 (__v2di)__I);
3748 }
3749
3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3751 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3752 __m128i __B) {
3753 return (__m128i)__builtin_ia32_selectq_128(__U,
3754 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3755 (__v2di)_mm_setzero_si128());
3756 }
3757
3758 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3759 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3760 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3761 (__v4di) __B);
3762 }
3763
3764 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3765 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3766 __m256i __B) {
3767 return (__m256i)__builtin_ia32_selectq_256(__U,
3768 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3769 (__v4di)__A);
3770 }
3771
3772 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3773 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3774 __m256i __B) {
3775 return (__m256i)__builtin_ia32_selectq_256(__U,
3776 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3777 (__v4di)__I);
3778 }
3779
3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3781 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3782 __m256i __B) {
3783 return (__m256i)__builtin_ia32_selectq_256(__U,
3784 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3785 (__v4di)_mm256_setzero_si256());
3786 }
3787
3788 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3789 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3790 {
3791 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3792 (__v4si)_mm_cvtepi8_epi32(__A),
3793 (__v4si)__W);
3794 }
3795
3796 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3798 {
3799 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3800 (__v4si)_mm_cvtepi8_epi32(__A),
3801 (__v4si)_mm_setzero_si128());
3802 }
3803
3804 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3805 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3806 {
3807 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3808 (__v8si)_mm256_cvtepi8_epi32(__A),
3809 (__v8si)__W);
3810 }
3811
3812 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3814 {
3815 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3816 (__v8si)_mm256_cvtepi8_epi32(__A),
3817 (__v8si)_mm256_setzero_si256());
3818 }
3819
3820 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3821 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
3822 {
3823 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3824 (__v2di)_mm_cvtepi8_epi64(__A),
3825 (__v2di)__W);
3826 }
3827
3828 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3830 {
3831 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3832 (__v2di)_mm_cvtepi8_epi64(__A),
3833 (__v2di)_mm_setzero_si128());
3834 }
3835
3836 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3837 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
3838 {
3839 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3840 (__v4di)_mm256_cvtepi8_epi64(__A),
3841 (__v4di)__W);
3842 }
3843
3844 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3846 {
3847 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3848 (__v4di)_mm256_cvtepi8_epi64(__A),
3849 (__v4di)_mm256_setzero_si256());
3850 }
3851
3852 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3853 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
3854 {
3855 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3856 (__v2di)_mm_cvtepi32_epi64(__X),
3857 (__v2di)__W);
3858 }
3859
3860 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3862 {
3863 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3864 (__v2di)_mm_cvtepi32_epi64(__X),
3865 (__v2di)_mm_setzero_si128());
3866 }
3867
3868 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3869 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
3870 {
3871 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3872 (__v4di)_mm256_cvtepi32_epi64(__X),
3873 (__v4di)__W);
3874 }
3875
3876 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3878 {
3879 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3880 (__v4di)_mm256_cvtepi32_epi64(__X),
3881 (__v4di)_mm256_setzero_si256());
3882 }
3883
3884 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3885 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3886 {
3887 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3888 (__v4si)_mm_cvtepi16_epi32(__A),
3889 (__v4si)__W);
3890 }
3891
3892 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3894 {
3895 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3896 (__v4si)_mm_cvtepi16_epi32(__A),
3897 (__v4si)_mm_setzero_si128());
3898 }
3899
3900 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3901 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
3902 {
3903 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3904 (__v8si)_mm256_cvtepi16_epi32(__A),
3905 (__v8si)__W);
3906 }
3907
3908 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3910 {
3911 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3912 (__v8si)_mm256_cvtepi16_epi32(__A),
3913 (__v8si)_mm256_setzero_si256());
3914 }
3915
3916 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3917 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
3918 {
3919 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3920 (__v2di)_mm_cvtepi16_epi64(__A),
3921 (__v2di)__W);
3922 }
3923
3924 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3926 {
3927 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3928 (__v2di)_mm_cvtepi16_epi64(__A),
3929 (__v2di)_mm_setzero_si128());
3930 }
3931
3932 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3933 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
3934 {
3935 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3936 (__v4di)_mm256_cvtepi16_epi64(__A),
3937 (__v4di)__W);
3938 }
3939
3940 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3942 {
3943 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3944 (__v4di)_mm256_cvtepi16_epi64(__A),
3945 (__v4di)_mm256_setzero_si256());
3946 }
3947
3948
3949 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3950 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3951 {
3952 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3953 (__v4si)_mm_cvtepu8_epi32(__A),
3954 (__v4si)__W);
3955 }
3956
3957 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3959 {
3960 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3961 (__v4si)_mm_cvtepu8_epi32(__A),
3962 (__v4si)_mm_setzero_si128());
3963 }
3964
3965 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3966 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
3967 {
3968 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3969 (__v8si)_mm256_cvtepu8_epi32(__A),
3970 (__v8si)__W);
3971 }
3972
3973 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3975 {
3976 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3977 (__v8si)_mm256_cvtepu8_epi32(__A),
3978 (__v8si)_mm256_setzero_si256());
3979 }
3980
3981 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3982 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
3983 {
3984 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3985 (__v2di)_mm_cvtepu8_epi64(__A),
3986 (__v2di)__W);
3987 }
3988
3989 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3991 {
3992 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3993 (__v2di)_mm_cvtepu8_epi64(__A),
3994 (__v2di)_mm_setzero_si128());
3995 }
3996
3997 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3998 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
3999 {
4000 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4001 (__v4di)_mm256_cvtepu8_epi64(__A),
4002 (__v4di)__W);
4003 }
4004
4005 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4007 {
4008 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4009 (__v4di)_mm256_cvtepu8_epi64(__A),
4010 (__v4di)_mm256_setzero_si256());
4011 }
4012
4013 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4014 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4015 {
4016 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4017 (__v2di)_mm_cvtepu32_epi64(__X),
4018 (__v2di)__W);
4019 }
4020
4021 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4023 {
4024 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4025 (__v2di)_mm_cvtepu32_epi64(__X),
4026 (__v2di)_mm_setzero_si128());
4027 }
4028
4029 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4030 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4031 {
4032 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4033 (__v4di)_mm256_cvtepu32_epi64(__X),
4034 (__v4di)__W);
4035 }
4036
4037 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4039 {
4040 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4041 (__v4di)_mm256_cvtepu32_epi64(__X),
4042 (__v4di)_mm256_setzero_si256());
4043 }
4044
4045 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4046 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4047 {
4048 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4049 (__v4si)_mm_cvtepu16_epi32(__A),
4050 (__v4si)__W);
4051 }
4052
4053 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4055 {
4056 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4057 (__v4si)_mm_cvtepu16_epi32(__A),
4058 (__v4si)_mm_setzero_si128());
4059 }
4060
4061 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4062 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4063 {
4064 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4065 (__v8si)_mm256_cvtepu16_epi32(__A),
4066 (__v8si)__W);
4067 }
4068
4069 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4071 {
4072 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4073 (__v8si)_mm256_cvtepu16_epi32(__A),
4074 (__v8si)_mm256_setzero_si256());
4075 }
4076
4077 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4078 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4079 {
4080 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4081 (__v2di)_mm_cvtepu16_epi64(__A),
4082 (__v2di)__W);
4083 }
4084
4085 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4087 {
4088 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4089 (__v2di)_mm_cvtepu16_epi64(__A),
4090 (__v2di)_mm_setzero_si128());
4091 }
4092
4093 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4094 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4095 {
4096 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4097 (__v4di)_mm256_cvtepu16_epi64(__A),
4098 (__v4di)__W);
4099 }
4100
4101 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4103 {
4104 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4105 (__v4di)_mm256_cvtepu16_epi64(__A),
4106 (__v4di)_mm256_setzero_si256());
4107 }
4108
4109
/* Passes a (as four 32-bit lanes) and the count b, cast to int, to the
 * prold128 builtin. */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128( \
      (__v4si)(__m128i)(a), (int)(b)))
4112
/* Merge-masked _mm_rol_epi32: 32-bit lanes with a clear bit in u come from
 * w. */
#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(u), (__v4si)_mm_rol_epi32((a), (b)), (__v4si)(__m128i)(w)))
4117
/* Zero-masked _mm_rol_epi32: 32-bit lanes with a clear bit in u are zeroed. */
#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(u), (__v4si)_mm_rol_epi32((a), (b)), \
      (__v4si)_mm_setzero_si128()))
4122
/* Passes a (as eight 32-bit lanes) and the count b, cast to int, to the
 * prold256 builtin. */
#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256( \
      (__v8si)(__m256i)(a), (int)(b)))
4125
/* Merge-masked _mm256_rol_epi32: 32-bit lanes with a clear bit in u come
 * from w. */
#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(u), (__v8si)_mm256_rol_epi32((a), (b)), \
      (__v8si)(__m256i)(w)))
4130
/* Zero-masked _mm256_rol_epi32: 32-bit lanes with a clear bit in u are
 * zeroed. */
#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(u), (__v8si)_mm256_rol_epi32((a), (b)), \
      (__v8si)_mm256_setzero_si256()))
4135
/* Passes a (as two 64-bit lanes) and the count b, cast to int, to the
 * prolq128 builtin. */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128( \
      (__v2di)(__m128i)(a), (int)(b)))
4138
/* Merge-masked _mm_rol_epi64: 64-bit lanes with a clear bit in u come from
 * w. */
#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(u), (__v2di)_mm_rol_epi64((a), (b)), (__v2di)(__m128i)(w)))
4143
/* Zero-masked _mm_rol_epi64: 64-bit lanes with a clear bit in u are zeroed. */
#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(u), (__v2di)_mm_rol_epi64((a), (b)), \
      (__v2di)_mm_setzero_si128()))
4148
/* Passes a (as four 64-bit lanes) and the count b, cast to int, to the
 * prolq256 builtin. */
#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256( \
      (__v4di)(__m256i)(a), (int)(b)))
4151
/* Merge-masked _mm256_rol_epi64: 64-bit lanes with a clear bit in u come
 * from w. */
#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(u), (__v4di)_mm256_rol_epi64((a), (b)), \
      (__v4di)(__m256i)(w)))
4156
/* Zero-masked _mm256_rol_epi64: 64-bit lanes with a clear bit in u are
 * zeroed. */
#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(u), (__v4di)_mm256_rol_epi64((a), (b)), \
      (__v4di)_mm256_setzero_si256()))
4161
4162static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4163_mm_rolv_epi32 (__m128i __A, __m128i __B)
4164{
4165 return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
4166}
4167
4168static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4169_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4170{
4171 return (__m128i)__builtin_ia32_selectd_128(__U,
4172 (__v4si)_mm_rolv_epi32(__A, __B),
4173 (__v4si)__W);
4174}
4175
4176static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4177_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4178{
4179 return (__m128i)__builtin_ia32_selectd_128(__U,
4180 (__v4si)_mm_rolv_epi32(__A, __B),
4181 (__v4si)_mm_setzero_si128());
4182}
4183
4184static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4185_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4186{
4187 return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__A, (__v8su)__B);
4188}
4189
4190static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4191_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4192{
4193 return (__m256i)__builtin_ia32_selectd_256(__U,
4194 (__v8si)_mm256_rolv_epi32(__A, __B),
4195 (__v8si)__W);
4196}
4197
4198static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4199_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4200{
4201 return (__m256i)__builtin_ia32_selectd_256(__U,
4202 (__v8si)_mm256_rolv_epi32(__A, __B),
4203 (__v8si)_mm256_setzero_si256());
4204}
4205
4206static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4207_mm_rolv_epi64 (__m128i __A, __m128i __B)
4208{
4209 return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
4210}
4211
4212static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4213_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4214{
4215 return (__m128i)__builtin_ia32_selectq_128(__U,
4216 (__v2di)_mm_rolv_epi64(__A, __B),
4217 (__v2di)__W);
4218}
4219
4220static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4221_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4222{
4223 return (__m128i)__builtin_ia32_selectq_128(__U,
4224 (__v2di)_mm_rolv_epi64(__A, __B),
4225 (__v2di)_mm_setzero_si128());
4226}
4227
4228static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4229_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4230{
4231 return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__A, (__v4du)__B);
4232}
4233
4234static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4235_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4236{
4237 return (__m256i)__builtin_ia32_selectq_256(__U,
4238 (__v4di)_mm256_rolv_epi64(__A, __B),
4239 (__v4di)__W);
4240}
4241
4242static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4243_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4244{
4245 return (__m256i)__builtin_ia32_selectq_256(__U,
4246 (__v4di)_mm256_rolv_epi64(__A, __B),
4247 (__v4di)_mm256_setzero_si256());
4248}
4249
/* Passes a (as four 32-bit lanes) and the count b, cast to int, to the
 * prord128 builtin. */
#define _mm_ror_epi32(a, b) \
  ((__m128i)__builtin_ia32_prord128( \
      (__v4si)(__m128i)(a), (int)(b)))
4252
/* Merge-masked _mm_ror_epi32: 32-bit lanes with a clear bit in u come from
 * w. */
#define _mm_mask_ror_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(u), (__v4si)_mm_ror_epi32((a), (b)), (__v4si)(__m128i)(w)))
4257
/* Zero-masked _mm_ror_epi32: 32-bit lanes with a clear bit in u are zeroed. */
#define _mm_maskz_ror_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(u), (__v4si)_mm_ror_epi32((a), (b)), \
      (__v4si)_mm_setzero_si128()))
4262
/* Passes a (as eight 32-bit lanes) and the count b, cast to int, to the
 * prord256 builtin. */
#define _mm256_ror_epi32(a, b) \
  ((__m256i)__builtin_ia32_prord256( \
      (__v8si)(__m256i)(a), (int)(b)))
4265
/* Merge-masked _mm256_ror_epi32: 32-bit lanes with a clear bit in u come
 * from w. */
#define _mm256_mask_ror_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(u), (__v8si)_mm256_ror_epi32((a), (b)), \
      (__v8si)(__m256i)(w)))
4270
/* Zero-masked _mm256_ror_epi32: 32-bit lanes with a clear bit in u are
 * zeroed. */
#define _mm256_maskz_ror_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(u), (__v8si)_mm256_ror_epi32((a), (b)), \
      (__v8si)_mm256_setzero_si256()))
4275
/* Passes a (as two 64-bit lanes) and the count b, cast to int, to the
 * prorq128 builtin. */
#define _mm_ror_epi64(a, b) \
  ((__m128i)__builtin_ia32_prorq128( \
      (__v2di)(__m128i)(a), (int)(b)))
4278
/* Merge-masked _mm_ror_epi64: 64-bit lanes with a clear bit in u come from
 * w. */
#define _mm_mask_ror_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(u), (__v2di)_mm_ror_epi64((a), (b)), (__v2di)(__m128i)(w)))
4283
/* Zero-masked _mm_ror_epi64: 64-bit lanes with a clear bit in u are zeroed. */
#define _mm_maskz_ror_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(u), (__v2di)_mm_ror_epi64((a), (b)), \
      (__v2di)_mm_setzero_si128()))
4288
/* Passes a (as four 64-bit lanes) and the count b, cast to int, to the
 * prorq256 builtin. */
#define _mm256_ror_epi64(a, b) \
  ((__m256i)__builtin_ia32_prorq256( \
      (__v4di)(__m256i)(a), (int)(b)))
4291
/* Merge-masked _mm256_ror_epi64: 64-bit lanes with a clear bit in u come
 * from w. */
#define _mm256_mask_ror_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(u), (__v4di)_mm256_ror_epi64((a), (b)), \
      (__v4di)(__m256i)(w)))
4296
/* Zero-masked _mm256_ror_epi64: 64-bit lanes with a clear bit in u are
 * zeroed. */
#define _mm256_maskz_ror_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(u), (__v4di)_mm256_ror_epi64((a), (b)), \
      (__v4di)_mm256_setzero_si256()))
4301
4302static __inline__ __m128i __DEFAULT_FN_ATTRS128
4303_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4304{
4305 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4306 (__v4si)_mm_sll_epi32(__A, __B),
4307 (__v4si)__W);
4308}
4309
4310static __inline__ __m128i __DEFAULT_FN_ATTRS128
4311_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4312{
4313 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4314 (__v4si)_mm_sll_epi32(__A, __B),
4315 (__v4si)_mm_setzero_si128());
4316}
4317
4318static __inline__ __m256i __DEFAULT_FN_ATTRS256
4319_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4320{
4321 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4322 (__v8si)_mm256_sll_epi32(__A, __B),
4323 (__v8si)__W);
4324}
4325
4326static __inline__ __m256i __DEFAULT_FN_ATTRS256
4327_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4328{
4329 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4330 (__v8si)_mm256_sll_epi32(__A, __B),
4331 (__v8si)_mm256_setzero_si256());
4332}
4333
4334static __inline__ __m128i __DEFAULT_FN_ATTRS128
4335_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4336{
4337 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4338 (__v4si)_mm_slli_epi32(__A, (int)__B),
4339 (__v4si)__W);
4340}
4341
4342static __inline__ __m128i __DEFAULT_FN_ATTRS128
4343_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4344{
4345 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4346 (__v4si)_mm_slli_epi32(__A, (int)__B),
4347 (__v4si)_mm_setzero_si128());
4348}
4349
4350static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4351_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A,
4352 unsigned int __B) {
4353 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4354 (__v8si)_mm256_slli_epi32(__A, (int)__B),
4355 (__v8si)__W);
4356}
4357
4358static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4359_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
4360 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4361 (__v8si)_mm256_slli_epi32(__A, (int)__B),
4362 (__v8si)_mm256_setzero_si256());
4363}
4364
4365static __inline__ __m128i __DEFAULT_FN_ATTRS128
4366_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4367{
4368 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4369 (__v2di)_mm_sll_epi64(__A, __B),
4370 (__v2di)__W);
4371}
4372
4373static __inline__ __m128i __DEFAULT_FN_ATTRS128
4374_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4375{
4376 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4377 (__v2di)_mm_sll_epi64(__A, __B),
4378 (__v2di)_mm_setzero_si128());
4379}
4380
4381static __inline__ __m256i __DEFAULT_FN_ATTRS256
4382_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4383{
4384 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4385 (__v4di)_mm256_sll_epi64(__A, __B),
4386 (__v4di)__W);
4387}
4388
4389static __inline__ __m256i __DEFAULT_FN_ATTRS256
4390_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4391{
4392 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4393 (__v4di)_mm256_sll_epi64(__A, __B),
4394 (__v4di)_mm256_setzero_si256());
4395}
4396
4397static __inline__ __m128i __DEFAULT_FN_ATTRS128
4398_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4399{
4400 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4401 (__v2di)_mm_slli_epi64(__A, (int)__B),
4402 (__v2di)__W);
4403}
4404
4405static __inline__ __m128i __DEFAULT_FN_ATTRS128
4406_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4407{
4408 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4409 (__v2di)_mm_slli_epi64(__A, (int)__B),
4410 (__v2di)_mm_setzero_si128());
4411}
4412
4413static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4414_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A,
4415 unsigned int __B) {
4416 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4417 (__v4di)_mm256_slli_epi64(__A, (int)__B),
4418 (__v4di)__W);
4419}
4420
4421static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4422_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) {
4423 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4424 (__v4di)_mm256_slli_epi64(__A, (int)__B),
4425 (__v4di)_mm256_setzero_si256());
4426}
4427
4428static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4429_mm_rorv_epi32 (__m128i __A, __m128i __B)
4430{
4431 return (__m128i)__builtin_elementwise_fshr((__v4su)__A, (__v4su)__A, (__v4su)__B);
4432}
4433
4434static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4435_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4436{
4437 return (__m128i)__builtin_ia32_selectd_128(__U,
4438 (__v4si)_mm_rorv_epi32(__A, __B),
4439 (__v4si)__W);
4440}
4441
4442static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4443_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4444{
4445 return (__m128i)__builtin_ia32_selectd_128(__U,
4446 (__v4si)_mm_rorv_epi32(__A, __B),
4447 (__v4si)_mm_setzero_si128());
4448}
4449
4450static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4451_mm256_rorv_epi32 (__m256i __A, __m256i __B)
4452{
4453 return (__m256i)__builtin_elementwise_fshr((__v8su)__A, (__v8su)__A, (__v8su)__B);
4454}
4455
4456static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4457_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4458{
4459 return (__m256i)__builtin_ia32_selectd_256(__U,
4460 (__v8si)_mm256_rorv_epi32(__A, __B),
4461 (__v8si)__W);
4462}
4463
4464static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4465_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4466{
4467 return (__m256i)__builtin_ia32_selectd_256(__U,
4468 (__v8si)_mm256_rorv_epi32(__A, __B),
4469 (__v8si)_mm256_setzero_si256());
4470}
4471
4472static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4473_mm_rorv_epi64 (__m128i __A, __m128i __B)
4474{
4475 return (__m128i)__builtin_elementwise_fshr((__v2du)__A, (__v2du)__A, (__v2du)__B);
4476}
4477
4478static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4479_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4480{
4481 return (__m128i)__builtin_ia32_selectq_128(__U,
4482 (__v2di)_mm_rorv_epi64(__A, __B),
4483 (__v2di)__W);
4484}
4485
4486static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4487_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4488{
4489 return (__m128i)__builtin_ia32_selectq_128(__U,
4490 (__v2di)_mm_rorv_epi64(__A, __B),
4491 (__v2di)_mm_setzero_si128());
4492}
4493
4494static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4495_mm256_rorv_epi64 (__m256i __A, __m256i __B)
4496{
4497 return (__m256i)__builtin_elementwise_fshr((__v4du)__A, (__v4du)__A, (__v4du)__B);
4498}
4499
4500static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4501_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4502{
4503 return (__m256i)__builtin_ia32_selectq_256(__U,
4504 (__v4di)_mm256_rorv_epi64(__A, __B),
4505 (__v4di)__W);
4506}
4507
4508static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4509_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4510{
4511 return (__m256i)__builtin_ia32_selectq_256(__U,
4512 (__v4di)_mm256_rorv_epi64(__A, __B),
4513 (__v4di)_mm256_setzero_si256());
4514}
4515
4516static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4517_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4518{
4519 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4520 (__v2di)_mm_sllv_epi64(__X, __Y),
4521 (__v2di)__W);
4522}
4523
4524static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4525_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4526{
4527 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4528 (__v2di)_mm_sllv_epi64(__X, __Y),
4529 (__v2di)_mm_setzero_si128());
4530}
4531
4532static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4533_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4534{
4535 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4536 (__v4di)_mm256_sllv_epi64(__X, __Y),
4537 (__v4di)__W);
4538}
4539
4540static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4541_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4542{
4543 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4544 (__v4di)_mm256_sllv_epi64(__X, __Y),
4545 (__v4di)_mm256_setzero_si256());
4546}
4547
4548static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4549_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4550{
4551 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4552 (__v4si)_mm_sllv_epi32(__X, __Y),
4553 (__v4si)__W);
4554}
4555
4556static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4557_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4558{
4559 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4560 (__v4si)_mm_sllv_epi32(__X, __Y),
4561 (__v4si)_mm_setzero_si128());
4562}
4563
4564static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4565_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4566{
4567 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4568 (__v8si)_mm256_sllv_epi32(__X, __Y),
4569 (__v8si)__W);
4570}
4571
4572static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4573_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4574{
4575 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4576 (__v8si)_mm256_sllv_epi32(__X, __Y),
4577 (__v8si)_mm256_setzero_si256());
4578}
4579
4580static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4581_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4582{
4583 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4584 (__v2di)_mm_srlv_epi64(__X, __Y),
4585 (__v2di)__W);
4586}
4587
4588static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4589_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4590{
4591 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4592 (__v2di)_mm_srlv_epi64(__X, __Y),
4593 (__v2di)_mm_setzero_si128());
4594}
4595
4596static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4597_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4598{
4599 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4600 (__v4di)_mm256_srlv_epi64(__X, __Y),
4601 (__v4di)__W);
4602}
4603
4604static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4605_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4606{
4607 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4608 (__v4di)_mm256_srlv_epi64(__X, __Y),
4609 (__v4di)_mm256_setzero_si256());
4610}
4611
4612static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4613_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4614{
4615 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4616 (__v4si)_mm_srlv_epi32(__X, __Y),
4617 (__v4si)__W);
4618}
4619
4620static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4621_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4622{
4623 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4624 (__v4si)_mm_srlv_epi32(__X, __Y),
4625 (__v4si)_mm_setzero_si128());
4626}
4627
4628static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4629_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4630{
4631 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4632 (__v8si)_mm256_srlv_epi32(__X, __Y),
4633 (__v8si)__W);
4634}
4635
4636static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4637_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4638{
4639 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4640 (__v8si)_mm256_srlv_epi32(__X, __Y),
4641 (__v8si)_mm256_setzero_si256());
4642}
4643
4644static __inline__ __m128i __DEFAULT_FN_ATTRS128
4645_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4646{
4647 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4648 (__v4si)_mm_srl_epi32(__A, __B),
4649 (__v4si)__W);
4650}
4651
4652static __inline__ __m128i __DEFAULT_FN_ATTRS128
4653_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4654{
4655 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4656 (__v4si)_mm_srl_epi32(__A, __B),
4657 (__v4si)_mm_setzero_si128());
4658}
4659
4660static __inline__ __m256i __DEFAULT_FN_ATTRS256
4661_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4662{
4663 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4664 (__v8si)_mm256_srl_epi32(__A, __B),
4665 (__v8si)__W);
4666}
4667
4668static __inline__ __m256i __DEFAULT_FN_ATTRS256
4669_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4670{
4671 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4672 (__v8si)_mm256_srl_epi32(__A, __B),
4673 (__v8si)_mm256_setzero_si256());
4674}
4675
4676static __inline__ __m128i __DEFAULT_FN_ATTRS128
4677_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4678{
4679 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4680 (__v4si)_mm_srli_epi32(__A, (int)__B),
4681 (__v4si)__W);
4682}
4683
4684static __inline__ __m128i __DEFAULT_FN_ATTRS128
4685_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4686{
4687 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4688 (__v4si)_mm_srli_epi32(__A, (int)__B),
4689 (__v4si)_mm_setzero_si128());
4690}
4691
4692static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4693_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A,
4694 unsigned int __B) {
4695 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4696 (__v8si)_mm256_srli_epi32(__A, (int)__B),
4697 (__v8si)__W);
4698}
4699
4700static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4701_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
4702 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4703 (__v8si)_mm256_srli_epi32(__A, (int)__B),
4704 (__v8si)_mm256_setzero_si256());
4705}
4706
4707static __inline__ __m128i __DEFAULT_FN_ATTRS128
4708_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4709{
4710 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4711 (__v2di)_mm_srl_epi64(__A, __B),
4712 (__v2di)__W);
4713}
4714
4715static __inline__ __m128i __DEFAULT_FN_ATTRS128
4716_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4717{
4718 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4719 (__v2di)_mm_srl_epi64(__A, __B),
4720 (__v2di)_mm_setzero_si128());
4721}
4722
4723static __inline__ __m256i __DEFAULT_FN_ATTRS256
4724_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4725{
4726 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4727 (__v4di)_mm256_srl_epi64(__A, __B),
4728 (__v4di)__W);
4729}
4730
4731static __inline__ __m256i __DEFAULT_FN_ATTRS256
4732_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4733{
4734 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4735 (__v4di)_mm256_srl_epi64(__A, __B),
4736 (__v4di)_mm256_setzero_si256());
4737}
4738
4739static __inline__ __m128i __DEFAULT_FN_ATTRS128
4740_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4741{
4742 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4743 (__v2di)_mm_srli_epi64(__A, (int)__B),
4744 (__v2di)__W);
4745}
4746
4747static __inline__ __m128i __DEFAULT_FN_ATTRS128
4748_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4749{
4750 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4751 (__v2di)_mm_srli_epi64(__A, (int)__B),
4752 (__v2di)_mm_setzero_si128());
4753}
4754
4755static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4756_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A,
4757 unsigned int __B) {
4758 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4759 (__v4di)_mm256_srli_epi64(__A, (int)__B),
4760 (__v4di)__W);
4761}
4762
4763static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4764_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) {
4765 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4766 (__v4di)_mm256_srli_epi64(__A, (int)__B),
4767 (__v4di)_mm256_setzero_si256());
4768}
4769
4770static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4771_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4772{
4773 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4774 (__v4si)_mm_srav_epi32(__X, __Y),
4775 (__v4si)__W);
4776}
4777
4778static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4779_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4780{
4781 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4782 (__v4si)_mm_srav_epi32(__X, __Y),
4783 (__v4si)_mm_setzero_si128());
4784}
4785
4786static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4787_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4788{
4789 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4790 (__v8si)_mm256_srav_epi32(__X, __Y),
4791 (__v8si)__W);
4792}
4793
4794static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4795_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4796{
4797 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4798 (__v8si)_mm256_srav_epi32(__X, __Y),
4799 (__v8si)_mm256_setzero_si256());
4800}
4801
4802static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4803_mm_srav_epi64(__m128i __X, __m128i __Y)
4804{
4805 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
4806}
4807
4808static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4809_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4810{
4811 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4812 (__v2di)_mm_srav_epi64(__X, __Y),
4813 (__v2di)__W);
4814}
4815
4816static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4817_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4818{
4819 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4820 (__v2di)_mm_srav_epi64(__X, __Y),
4821 (__v2di)_mm_setzero_si128());
4822}
4823
4824static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4825_mm256_srav_epi64(__m256i __X, __m256i __Y)
4826{
4827 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
4828}
4829
4830static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4831_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4832{
4833 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4834 (__v4di)_mm256_srav_epi64(__X, __Y),
4835 (__v4di)__W);
4836}
4837
4838static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4839_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
4840{
4841 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4842 (__v4di)_mm256_srav_epi64(__X, __Y),
4843 (__v4di)_mm256_setzero_si256());
4844}
4845
4846static __inline__ __m128i __DEFAULT_FN_ATTRS128
4847_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
4848{
4849 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
4850 (__v4si) __A,
4851 (__v4si) __W);
4852}
4853
4854static __inline__ __m128i __DEFAULT_FN_ATTRS128
4856{
4857 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
4858 (__v4si) __A,
4859 (__v4si) _mm_setzero_si128 ());
4860}
4861
4862
4863static __inline__ __m256i __DEFAULT_FN_ATTRS256
4864_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
4865{
4866 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
4867 (__v8si) __A,
4868 (__v8si) __W);
4869}
4870
4871static __inline__ __m256i __DEFAULT_FN_ATTRS256
4873{
4874 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
4875 (__v8si) __A,
4876 (__v8si) _mm256_setzero_si256 ());
4877}
4878
4879static __inline __m128i __DEFAULT_FN_ATTRS128
4880_mm_load_epi32 (void const *__P)
4881{
4882 return *(const __m128i *) __P;
4883}
4884
4885static __inline__ __m128i __DEFAULT_FN_ATTRS128
4886_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
4887{
4888 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
4889 (__v4si) __W,
4890 (__mmask8)
4891 __U);
4892}
4893
4894static __inline__ __m128i __DEFAULT_FN_ATTRS128
4896{
4897 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
4898 (__v4si)
4900 (__mmask8)
4901 __U);
4902}
4903
4904static __inline __m256i __DEFAULT_FN_ATTRS256
4906{
4907 return *(const __m256i *) __P;
4908}
4909
4910static __inline__ __m256i __DEFAULT_FN_ATTRS256
4911_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
4912{
4913 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
4914 (__v8si) __W,
4915 (__mmask8)
4916 __U);
4917}
4918
4919static __inline__ __m256i __DEFAULT_FN_ATTRS256
4921{
4922 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
4923 (__v8si)
4925 (__mmask8)
4926 __U);
4927}
4928
4929static __inline void __DEFAULT_FN_ATTRS128
4930_mm_store_epi32 (void *__P, __m128i __A)
4931{
4932 *(__m128i *) __P = __A;
4933}
4934
4935static __inline__ void __DEFAULT_FN_ATTRS128
4936_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
4937{
4938 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
4939 (__v4si) __A,
4940 (__mmask8) __U);
4941}
4942
4943static __inline void __DEFAULT_FN_ATTRS256
4944_mm256_store_epi32 (void *__P, __m256i __A)
4945{
4946 *(__m256i *) __P = __A;
4947}
4948
4949static __inline__ void __DEFAULT_FN_ATTRS256
4950_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
4951{
4952 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
4953 (__v8si) __A,
4954 (__mmask8) __U);
4955}
4956
4957static __inline__ __m128i __DEFAULT_FN_ATTRS128
4958_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
4959{
4960 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
4961 (__v2di) __A,
4962 (__v2di) __W);
4963}
4964
4965static __inline__ __m128i __DEFAULT_FN_ATTRS128
4967{
4968 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
4969 (__v2di) __A,
4970 (__v2di) _mm_setzero_si128 ());
4971}
4972
4973static __inline__ __m256i __DEFAULT_FN_ATTRS256
4974_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
4975{
4976 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
4977 (__v4di) __A,
4978 (__v4di) __W);
4979}
4980
4981static __inline__ __m256i __DEFAULT_FN_ATTRS256
4983{
4984 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
4985 (__v4di) __A,
4986 (__v4di) _mm256_setzero_si256 ());
4987}
4988
4989static __inline __m128i __DEFAULT_FN_ATTRS128
4990_mm_load_epi64 (void const *__P)
4991{
4992 return *(const __m128i *) __P;
4993}
4994
4995static __inline__ __m128i __DEFAULT_FN_ATTRS128
4996_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
4997{
4998 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
4999 (__v2di) __W,
5000 (__mmask8)
5001 __U);
5002}
5003
5004static __inline__ __m128i __DEFAULT_FN_ATTRS128
5006{
5007 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5008 (__v2di)
5010 (__mmask8)
5011 __U);
5012}
5013
5014static __inline __m256i __DEFAULT_FN_ATTRS256
5016{
5017 return *(const __m256i *) __P;
5018}
5019
5020static __inline__ __m256i __DEFAULT_FN_ATTRS256
5021_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5022{
5023 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5024 (__v4di) __W,
5025 (__mmask8)
5026 __U);
5027}
5028
5029static __inline__ __m256i __DEFAULT_FN_ATTRS256
5031{
5032 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5033 (__v4di)
5035 (__mmask8)
5036 __U);
5037}
5038
5039static __inline void __DEFAULT_FN_ATTRS128
5040_mm_store_epi64 (void *__P, __m128i __A)
5041{
5042 *(__m128i *) __P = __A;
5043}
5044
5045static __inline__ void __DEFAULT_FN_ATTRS128
5046_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5047{
5048 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5049 (__v2di) __A,
5050 (__mmask8) __U);
5051}
5052
5053static __inline void __DEFAULT_FN_ATTRS256
5054_mm256_store_epi64 (void *__P, __m256i __A)
5055{
5056 *(__m256i *) __P = __A;
5057}
5058
5059static __inline__ void __DEFAULT_FN_ATTRS256
5060_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5061{
5062 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5063 (__v4di) __A,
5064 (__mmask8) __U);
5065}
5066
5067static __inline__ __m128d __DEFAULT_FN_ATTRS128
5068_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5069{
5070 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5071 (__v2df)_mm_movedup_pd(__A),
5072 (__v2df)__W);
5073}
5074
5075static __inline__ __m128d __DEFAULT_FN_ATTRS128
5077{
5078 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5079 (__v2df)_mm_movedup_pd(__A),
5080 (__v2df)_mm_setzero_pd());
5081}
5082
5083static __inline__ __m256d __DEFAULT_FN_ATTRS256
5084_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5085{
5086 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5087 (__v4df)_mm256_movedup_pd(__A),
5088 (__v4df)__W);
5089}
5090
5091static __inline__ __m256d __DEFAULT_FN_ATTRS256
5093{
5094 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5095 (__v4df)_mm256_movedup_pd(__A),
5096 (__v4df)_mm256_setzero_pd());
5097}
5098
5099static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
5100_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) {
5101 return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A),
5102 (__v4si)__O);
5103}
5104
5105static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
5107 return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A),
5108 (__v4si)_mm_setzero_si128());
5109}
5110
5111static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
5112_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) {
5113 return (__m256i)__builtin_ia32_selectd_256(
5114 __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)__O);
5115}
5116
5117static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
5119 return (__m256i)__builtin_ia32_selectd_256(
5120 __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)_mm256_setzero_si256());
5121}
5122
5123static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
5124_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) {
5125 return (__m128i) __builtin_ia32_selectq_128(__M,
5126 (__v2di) _mm_set1_epi64x(__A),
5127 (__v2di) __O);
5128}
5129
5130static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
5131_mm_maskz_set1_epi64(__mmask8 __M, long long __A) {
5132 return (__m128i) __builtin_ia32_selectq_128(__M,
5133 (__v2di) _mm_set1_epi64x(__A),
5134 (__v2di) _mm_setzero_si128());
5135}
5136
5137static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
5138_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) {
5139 return (__m256i) __builtin_ia32_selectq_256(__M,
5140 (__v4di) _mm256_set1_epi64x(__A),
5141 (__v4di) __O) ;
5142}
5143
5144static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
5146 return (__m256i)__builtin_ia32_selectq_256(
5147 __M, (__v4di)_mm256_set1_epi64x(__A), (__v4di)_mm256_setzero_si256());
5148}
5149
/* Unmasked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmpd128_mask with an all-ones mask ((__mmask8)-1). */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1))
5155
/* Masked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmpd128_mask with U as the lane mask. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5161
/* Zero-masked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmpd128_maskz with U as the lane mask. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5167
/* Unmasked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmpd256_mask with an all-ones mask ((__mmask8)-1). */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)-1))
5173
/* Masked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmpd256_mask with U as the lane mask. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5179
/* Zero-masked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmpd256_maskz with U as the lane mask. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5185
/* Unmasked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmps128_mask with an all-ones mask ((__mmask8)-1). */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1))
5191
/* Masked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmps128_mask with U as the lane mask. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5197
/* Zero-masked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmps128_maskz with U as the lane mask. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5203
/* Unmasked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmps256_mask with an all-ones mask ((__mmask8)-1). */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)-1))
5209
/* Masked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmps256_mask with U as the lane mask. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5215
/* Zero-masked form: forwards A, B, C and imm to
   __builtin_ia32_fixupimmps256_maskz with U as the lane mask. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5221
5222static __inline__ __m128d __DEFAULT_FN_ATTRS128
5223_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5224{
5225 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5226 (__v2df) __W,
5227 (__mmask8) __U);
5228}
5229
5230static __inline__ __m128d __DEFAULT_FN_ATTRS128
5232{
5233 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5234 (__v2df)
5235 _mm_setzero_pd (),
5236 (__mmask8) __U);
5237}
5238
5239static __inline__ __m256d __DEFAULT_FN_ATTRS256
5240_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5241{
5242 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5243 (__v4df) __W,
5244 (__mmask8) __U);
5245}
5246
5247static __inline__ __m256d __DEFAULT_FN_ATTRS256
5249{
5250 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5251 (__v4df)
5253 (__mmask8) __U);
5254}
5255
5256static __inline__ __m128 __DEFAULT_FN_ATTRS128
5257_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5258{
5259 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5260 (__v4sf) __W,
5261 (__mmask8) __U);
5262}
5263
5264static __inline__ __m128 __DEFAULT_FN_ATTRS128
5266{
5267 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5268 (__v4sf)
5269 _mm_setzero_ps (),
5270 (__mmask8) __U);
5271}
5272
5273static __inline__ __m256 __DEFAULT_FN_ATTRS256
5274_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5275{
5276 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5277 (__v8sf) __W,
5278 (__mmask8) __U);
5279}
5280
5281static __inline__ __m256 __DEFAULT_FN_ATTRS256
5283{
5284 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5285 (__v8sf)
5287 (__mmask8) __U);
5288}
5289
5290static __inline __m128i __DEFAULT_FN_ATTRS128
5292{
5293 struct __loadu_epi64 {
5294 __m128i_u __v;
5295 } __attribute__((__packed__, __may_alias__));
5296 return ((const struct __loadu_epi64*)__P)->__v;
5297}
5298
5299static __inline__ __m128i __DEFAULT_FN_ATTRS128
5300_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5301{
5302 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5303 (__v2di) __W,
5304 (__mmask8) __U);
5305}
5306
5307static __inline__ __m128i __DEFAULT_FN_ATTRS128
5309{
5310 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5311 (__v2di)
5313 (__mmask8) __U);
5314}
5315
5316static __inline __m256i __DEFAULT_FN_ATTRS256
5318{
5319 struct __loadu_epi64 {
5320 __m256i_u __v;
5321 } __attribute__((__packed__, __may_alias__));
5322 return ((const struct __loadu_epi64*)__P)->__v;
5323}
5324
5325static __inline__ __m256i __DEFAULT_FN_ATTRS256
5326_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5327{
5328 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5329 (__v4di) __W,
5330 (__mmask8) __U);
5331}
5332
5333static __inline__ __m256i __DEFAULT_FN_ATTRS256
5335{
5336 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5337 (__v4di)
5339 (__mmask8) __U);
5340}
5341
5342static __inline __m128i __DEFAULT_FN_ATTRS128
5344{
5345 struct __loadu_epi32 {
5346 __m128i_u __v;
5347 } __attribute__((__packed__, __may_alias__));
5348 return ((const struct __loadu_epi32*)__P)->__v;
5349}
5350
5351static __inline__ __m128i __DEFAULT_FN_ATTRS128
5352_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5353{
5354 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5355 (__v4si) __W,
5356 (__mmask8) __U);
5357}
5358
5359static __inline__ __m128i __DEFAULT_FN_ATTRS128
5361{
5362 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5363 (__v4si)
5365 (__mmask8) __U);
5366}
5367
5368static __inline __m256i __DEFAULT_FN_ATTRS256
5370{
5371 struct __loadu_epi32 {
5372 __m256i_u __v;
5373 } __attribute__((__packed__, __may_alias__));
5374 return ((const struct __loadu_epi32*)__P)->__v;
5375}
5376
5377static __inline__ __m256i __DEFAULT_FN_ATTRS256
5378_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5379{
5380 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5381 (__v8si) __W,
5382 (__mmask8) __U);
5383}
5384
5385static __inline__ __m256i __DEFAULT_FN_ATTRS256
5387{
5388 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5389 (__v8si)
5391 (__mmask8) __U);
5392}
5393
5394static __inline__ __m128d __DEFAULT_FN_ATTRS128
5395_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5396{
5397 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5398 (__v2df) __W,
5399 (__mmask8) __U);
5400}
5401
5402static __inline__ __m128d __DEFAULT_FN_ATTRS128
5404{
5405 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5406 (__v2df)
5407 _mm_setzero_pd (),
5408 (__mmask8) __U);
5409}
5410
5411static __inline__ __m256d __DEFAULT_FN_ATTRS256
5412_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5413{
5414 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5415 (__v4df) __W,
5416 (__mmask8) __U);
5417}
5418
5419static __inline__ __m256d __DEFAULT_FN_ATTRS256
5421{
5422 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5423 (__v4df)
5425 (__mmask8) __U);
5426}
5427
5428static __inline__ __m128 __DEFAULT_FN_ATTRS128
5429_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5430{
5431 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5432 (__v4sf) __W,
5433 (__mmask8) __U);
5434}
5435
5436static __inline__ __m128 __DEFAULT_FN_ATTRS128
5438{
5439 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5440 (__v4sf)
5441 _mm_setzero_ps (),
5442 (__mmask8) __U);
5443}
5444
5445static __inline__ __m256 __DEFAULT_FN_ATTRS256
5446_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5447{
5448 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5449 (__v8sf) __W,
5450 (__mmask8) __U);
5451}
5452
5453static __inline__ __m256 __DEFAULT_FN_ATTRS256
5455{
5456 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5457 (__v8sf)
5459 (__mmask8) __U);
5460}
5461
5462static __inline__ void __DEFAULT_FN_ATTRS128
5463_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5464{
5465 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5466 (__v2df) __A,
5467 (__mmask8) __U);
5468}
5469
5470static __inline__ void __DEFAULT_FN_ATTRS256
5471_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5472{
5473 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5474 (__v4df) __A,
5475 (__mmask8) __U);
5476}
5477
5478static __inline__ void __DEFAULT_FN_ATTRS128
5479_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5480{
5481 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5482 (__v4sf) __A,
5483 (__mmask8) __U);
5484}
5485
5486static __inline__ void __DEFAULT_FN_ATTRS256
5487_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5488{
5489 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5490 (__v8sf) __A,
5491 (__mmask8) __U);
5492}
5493
5494static __inline void __DEFAULT_FN_ATTRS128
5495_mm_storeu_epi64 (void *__P, __m128i __A)
5496{
5497 struct __storeu_epi64 {
5498 __m128i_u __v;
5499 } __attribute__((__packed__, __may_alias__));
5500 ((struct __storeu_epi64*)__P)->__v = __A;
5501}
5502
5503static __inline__ void __DEFAULT_FN_ATTRS128
5504_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5505{
5506 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5507 (__v2di) __A,
5508 (__mmask8) __U);
5509}
5510
5511static __inline void __DEFAULT_FN_ATTRS256
5512_mm256_storeu_epi64 (void *__P, __m256i __A)
5513{
5514 struct __storeu_epi64 {
5515 __m256i_u __v;
5516 } __attribute__((__packed__, __may_alias__));
5517 ((struct __storeu_epi64*)__P)->__v = __A;
5518}
5519
5520static __inline__ void __DEFAULT_FN_ATTRS256
5521_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5522{
5523 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5524 (__v4di) __A,
5525 (__mmask8) __U);
5526}
5527
5528static __inline void __DEFAULT_FN_ATTRS128
5529_mm_storeu_epi32 (void *__P, __m128i __A)
5530{
5531 struct __storeu_epi32 {
5532 __m128i_u __v;
5533 } __attribute__((__packed__, __may_alias__));
5534 ((struct __storeu_epi32*)__P)->__v = __A;
5535}
5536
5537static __inline__ void __DEFAULT_FN_ATTRS128
5538_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5539{
5540 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5541 (__v4si) __A,
5542 (__mmask8) __U);
5543}
5544
5545static __inline void __DEFAULT_FN_ATTRS256
5546_mm256_storeu_epi32 (void *__P, __m256i __A)
5547{
5548 struct __storeu_epi32 {
5549 __m256i_u __v;
5550 } __attribute__((__packed__, __may_alias__));
5551 ((struct __storeu_epi32*)__P)->__v = __A;
5552}
5553
5554static __inline__ void __DEFAULT_FN_ATTRS256
5555_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5556{
5557 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5558 (__v8si) __A,
5559 (__mmask8) __U);
5560}
5561
5562static __inline__ void __DEFAULT_FN_ATTRS128
5563_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5564{
5565 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5566 (__v2df) __A,
5567 (__mmask8) __U);
5568}
5569
5570static __inline__ void __DEFAULT_FN_ATTRS256
5571_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5572{
5573 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5574 (__v4df) __A,
5575 (__mmask8) __U);
5576}
5577
5578static __inline__ void __DEFAULT_FN_ATTRS128
5579_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5580{
5581 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5582 (__v4sf) __A,
5583 (__mmask8) __U);
5584}
5585
5586static __inline__ void __DEFAULT_FN_ATTRS256
5587_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5588{
5589 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5590 (__v8sf) __A,
5591 (__mmask8) __U);
5592}
5593
5594static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5595_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
5596 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5597 (__v2df)_mm_unpackhi_pd(__A, __B),
5598 (__v2df)__W);
5599}
5600
5601static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5602_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) {
5603 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5604 (__v2df)_mm_unpackhi_pd(__A, __B),
5605 (__v2df)_mm_setzero_pd());
5606}
5607
5608static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5609_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
5610 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5611 (__v4df)_mm256_unpackhi_pd(__A, __B),
5612 (__v4df)__W);
5613}
5614
5615static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5616_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) {
5617 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5618 (__v4df)_mm256_unpackhi_pd(__A, __B),
5619 (__v4df)_mm256_setzero_pd());
5620}
5621
5622static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5623_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
5624 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5625 (__v4sf)_mm_unpackhi_ps(__A, __B),
5626 (__v4sf)__W);
5627}
5628
5629static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5630_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) {
5631 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5632 (__v4sf)_mm_unpackhi_ps(__A, __B),
5633 (__v4sf)_mm_setzero_ps());
5634}
5635
5636static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5637_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
5638 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5639 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5640 (__v8sf)__W);
5641}
5642
5643static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5644_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) {
5645 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5646 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5647 (__v8sf)_mm256_setzero_ps());
5648}
5649
5650static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5651_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
5652 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5653 (__v2df)_mm_unpacklo_pd(__A, __B),
5654 (__v2df)__W);
5655}
5656
5657static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5658_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) {
5659 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5660 (__v2df)_mm_unpacklo_pd(__A, __B),
5661 (__v2df)_mm_setzero_pd());
5662}
5663
5664static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5665_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
5666 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5667 (__v4df)_mm256_unpacklo_pd(__A, __B),
5668 (__v4df)__W);
5669}
5670
5671static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5672_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
5673 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5674 (__v4df)_mm256_unpacklo_pd(__A, __B),
5675 (__v4df)_mm256_setzero_pd());
5676}
5677
5678static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5679_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
5680 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5681 (__v4sf)_mm_unpacklo_ps(__A, __B),
5682 (__v4sf)__W);
5683}
5684
5685static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5686_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) {
5687 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5688 (__v4sf)_mm_unpacklo_ps(__A, __B),
5689 (__v4sf)_mm_setzero_ps());
5690}
5691
5692static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5693_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
5694 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5695 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5696 (__v8sf)__W);
5697}
5698
5699static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5700_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) {
5701 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5702 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5703 (__v8sf)_mm256_setzero_ps());
5704}
5705
5706static __inline__ __m128d __DEFAULT_FN_ATTRS128
5707_mm_rcp14_pd (__m128d __A)
5708{
5709 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5710 (__v2df)
5711 _mm_setzero_pd (),
5712 (__mmask8) -1);
5713}
5714
5715static __inline__ __m128d __DEFAULT_FN_ATTRS128
5716_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5717{
5718 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5719 (__v2df) __W,
5720 (__mmask8) __U);
5721}
5722
5723static __inline__ __m128d __DEFAULT_FN_ATTRS128
5725{
5726 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5727 (__v2df)
5728 _mm_setzero_pd (),
5729 (__mmask8) __U);
5730}
5731
5732static __inline__ __m256d __DEFAULT_FN_ATTRS256
5733_mm256_rcp14_pd (__m256d __A)
5734{
5735 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5736 (__v4df)
5738 (__mmask8) -1);
5739}
5740
5741static __inline__ __m256d __DEFAULT_FN_ATTRS256
5742_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5743{
5744 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5745 (__v4df) __W,
5746 (__mmask8) __U);
5747}
5748
5749static __inline__ __m256d __DEFAULT_FN_ATTRS256
5751{
5752 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5753 (__v4df)
5755 (__mmask8) __U);
5756}
5757
5758static __inline__ __m128 __DEFAULT_FN_ATTRS128
5759_mm_rcp14_ps (__m128 __A)
5760{
5761 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5762 (__v4sf)
5763 _mm_setzero_ps (),
5764 (__mmask8) -1);
5765}
5766
5767static __inline__ __m128 __DEFAULT_FN_ATTRS128
5768_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
5769{
5770 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5771 (__v4sf) __W,
5772 (__mmask8) __U);
5773}
5774
5775static __inline__ __m128 __DEFAULT_FN_ATTRS128
5777{
5778 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5779 (__v4sf)
5780 _mm_setzero_ps (),
5781 (__mmask8) __U);
5782}
5783
5784static __inline__ __m256 __DEFAULT_FN_ATTRS256
5786{
5787 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5788 (__v8sf)
5790 (__mmask8) -1);
5791}
5792
5793static __inline__ __m256 __DEFAULT_FN_ATTRS256
5794_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
5795{
5796 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5797 (__v8sf) __W,
5798 (__mmask8) __U);
5799}
5800
5801static __inline__ __m256 __DEFAULT_FN_ATTRS256
5803{
5804 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5805 (__v8sf)
5807 (__mmask8) __U);
5808}
5809
/* Masked _mm_permute_pd: under mask U, selects between the permuted X
 * (control C) and the passthrough vector W. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_permute_pd((X), (C)), (__v2df)(__m128d)(W)))
5814
/* Zero-masked _mm_permute_pd: under mask U, selects between the permuted X
 * (control C) and an all-zero vector. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)_mm_setzero_pd()))
5819
/* Masked _mm256_permute_pd: under mask U, selects between the permuted X
 * (control C) and the passthrough vector W. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)(__m256d)(W)))
5824
/* Zero-masked _mm256_permute_pd: under mask U, selects between the permuted X
 * (control C) and an all-zero vector. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
5829
/* Masked _mm_permute_ps: under mask U, selects between the permuted X
 * (control C) and the passthrough vector W. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_permute_ps((X), (C)), (__v4sf)(__m128)(W)))
5834
/* Zero-masked _mm_permute_ps: under mask U, selects between the permuted X
 * (control C) and an all-zero vector. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)_mm_setzero_ps()))
5839
/* Masked _mm256_permute_ps: under mask U, selects between the permuted X
 * (control C) and the passthrough vector W. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)(__m256)(W)))
5844
/* Zero-masked _mm256_permute_ps: under mask U, selects between the permuted X
 * (control C) and an all-zero vector. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)_mm256_setzero_ps()))
5849
5850static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5851_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
5852 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5853 (__v2df)_mm_permutevar_pd(__A, __C),
5854 (__v2df)__W);
5855}
5856
5857static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5858_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
5859 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5860 (__v2df)_mm_permutevar_pd(__A, __C),
5861 (__v2df)_mm_setzero_pd());
5862}
5863
5864static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5865_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) {
5866 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5867 (__v4df)_mm256_permutevar_pd(__A, __C),
5868 (__v4df)__W);
5869}
5870
5871static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5872_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
5873 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5874 (__v4df)_mm256_permutevar_pd(__A, __C),
5875 (__v4df)_mm256_setzero_pd());
5876}
5877
5878static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5879_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) {
5880 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5881 (__v4sf)_mm_permutevar_ps(__A, __C),
5882 (__v4sf)__W);
5883}
5884
5885static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5886_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
5887 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5888 (__v4sf)_mm_permutevar_ps(__A, __C),
5889 (__v4sf)_mm_setzero_ps());
5890}
5891
5892static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5893_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) {
5894 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5895 (__v8sf)_mm256_permutevar_ps(__A, __C),
5896 (__v8sf)__W);
5897}
5898
5899static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5900_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
5901 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5902 (__v8sf)_mm256_permutevar_ps(__A, __C),
5903 (__v8sf)_mm256_setzero_ps());
5904}
5905
5906static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5907_mm_test_epi32_mask (__m128i __A, __m128i __B)
5908{
5910}
5911
5912static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5913_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5914{
5915 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
5917}
5918
5919static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5920_mm256_test_epi32_mask (__m256i __A, __m256i __B)
5921{
5922 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
5924}
5925
5926static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5927_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5928{
5929 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
5931}
5932
5933static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5934_mm_test_epi64_mask (__m128i __A, __m128i __B)
5935{
5937}
5938
5939static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5940_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5941{
5942 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
5944}
5945
5946static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5947_mm256_test_epi64_mask (__m256i __A, __m256i __B)
5948{
5949 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
5951}
5952
5953static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5954_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5955{
5956 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
5958}
5959
5960static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5961_mm_testn_epi32_mask (__m128i __A, __m128i __B)
5962{
5964}
5965
5966static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5967_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5968{
5969 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
5971}
5972
5973static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5974_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5975{
5976 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
5978}
5979
5980static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5981_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5982{
5983 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
5985}
5986
5987static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5988_mm_testn_epi64_mask (__m128i __A, __m128i __B)
5989{
5991}
5992
5993static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5994_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5995{
5996 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
5998}
5999
6000static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6001_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6002{
6003 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6005}
6006
6007static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6008_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6009{
6010 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6012}
6013
6014static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6015_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6016 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6017 (__v4si)_mm_unpackhi_epi32(__A, __B),
6018 (__v4si)__W);
6019}
6020
6021static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6022_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
6023 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6024 (__v4si)_mm_unpackhi_epi32(__A, __B),
6025 (__v4si)_mm_setzero_si128());
6026}
6027
6028static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6029_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A,
6030 __m256i __B) {
6031 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6032 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6033 (__v8si)__W);
6034}
6035
6036static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6037_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
6038 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6039 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6040 (__v8si)_mm256_setzero_si256());
6041}
6042
6043static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6044_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6045 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6046 (__v2di)_mm_unpackhi_epi64(__A, __B),
6047 (__v2di)__W);
6048}
6049
6050static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6051_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
6052 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6053 (__v2di)_mm_unpackhi_epi64(__A, __B),
6054 (__v2di)_mm_setzero_si128());
6055}
6056
6057static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6058_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6059 __m256i __B) {
6060 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6061 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6062 (__v4di)__W);
6063}
6064
6065static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6066_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
6067 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6068 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6069 (__v4di)_mm256_setzero_si256());
6070}
6071
6072static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6073_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6074 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6075 (__v4si)_mm_unpacklo_epi32(__A, __B),
6076 (__v4si)__W);
6077}
6078
6079static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6080_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
6081 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6082 (__v4si)_mm_unpacklo_epi32(__A, __B),
6083 (__v4si)_mm_setzero_si128());
6084}
6085
6086static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6087_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A,
6088 __m256i __B) {
6089 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6090 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6091 (__v8si)__W);
6092}
6093
6094static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6095_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
6096 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6097 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6098 (__v8si)_mm256_setzero_si256());
6099}
6100
6101static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6102_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6103 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6104 (__v2di)_mm_unpacklo_epi64(__A, __B),
6105 (__v2di)__W);
6106}
6107
6108static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6109_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
6110 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6111 (__v2di)_mm_unpacklo_epi64(__A, __B),
6112 (__v2di)_mm_setzero_si128());
6113}
6114
6115static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6116_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6117 __m256i __B) {
6118 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6119 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6120 (__v4di)__W);
6121}
6122
6123static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6124_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
6125 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6126 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6127 (__v4di)_mm256_setzero_si256());
6128}
6129
6130static __inline__ __m128i __DEFAULT_FN_ATTRS128
6131_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6132{
6133 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6134 (__v4si)_mm_sra_epi32(__A, __B),
6135 (__v4si)__W);
6136}
6137
6138static __inline__ __m128i __DEFAULT_FN_ATTRS128
6139_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6140{
6141 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6142 (__v4si)_mm_sra_epi32(__A, __B),
6143 (__v4si)_mm_setzero_si128());
6144}
6145
6146static __inline__ __m256i __DEFAULT_FN_ATTRS256
6147_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6148{
6149 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6150 (__v8si)_mm256_sra_epi32(__A, __B),
6151 (__v8si)__W);
6152}
6153
6154static __inline__ __m256i __DEFAULT_FN_ATTRS256
6155_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6156{
6157 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6158 (__v8si)_mm256_sra_epi32(__A, __B),
6159 (__v8si)_mm256_setzero_si256());
6160}
6161
6162static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6163_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) {
6164 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6165 (__v4si)_mm_srai_epi32(__A, (int)__B),
6166 (__v4si)__W);
6167}
6168
6169static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6170_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) {
6171 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6172 (__v4si)_mm_srai_epi32(__A, (int)__B),
6173 (__v4si)_mm_setzero_si128());
6174}
6175
6176static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6177_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A,
6178 unsigned int __B) {
6179 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6180 (__v8si)_mm256_srai_epi32(__A, (int)__B),
6181 (__v8si)__W);
6182}
6183
6184static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6185_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
6186 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6187 (__v8si)_mm256_srai_epi32(__A, (int)__B),
6188 (__v8si)_mm256_setzero_si256());
6189}
6190
6191static __inline__ __m128i __DEFAULT_FN_ATTRS128
6192_mm_sra_epi64(__m128i __A, __m128i __B)
6193{
6194 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6195}
6196
6197static __inline__ __m128i __DEFAULT_FN_ATTRS128
6198_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6199{
6200 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6201 (__v2di)_mm_sra_epi64(__A, __B), \
6202 (__v2di)__W);
6203}
6204
6205static __inline__ __m128i __DEFAULT_FN_ATTRS128
6206_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6207{
6208 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6209 (__v2di)_mm_sra_epi64(__A, __B), \
6210 (__v2di)_mm_setzero_si128());
6211}
6212
6213static __inline__ __m256i __DEFAULT_FN_ATTRS256
6214_mm256_sra_epi64(__m256i __A, __m128i __B)
6215{
6216 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6217}
6218
6219static __inline__ __m256i __DEFAULT_FN_ATTRS256
6220_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6221{
6222 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6223 (__v4di)_mm256_sra_epi64(__A, __B), \
6224 (__v4di)__W);
6225}
6226
6227static __inline__ __m256i __DEFAULT_FN_ATTRS256
6228_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6229{
6230 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6231 (__v4di)_mm256_sra_epi64(__A, __B), \
6232 (__v4di)_mm256_setzero_si256());
6233}
6234
6235static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6236_mm_srai_epi64(__m128i __A, unsigned int __imm) {
6237 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm);
6238}
6239
6241 __m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) {
6242 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6243 (__v2di)_mm_srai_epi64(__A, __imm), \
6244 (__v2di)__W);
6245}
6246
6247static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6248_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) {
6249 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6250 (__v2di)_mm_srai_epi64(__A, __imm), \
6251 (__v2di)_mm_setzero_si128());
6252}
6253
6254static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6255_mm256_srai_epi64(__m256i __A, unsigned int __imm) {
6256 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm);
6257}
6258
6259static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6260_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6261 unsigned int __imm) {
6262 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6263 (__v4di)_mm256_srai_epi64(__A, __imm), \
6264 (__v4di)__W);
6265}
6266
6267static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6268_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) {
6269 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6270 (__v4di)_mm256_srai_epi64(__A, __imm), \
6271 (__v4di)_mm256_setzero_si256());
6272}
6273
/* Passes A, B, C (as __v4si) and the 8-bit immediate imm to the pternlogd128
 * mask builtin with every mask bit set. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)-1))
6278
/* Passes A, B, C (as __v4si) and the 8-bit immediate imm to the pternlogd128
 * mask builtin with mask U. Note that U is the second macro parameter. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6283
/* Passes A, B, C (as __v4si) and the 8-bit immediate imm to the pternlogd128
 * maskz builtin with mask U. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6288
/* Passes A, B, C (as __v8si) and the 8-bit immediate imm to the pternlogd256
 * mask builtin with every mask bit set. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)-1))
6293
/* Passes A, B, C (as __v8si) and the 8-bit immediate imm to the pternlogd256
 * mask builtin with mask U. Note that U is the second macro parameter. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6298
/* Passes A, B, C (as __v8si) and the 8-bit immediate imm to the pternlogd256
 * maskz builtin with mask U. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6303
/* Passes A, B, C (as __v2di) and the 8-bit immediate imm to the pternlogq128
 * mask builtin with every mask bit set. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)-1))
6308
/* Passes A, B, C (as __v2di) and the 8-bit immediate imm to the pternlogq128
 * mask builtin with mask U. Note that U is the second macro parameter. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6313
/* Passes A, B, C (as __v2di) and the 8-bit immediate imm to the pternlogq128
 * maskz builtin with mask U. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6318
/* Passes A, B, C (as __v4di) and the 8-bit immediate imm to the pternlogq256
 * mask builtin with every mask bit set. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)-1))
6323
/* Passes A, B, C (as __v4di) and the 8-bit immediate imm to the pternlogq256
 * mask builtin with mask U. Note that U is the second macro parameter. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6328
/* Passes A, B, C (as __v4di) and the 8-bit immediate imm to the pternlogq256
 * maskz builtin with mask U. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6333
/* Passes A and B (as __v8sf) and the int immediate imm to the shuf_f32x4_256
 * builtin. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(imm)))
6337
/* Masked _mm256_shuffle_f32x4: under mask U, selects between the shuffle
 * result and the passthrough vector W. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
6342
/* Zero-masked _mm256_shuffle_f32x4: under mask U, selects between the shuffle
 * result and an all-zero vector. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
6347
/* Passes A and B (as __v4df) and the int immediate imm to the shuf_f64x2_256
 * builtin. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(imm)))
6351
/* Masked _mm256_shuffle_f64x2: under mask U, selects between the shuffle
 * result and the passthrough vector W. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)(__m256d)(W)))
6356
/* Zero-masked _mm256_shuffle_f64x2: under mask U, selects between the shuffle
 * result and an all-zero vector. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)_mm256_setzero_pd()))
6361
/* Passes A and B (as __v8si) and the int immediate imm to the shuf_i32x4_256
 * builtin. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm)))
6365
/* Masked _mm256_shuffle_i32x4: under mask U, selects between the shuffle
 * result and the passthrough vector W. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
6370
/* Zero-masked _mm256_shuffle_i32x4: under mask U, selects between the shuffle
 * result and an all-zero vector. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
6375
/* Passes A and B (as __v4di) and the int immediate imm to the shuf_i64x2_256
 * builtin. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(imm)))
6379
/* Masked _mm256_shuffle_i64x2: under mask U, selects between the shuffle
 * result and the passthrough vector W. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))
6384
6385
/* Zero-masked _mm256_shuffle_i64x2: under mask U, selects between the shuffle
 * result and an all-zero vector. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
6390
/* Masked _mm_shuffle_pd: under mask U, selects between the shuffle result
 * (control M) and the passthrough vector W. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)(__m128d)(W)))
6395
/* Zero-masked _mm_shuffle_pd: under mask U, selects between the shuffle
 * result (control M) and an all-zero vector. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)_mm_setzero_pd()))
6400
/* Masked _mm256_shuffle_pd: under mask U, selects between the shuffle result
 * (control M) and the passthrough vector W. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)(__m256d)(W)))
6405
/* Zero-masked _mm256_shuffle_pd: under mask U, selects between the shuffle
 * result (control M) and an all-zero vector. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)_mm256_setzero_pd()))
6410
/* Masked _mm_shuffle_ps: under mask U, selects between the shuffle result
 * (control M) and the passthrough vector W. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)(__m128)(W)))
6415
/* Zero-masked _mm_shuffle_ps: under mask U, selects between the shuffle
 * result (control M) and an all-zero vector. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)_mm_setzero_ps()))
6420
/* Masked _mm256_shuffle_ps: under mask U, selects between the shuffle result
 * (control M) and the passthrough vector W. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)(__m256)(W)))
6425
/* Zero-masked _mm256_shuffle_ps: under mask U, selects between the shuffle
 * result (control M) and an all-zero vector. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)_mm256_setzero_ps()))
6430
6431static __inline__ __m128d __DEFAULT_FN_ATTRS128
6432_mm_rsqrt14_pd (__m128d __A)
6433{
6434 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6435 (__v2df)
6436 _mm_setzero_pd (),
6437 (__mmask8) -1);
6438}
6439
6440static __inline__ __m128d __DEFAULT_FN_ATTRS128
6441_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6442{
6443 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6444 (__v2df) __W,
6445 (__mmask8) __U);
6446}
6447
6448static __inline__ __m128d __DEFAULT_FN_ATTRS128
6450{
6451 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6452 (__v2df)
6453 _mm_setzero_pd (),
6454 (__mmask8) __U);
6455}
6456
6457static __inline__ __m256d __DEFAULT_FN_ATTRS256
6459{
6460 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6461 (__v4df)
6463 (__mmask8) -1);
6464}
6465
6466static __inline__ __m256d __DEFAULT_FN_ATTRS256
6467_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6468{
6469 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6470 (__v4df) __W,
6471 (__mmask8) __U);
6472}
6473
6474static __inline__ __m256d __DEFAULT_FN_ATTRS256
6476{
6477 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6478 (__v4df)
6480 (__mmask8) __U);
6481}
6482
6483static __inline__ __m128 __DEFAULT_FN_ATTRS128
6484_mm_rsqrt14_ps (__m128 __A)
6485{
6486 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6487 (__v4sf)
6488 _mm_setzero_ps (),
6489 (__mmask8) -1);
6490}
6491
6492static __inline__ __m128 __DEFAULT_FN_ATTRS128
6493_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6494{
6495 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6496 (__v4sf) __W,
6497 (__mmask8) __U);
6498}
6499
6500static __inline__ __m128 __DEFAULT_FN_ATTRS128
6502{
6503 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6504 (__v4sf)
6505 _mm_setzero_ps (),
6506 (__mmask8) __U);
6507}
6508
6509static __inline__ __m256 __DEFAULT_FN_ATTRS256
6511{
6512 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6513 (__v8sf)
6515 (__mmask8) -1);
6516}
6517
6518static __inline__ __m256 __DEFAULT_FN_ATTRS256
6519_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6520{
6521 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6522 (__v8sf) __W,
6523 (__mmask8) __U);
6524}
6525
6526static __inline__ __m256 __DEFAULT_FN_ATTRS256
6528{
6529 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6530 (__v8sf)
6532 (__mmask8) __U);
6533}
6534
6535static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6537 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6538 0, 1, 2, 3, 0, 1, 2, 3);
6539}
6540
6541static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6542_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) {
6543 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6544 (__v8sf)_mm256_broadcast_f32x4(__A),
6545 (__v8sf)__O);
6546}
6547
6548static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6550 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6551 (__v8sf)_mm256_broadcast_f32x4(__A),
6552 (__v8sf)_mm256_setzero_ps());
6553}
6554
6555static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6557 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6558 0, 1, 2, 3, 0, 1, 2, 3);
6559}
6560
6561static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6562_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) {
6563 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6564 (__v8si)_mm256_broadcast_i32x4(__A),
6565 (__v8si)__O);
6566}
6567
6568static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6570 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6571 (__v8si)_mm256_broadcast_i32x4(__A),
6572 (__v8si)_mm256_setzero_si256());
6573}
6574
/* Masked companion of _mm256_broadcastsd_pd: the broadcast of __A and the
 * caller's __O (both viewed as __v4df) go to __builtin_ia32_selectpd_256
 * under mask __M.
 * NOTE(review): presumably elements whose bit in __M is set take the
 * broadcast value and the others keep __O -- confirm.
 * Declared with the _CONSTEXPR attribute macro (constexpr for C++11 and
 * later), so the body is kept to a single return statement. */
6575static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
6576_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) {
6577 return (__m256d)__builtin_ia32_selectpd_256(__M,
6578 (__v4df) _mm256_broadcastsd_pd(__A),
6579 (__v4df) __O);
6580}
6581
6582static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
6584 return (__m256d)__builtin_ia32_selectpd_256(__M,
6585 (__v4df) _mm256_broadcastsd_pd(__A),
6586 (__v4df) _mm256_setzero_pd());
6587}
6588
/* Masked companion of _mm_broadcastss_ps: the broadcast of __A and the
 * caller's __O (both viewed as __v4sf) go to __builtin_ia32_selectps_128
 * under mask __M.
 * NOTE(review): presumably elements whose bit in __M is set take the
 * broadcast value and the others keep __O -- confirm.
 * Declared with the _CONSTEXPR attribute macro (constexpr for C++11 and
 * later), so the body is kept to a single return statement. */
6589static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
6590_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) {
6591 return (__m128)__builtin_ia32_selectps_128(__M,
6592 (__v4sf) _mm_broadcastss_ps(__A),
6593 (__v4sf) __O);
6594}
6595
6596static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
6598 return (__m128)__builtin_ia32_selectps_128(__M,
6599 (__v4sf) _mm_broadcastss_ps(__A),
6600 (__v4sf) _mm_setzero_ps());
6601}
6602
/* Masked companion of _mm256_broadcastss_ps: the broadcast of __A and the
 * caller's __O (both viewed as __v8sf) go to __builtin_ia32_selectps_256
 * under mask __M.
 * NOTE(review): presumably elements whose bit in __M is set take the
 * broadcast value and the others keep __O -- confirm.
 * Declared with the _CONSTEXPR attribute macro (constexpr for C++11 and
 * later), so the body is kept to a single return statement. */
6603static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6604_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) {
6605 return (__m256)__builtin_ia32_selectps_256(__M,
6606 (__v8sf) _mm256_broadcastss_ps(__A),
6607 (__v8sf) __O);
6608}
6609
6610static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6612 return (__m256)__builtin_ia32_selectps_256(__M,
6613 (__v8sf) _mm256_broadcastss_ps(__A),
6614 (__v8sf) _mm256_setzero_ps());
6615}
6616
/* Masked companion of _mm_broadcastd_epi32: the broadcast of __A and the
 * caller's __O (both viewed as __v4si) go to __builtin_ia32_selectd_128
 * under mask __M.
 * NOTE(review): presumably elements whose bit in __M is set take the
 * broadcast value and the others keep __O -- confirm.
 * Declared with the _CONSTEXPR attribute macro (constexpr for C++11 and
 * later), so the body is kept to a single return statement. */
6617static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6618_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) {
6619 return (__m128i)__builtin_ia32_selectd_128(__M,
6620 (__v4si) _mm_broadcastd_epi32(__A),
6621 (__v4si) __O);
6622}
6623
6624static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6626 return (__m128i)__builtin_ia32_selectd_128(__M,
6627 (__v4si) _mm_broadcastd_epi32(__A),
6628 (__v4si) _mm_setzero_si128());
6629}
6630
/* Masked companion of _mm256_broadcastd_epi32: the broadcast of __A and the
 * caller's __O (both viewed as __v8si) go to __builtin_ia32_selectd_256
 * under mask __M.
 * NOTE(review): presumably elements whose bit in __M is set take the
 * broadcast value and the others keep __O -- confirm.
 * Declared with the _CONSTEXPR attribute macro (constexpr for C++11 and
 * later), so the body is kept to a single return statement. */
6631static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6632_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) {
6633 return (__m256i)__builtin_ia32_selectd_256(__M,
6634 (__v8si) _mm256_broadcastd_epi32(__A),
6635 (__v8si) __O);
6636}
6637
6638static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6640 return (__m256i)__builtin_ia32_selectd_256(__M,
6641 (__v8si) _mm256_broadcastd_epi32(__A),
6642 (__v8si) _mm256_setzero_si256());
6643}
6644
/* Masked companion of _mm_broadcastq_epi64: the broadcast of __A and the
 * caller's __O (both viewed as __v2di) go to __builtin_ia32_selectq_128
 * under mask __M.
 * NOTE(review): presumably elements whose bit in __M is set take the
 * broadcast value and the others keep __O -- confirm.
 * Declared with the _CONSTEXPR attribute macro (constexpr for C++11 and
 * later), so the body is kept to a single return statement. */
6645static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6646_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) {
6647 return (__m128i)__builtin_ia32_selectq_128(__M,
6648 (__v2di) _mm_broadcastq_epi64(__A),
6649 (__v2di) __O);
6650}
6651
6652static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6654 return (__m128i)__builtin_ia32_selectq_128(__M,
6655 (__v2di) _mm_broadcastq_epi64(__A),
6656 (__v2di) _mm_setzero_si128());
6657}
6658
/* Masked companion of _mm256_broadcastq_epi64: the broadcast of __A and the
 * caller's __O (both viewed as __v4di) go to __builtin_ia32_selectq_256
 * under mask __M.
 * NOTE(review): presumably elements whose bit in __M is set take the
 * broadcast value and the others keep __O -- confirm.
 * Declared with the _CONSTEXPR attribute macro (constexpr for C++11 and
 * later), so the body is kept to a single return statement. */
6659static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6660_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) {
6661 return (__m256i)__builtin_ia32_selectq_256(__M,
6662 (__v4di) _mm256_broadcastq_epi64(__A),
6663 (__v4di) __O);
6664}
6665
6666static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6668 return (__m256i)__builtin_ia32_selectq_256(__M,
6669 (__v4di) _mm256_broadcastq_epi64(__A),
6670 (__v4di) _mm256_setzero_si256());
6671}
6672
6673static __inline__ __m128i __DEFAULT_FN_ATTRS128
6675{
6676 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6677 (__v16qi)_mm_undefined_si128(),
6678 (__mmask8) -1);
6679}
6680
/* Thin wrapper: forwards __A (viewed as __v4si), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovsdb128_mask.
 * NOTE(review): by name this is the merge-masked signed-saturating
 * 32-bit -> 8-bit narrowing (VPMOVSDB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6681static __inline__ __m128i __DEFAULT_FN_ATTRS128
6682_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6683{
6684 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6685 (__v16qi) __O, __M);
6686}
6687
6688static __inline__ __m128i __DEFAULT_FN_ATTRS128
6690{
6691 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6692 (__v16qi) _mm_setzero_si128 (),
6693 __M);
6694}
6695
6696static __inline__ void __DEFAULT_FN_ATTRS128
6698{
6699 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6700}
6701
6702static __inline__ __m128i __DEFAULT_FN_ATTRS256
6704{
6705 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6706 (__v16qi)_mm_undefined_si128(),
6707 (__mmask8) -1);
6708}
6709
/* Thin wrapper: forwards __A (viewed as __v8si), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovsdb256_mask.
 * NOTE(review): by name this is the merge-masked signed-saturating
 * 32-bit -> 8-bit narrowing (VPMOVSDB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6710static __inline__ __m128i __DEFAULT_FN_ATTRS256
6711_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6712{
6713 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6714 (__v16qi) __O, __M);
6715}
6716
6717static __inline__ __m128i __DEFAULT_FN_ATTRS256
6719{
6720 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6721 (__v16qi) _mm_setzero_si128 (),
6722 __M);
6723}
6724
6725static __inline__ void __DEFAULT_FN_ATTRS256
6727{
6728 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
6729}
6730
6731static __inline__ __m128i __DEFAULT_FN_ATTRS128
6733{
6734 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6735 (__v8hi)_mm_setzero_si128 (),
6736 (__mmask8) -1);
6737}
6738
/* Thin wrapper: forwards __A (viewed as __v4si), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovsdw128_mask.
 * NOTE(review): by name this is the merge-masked signed-saturating
 * 32-bit -> 16-bit narrowing (VPMOVSDW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6739static __inline__ __m128i __DEFAULT_FN_ATTRS128
6740_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
6741{
6742 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6743 (__v8hi)__O,
6744 __M);
6745}
6746
6747static __inline__ __m128i __DEFAULT_FN_ATTRS128
6749{
6750 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6751 (__v8hi) _mm_setzero_si128 (),
6752 __M);
6753}
6754
6755static __inline__ void __DEFAULT_FN_ATTRS128
6757{
6758 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
6759}
6760
6761static __inline__ __m128i __DEFAULT_FN_ATTRS256
6763{
6764 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6765 (__v8hi)_mm_undefined_si128(),
6766 (__mmask8) -1);
6767}
6768
/* Thin wrapper: forwards __A (viewed as __v8si), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovsdw256_mask.
 * NOTE(review): by name this is the merge-masked signed-saturating
 * 32-bit -> 16-bit narrowing (VPMOVSDW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6769static __inline__ __m128i __DEFAULT_FN_ATTRS256
6770_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
6771{
6772 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6773 (__v8hi) __O, __M);
6774}
6775
6776static __inline__ __m128i __DEFAULT_FN_ATTRS256
6778{
6779 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6780 (__v8hi) _mm_setzero_si128 (),
6781 __M);
6782}
6783
6784static __inline__ void __DEFAULT_FN_ATTRS256
6786{
6787 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
6788}
6789
6790static __inline__ __m128i __DEFAULT_FN_ATTRS128
6792{
6793 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6794 (__v16qi)_mm_undefined_si128(),
6795 (__mmask8) -1);
6796}
6797
/* Thin wrapper: forwards __A (viewed as __v2di), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovsqb128_mask.
 * NOTE(review): by name this is the merge-masked signed-saturating
 * 64-bit -> 8-bit narrowing (VPMOVSQB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6798static __inline__ __m128i __DEFAULT_FN_ATTRS128
6799_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6800{
6801 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6802 (__v16qi) __O, __M);
6803}
6804
6805static __inline__ __m128i __DEFAULT_FN_ATTRS128
6807{
6808 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6809 (__v16qi) _mm_setzero_si128 (),
6810 __M);
6811}
6812
6813static __inline__ void __DEFAULT_FN_ATTRS128
6815{
6816 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
6817}
6818
6819static __inline__ __m128i __DEFAULT_FN_ATTRS256
6821{
6822 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6823 (__v16qi)_mm_undefined_si128(),
6824 (__mmask8) -1);
6825}
6826
/* Thin wrapper: forwards __A (viewed as __v4di), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovsqb256_mask.
 * NOTE(review): by name this is the merge-masked signed-saturating
 * 64-bit -> 8-bit narrowing (VPMOVSQB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6827static __inline__ __m128i __DEFAULT_FN_ATTRS256
6828_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6829{
6830 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6831 (__v16qi) __O, __M);
6832}
6833
6834static __inline__ __m128i __DEFAULT_FN_ATTRS256
6836{
6837 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6838 (__v16qi) _mm_setzero_si128 (),
6839 __M);
6840}
6841
6842static __inline__ void __DEFAULT_FN_ATTRS256
6844{
6845 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
6846}
6847
6848static __inline__ __m128i __DEFAULT_FN_ATTRS128
6850{
6851 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6852 (__v4si)_mm_undefined_si128(),
6853 (__mmask8) -1);
6854}
6855
/* Thin wrapper: forwards __A (viewed as __v2di), __O (viewed as __v4si) and
 * the mask __M, in that order, to __builtin_ia32_pmovsqd128_mask.
 * NOTE(review): by name this is the merge-masked signed-saturating
 * 64-bit -> 32-bit narrowing (VPMOVSQD), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6856static __inline__ __m128i __DEFAULT_FN_ATTRS128
6857_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6858{
6859 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6860 (__v4si) __O, __M);
6861}
6862
6863static __inline__ __m128i __DEFAULT_FN_ATTRS128
6865{
6866 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6867 (__v4si) _mm_setzero_si128 (),
6868 __M);
6869}
6870
6871static __inline__ void __DEFAULT_FN_ATTRS128
6873{
6874 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
6875}
6876
6877static __inline__ __m128i __DEFAULT_FN_ATTRS256
6879{
6880 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6881 (__v4si)_mm_undefined_si128(),
6882 (__mmask8) -1);
6883}
6884
/* Thin wrapper: forwards __A (viewed as __v4di), __O (viewed as __v4si) and
 * the mask __M, in that order, to __builtin_ia32_pmovsqd256_mask.
 * NOTE(review): by name this is the merge-masked signed-saturating
 * 64-bit -> 32-bit narrowing (VPMOVSQD), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6885static __inline__ __m128i __DEFAULT_FN_ATTRS256
6886_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
6887{
6888 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6889 (__v4si)__O,
6890 __M);
6891}
6892
6893static __inline__ __m128i __DEFAULT_FN_ATTRS256
6895{
6896 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6897 (__v4si) _mm_setzero_si128 (),
6898 __M);
6899}
6900
6901static __inline__ void __DEFAULT_FN_ATTRS256
6903{
6904 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
6905}
6906
6907static __inline__ __m128i __DEFAULT_FN_ATTRS128
6909{
6910 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
6911 (__v8hi)_mm_undefined_si128(),
6912 (__mmask8) -1);
6913}
6914
/* Thin wrapper: forwards __A (viewed as __v2di), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovsqw128_mask.
 * NOTE(review): by name this is the merge-masked signed-saturating
 * 64-bit -> 16-bit narrowing (VPMOVSQW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6915static __inline__ __m128i __DEFAULT_FN_ATTRS128
6916_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
6917{
6918 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
6919 (__v8hi) __O, __M);
6920}
6921
6922static __inline__ __m128i __DEFAULT_FN_ATTRS128
6924{
6925 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
6926 (__v8hi) _mm_setzero_si128 (),
6927 __M);
6928}
6929
6930static __inline__ void __DEFAULT_FN_ATTRS128
6932{
6933 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
6934}
6935
6936static __inline__ __m128i __DEFAULT_FN_ATTRS256
6938{
6939 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
6940 (__v8hi)_mm_undefined_si128(),
6941 (__mmask8) -1);
6942}
6943
/* Thin wrapper: forwards __A (viewed as __v4di), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovsqw256_mask.
 * NOTE(review): by name this is the merge-masked signed-saturating
 * 64-bit -> 16-bit narrowing (VPMOVSQW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6944static __inline__ __m128i __DEFAULT_FN_ATTRS256
6945_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
6946{
6947 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
6948 (__v8hi) __O, __M);
6949}
6950
6951static __inline__ __m128i __DEFAULT_FN_ATTRS256
6953{
6954 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
6955 (__v8hi) _mm_setzero_si128 (),
6956 __M);
6957}
6958
6959static __inline__ void __DEFAULT_FN_ATTRS256
6961{
6962 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
6963}
6964
6965static __inline__ __m128i __DEFAULT_FN_ATTRS128
6967{
6968 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
6969 (__v16qi)_mm_undefined_si128(),
6970 (__mmask8) -1);
6971}
6972
/* Thin wrapper: forwards __A (viewed as __v4si), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovusdb128_mask.
 * NOTE(review): by name this is the merge-masked unsigned-saturating
 * 32-bit -> 8-bit narrowing (VPMOVUSDB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
6973static __inline__ __m128i __DEFAULT_FN_ATTRS128
6974_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6975{
6976 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
6977 (__v16qi) __O,
6978 __M);
6979}
6980
6981static __inline__ __m128i __DEFAULT_FN_ATTRS128
6983{
6984 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
6985 (__v16qi) _mm_setzero_si128 (),
6986 __M);
6987}
6988
6989static __inline__ void __DEFAULT_FN_ATTRS128
6991{
6992 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6993}
6994
6995static __inline__ __m128i __DEFAULT_FN_ATTRS256
6997{
6998 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
6999 (__v16qi)_mm_undefined_si128(),
7000 (__mmask8) -1);
7001}
7002
/* Thin wrapper: forwards __A (viewed as __v8si), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovusdb256_mask.
 * NOTE(review): by name this is the merge-masked unsigned-saturating
 * 32-bit -> 8-bit narrowing (VPMOVUSDB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7003static __inline__ __m128i __DEFAULT_FN_ATTRS256
7004_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7005{
7006 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7007 (__v16qi) __O,
7008 __M);
7009}
7010
7011static __inline__ __m128i __DEFAULT_FN_ATTRS256
7013{
7014 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7015 (__v16qi) _mm_setzero_si128 (),
7016 __M);
7017}
7018
7019static __inline__ void __DEFAULT_FN_ATTRS256
7021{
7022 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7023}
7024
7025static __inline__ __m128i __DEFAULT_FN_ATTRS128
7027{
7028 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7029 (__v8hi)_mm_undefined_si128(),
7030 (__mmask8) -1);
7031}
7032
/* Thin wrapper: forwards __A (viewed as __v4si), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovusdw128_mask.
 * NOTE(review): by name this is the merge-masked unsigned-saturating
 * 32-bit -> 16-bit narrowing (VPMOVUSDW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7033static __inline__ __m128i __DEFAULT_FN_ATTRS128
7034_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7035{
7036 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7037 (__v8hi) __O, __M);
7038}
7039
7040static __inline__ __m128i __DEFAULT_FN_ATTRS128
7042{
7043 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7044 (__v8hi) _mm_setzero_si128 (),
7045 __M);
7046}
7047
7048static __inline__ void __DEFAULT_FN_ATTRS128
7050{
7051 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7052}
7053
7054static __inline__ __m128i __DEFAULT_FN_ATTRS256
7056{
7057 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7058 (__v8hi) _mm_undefined_si128(),
7059 (__mmask8) -1);
7060}
7061
/* Thin wrapper: forwards __A (viewed as __v8si), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovusdw256_mask.
 * NOTE(review): by name this is the merge-masked unsigned-saturating
 * 32-bit -> 16-bit narrowing (VPMOVUSDW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7062static __inline__ __m128i __DEFAULT_FN_ATTRS256
7063_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7064{
7065 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7066 (__v8hi) __O, __M);
7067}
7068
7069static __inline__ __m128i __DEFAULT_FN_ATTRS256
7071{
7072 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7073 (__v8hi) _mm_setzero_si128 (),
7074 __M);
7075}
7076
7077static __inline__ void __DEFAULT_FN_ATTRS256
7079{
7080 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7081}
7082
7083static __inline__ __m128i __DEFAULT_FN_ATTRS128
7085{
7086 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7087 (__v16qi)_mm_undefined_si128(),
7088 (__mmask8) -1);
7089}
7090
/* Thin wrapper: forwards __A (viewed as __v2di), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovusqb128_mask.
 * NOTE(review): by name this is the merge-masked unsigned-saturating
 * 64-bit -> 8-bit narrowing (VPMOVUSQB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7091static __inline__ __m128i __DEFAULT_FN_ATTRS128
7092_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7093{
7094 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7095 (__v16qi) __O,
7096 __M);
7097}
7098
7099static __inline__ __m128i __DEFAULT_FN_ATTRS128
7101{
7102 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7103 (__v16qi) _mm_setzero_si128 (),
7104 __M);
7105}
7106
7107static __inline__ void __DEFAULT_FN_ATTRS128
7109{
7110 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7111}
7112
7113static __inline__ __m128i __DEFAULT_FN_ATTRS256
7115{
7116 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7117 (__v16qi)_mm_undefined_si128(),
7118 (__mmask8) -1);
7119}
7120
/* Thin wrapper: forwards __A (viewed as __v4di), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovusqb256_mask.
 * NOTE(review): by name this is the merge-masked unsigned-saturating
 * 64-bit -> 8-bit narrowing (VPMOVUSQB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7121static __inline__ __m128i __DEFAULT_FN_ATTRS256
7122_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7123{
7124 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7125 (__v16qi) __O,
7126 __M);
7127}
7128
7129static __inline__ __m128i __DEFAULT_FN_ATTRS256
7131{
7132 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7133 (__v16qi) _mm_setzero_si128 (),
7134 __M);
7135}
7136
7137static __inline__ void __DEFAULT_FN_ATTRS256
7139{
7140 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7141}
7142
7143static __inline__ __m128i __DEFAULT_FN_ATTRS128
7145{
7146 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7147 (__v4si)_mm_undefined_si128(),
7148 (__mmask8) -1);
7149}
7150
/* Thin wrapper: forwards __A (viewed as __v2di), __O (viewed as __v4si) and
 * the mask __M, in that order, to __builtin_ia32_pmovusqd128_mask.
 * NOTE(review): by name this is the merge-masked unsigned-saturating
 * 64-bit -> 32-bit narrowing (VPMOVUSQD), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7151static __inline__ __m128i __DEFAULT_FN_ATTRS128
7152_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7153{
7154 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7155 (__v4si) __O, __M);
7156}
7157
7158static __inline__ __m128i __DEFAULT_FN_ATTRS128
7160{
7161 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7162 (__v4si) _mm_setzero_si128 (),
7163 __M);
7164}
7165
7166static __inline__ void __DEFAULT_FN_ATTRS128
7168{
7169 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7170}
7171
7172static __inline__ __m128i __DEFAULT_FN_ATTRS256
7174{
7175 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7176 (__v4si)_mm_undefined_si128(),
7177 (__mmask8) -1);
7178}
7179
/* Thin wrapper: forwards __A (viewed as __v4di), __O (viewed as __v4si) and
 * the mask __M, in that order, to __builtin_ia32_pmovusqd256_mask.
 * NOTE(review): by name this is the merge-masked unsigned-saturating
 * 64-bit -> 32-bit narrowing (VPMOVUSQD), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7180static __inline__ __m128i __DEFAULT_FN_ATTRS256
7181_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7182{
7183 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7184 (__v4si) __O, __M);
7185}
7186
7187static __inline__ __m128i __DEFAULT_FN_ATTRS256
7189{
7190 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7191 (__v4si) _mm_setzero_si128 (),
7192 __M);
7193}
7194
7195static __inline__ void __DEFAULT_FN_ATTRS256
7197{
7198 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7199}
7200
7201static __inline__ __m128i __DEFAULT_FN_ATTRS128
7203{
7204 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7205 (__v8hi)_mm_undefined_si128(),
7206 (__mmask8) -1);
7207}
7208
/* Thin wrapper: forwards __A (viewed as __v2di), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovusqw128_mask.
 * NOTE(review): by name this is the merge-masked unsigned-saturating
 * 64-bit -> 16-bit narrowing (VPMOVUSQW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7209static __inline__ __m128i __DEFAULT_FN_ATTRS128
7210_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7211{
7212 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7213 (__v8hi) __O, __M);
7214}
7215
7216static __inline__ __m128i __DEFAULT_FN_ATTRS128
7218{
7219 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7220 (__v8hi) _mm_setzero_si128 (),
7221 __M);
7222}
7223
7224static __inline__ void __DEFAULT_FN_ATTRS128
7226{
7227 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7228}
7229
7230static __inline__ __m128i __DEFAULT_FN_ATTRS256
7232{
7233 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7234 (__v8hi)_mm_undefined_si128(),
7235 (__mmask8) -1);
7236}
7237
/* Thin wrapper: forwards __A (viewed as __v4di), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovusqw256_mask.
 * NOTE(review): by name this is the merge-masked unsigned-saturating
 * 64-bit -> 16-bit narrowing (VPMOVUSQW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7238static __inline__ __m128i __DEFAULT_FN_ATTRS256
7239_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7240{
7241 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7242 (__v8hi) __O, __M);
7243}
7244
7245static __inline__ __m128i __DEFAULT_FN_ATTRS256
7247{
7248 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7249 (__v8hi) _mm_setzero_si128 (),
7250 __M);
7251}
7252
7253static __inline__ void __DEFAULT_FN_ATTRS256
7255{
7256 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7257}
7258
/* Narrows the four 32-bit elements of __A to bytes without a target-specific
 * builtin: __builtin_convertvector converts __A (as __v4si) element-wise to a
 * __v4qi, then __builtin_shufflevector widens that to 16 bytes.
 * Shuffle indices 0..3 pick the four converted bytes; indices 4..7 address the
 * second operand, the all-zero __v4qi literal, so result bytes 4..15 are zero
 * (index 7 is simply repeated for the tail).
 * Declared with the _CONSTEXPR attribute macro (constexpr for C++11 and
 * later), so the body is kept to a single return statement. */
7259static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
7260_mm_cvtepi32_epi8(__m128i __A) {
7261 return (__m128i)__builtin_shufflevector(
7262 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7263 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7264}
7265
/* Thin wrapper: forwards __A (viewed as __v4si), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovdb128_mask.
 * NOTE(review): by name this is the merge-masked truncating
 * 32-bit -> 8-bit narrowing (VPMOVDB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7266static __inline__ __m128i __DEFAULT_FN_ATTRS128
7267_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7268{
7269 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7270 (__v16qi) __O, __M);
7271}
7272
7273static __inline__ __m128i __DEFAULT_FN_ATTRS128
7275{
7276 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7277 (__v16qi)
7279 __M);
7280}
7281
7282static __inline__ void __DEFAULT_FN_ATTRS128
7284{
7285 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7286}
7287
7288static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7290 return (__m128i)__builtin_shufflevector(
7291 __builtin_convertvector((__v8si)__A, __v8qi),
7292 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7293 12, 13, 14, 15);
7294}
7295
/* Thin wrapper: forwards __A (viewed as __v8si), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovdb256_mask.
 * NOTE(review): by name this is the merge-masked truncating
 * 32-bit -> 8-bit narrowing (VPMOVDB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7296static __inline__ __m128i __DEFAULT_FN_ATTRS256
7297_mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) {
7298 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7299 (__v16qi) __O, __M);
7300}
7301
7302static __inline__ __m128i __DEFAULT_FN_ATTRS256
7304{
7305 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7306 (__v16qi) _mm_setzero_si128 (),
7307 __M);
7308}
7309
7310static __inline__ void __DEFAULT_FN_ATTRS256
7312{
7313 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7314}
7315
7316static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
7318 return (__m128i)__builtin_shufflevector(
7319 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7320 2, 3, 4, 5, 6, 7);
7321}
7322
/* Thin wrapper: forwards __A (viewed as __v4si), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovdw128_mask.
 * NOTE(review): by name this is the merge-masked truncating
 * 32-bit -> 16-bit narrowing (VPMOVDW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7323static __inline__ __m128i __DEFAULT_FN_ATTRS128
7324_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7325{
7326 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7327 (__v8hi) __O, __M);
7328}
7329
7330static __inline__ __m128i __DEFAULT_FN_ATTRS128
7332{
7333 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7334 (__v8hi) _mm_setzero_si128 (),
7335 __M);
7336}
7337
7338static __inline__ void __DEFAULT_FN_ATTRS128
7340{
7341 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7342}
7343
7344static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7346 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7347}
7348
/* Thin wrapper: forwards __A (viewed as __v8si), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovdw256_mask.
 * NOTE(review): by name this is the merge-masked truncating
 * 32-bit -> 16-bit narrowing (VPMOVDW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7349static __inline__ __m128i __DEFAULT_FN_ATTRS256
7350_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7351{
7352 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7353 (__v8hi) __O, __M);
7354}
7355
7356static __inline__ __m128i __DEFAULT_FN_ATTRS256
7358{
7359 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7360 (__v8hi) _mm_setzero_si128 (),
7361 __M);
7362}
7363
7364static __inline__ void __DEFAULT_FN_ATTRS256
7366{
7367 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7368}
7369
/* Narrows the two 64-bit elements of __A to bytes without a target-specific
 * builtin: __builtin_convertvector converts __A (as __v2di) element-wise to a
 * __v2qi, then __builtin_shufflevector widens that to 16 bytes.
 * Shuffle indices 0..1 pick the two converted bytes; indices 2..3 address the
 * second operand, the all-zero __v2qi literal, so result bytes 2..15 are zero
 * (index 3 is simply repeated for the tail).
 * Declared with the _CONSTEXPR attribute macro (constexpr for C++11 and
 * later), so the body is kept to a single return statement. */
7370static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
7371_mm_cvtepi64_epi8(__m128i __A) {
7372 return (__m128i)__builtin_shufflevector(
7373 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7374 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7375}
7376
/* Thin wrapper: forwards __A (viewed as __v2di), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovqb128_mask.
 * NOTE(review): by name this is the merge-masked truncating
 * 64-bit -> 8-bit narrowing (VPMOVQB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7377static __inline__ __m128i __DEFAULT_FN_ATTRS128
7378_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7379{
7380 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7381 (__v16qi) __O, __M);
7382}
7383
7384static __inline__ __m128i __DEFAULT_FN_ATTRS128
7386{
7387 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7388 (__v16qi) _mm_setzero_si128 (),
7389 __M);
7390}
7391
7392static __inline__ void __DEFAULT_FN_ATTRS128
7394{
7395 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7396}
7397
7398static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7400 return (__m128i)__builtin_shufflevector(
7401 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7402 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7403}
7404
/* Thin wrapper: forwards __A (viewed as __v4di), __O (viewed as __v16qi) and
 * the mask __M, in that order, to __builtin_ia32_pmovqb256_mask.
 * NOTE(review): by name this is the merge-masked truncating
 * 64-bit -> 8-bit narrowing (VPMOVQB), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7405static __inline__ __m128i __DEFAULT_FN_ATTRS256
7406_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7407{
7408 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7409 (__v16qi) __O, __M);
7410}
7411
7412static __inline__ __m128i __DEFAULT_FN_ATTRS256
7414{
7415 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7416 (__v16qi) _mm_setzero_si128 (),
7417 __M);
7418}
7419
7420static __inline__ void __DEFAULT_FN_ATTRS256
7422{
7423 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7424}
7425
7426static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
7428 return (__m128i)__builtin_shufflevector(
7429 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7430}
7431
/* Thin wrapper: forwards __A (viewed as __v2di), __O (viewed as __v4si) and
 * the mask __M, in that order, to __builtin_ia32_pmovqd128_mask.
 * NOTE(review): by name this is the merge-masked truncating
 * 64-bit -> 32-bit narrowing (VPMOVQD), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7432static __inline__ __m128i __DEFAULT_FN_ATTRS128
7433_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7434{
7435 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7436 (__v4si) __O, __M);
7437}
7438
7439static __inline__ __m128i __DEFAULT_FN_ATTRS128
7441{
7442 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7443 (__v4si) _mm_setzero_si128 (),
7444 __M);
7445}
7446
7447static __inline__ void __DEFAULT_FN_ATTRS128
7449{
7450 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7451}
7452
7453static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7455 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7456}
7457
/* Masked companion of _mm256_cvtepi64_epi32: unlike the neighbouring
 * pmov*_mask wrappers, this one computes the unmasked conversion of __A and
 * hands it, with the caller's __O (both viewed as __v4si), to
 * __builtin_ia32_selectd_128 under mask __M.
 * NOTE(review): presumably elements whose bit in __M is set take the
 * converted value and the others keep __O -- confirm.
 * Declared with the _CONSTEXPR attribute macro (constexpr for C++11 and
 * later), so the body is kept to a single return statement. */
7458static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7459_mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) {
7460 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7461 (__v4si)_mm256_cvtepi64_epi32(__A),
7462 (__v4si)__O);
7463}
7464
7465static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7467 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7468 (__v4si)_mm256_cvtepi64_epi32(__A),
7469 (__v4si)_mm_setzero_si128());
7470}
7471
7472static __inline__ void __DEFAULT_FN_ATTRS256
7474{
7475 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7476}
7477
7478static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
7480 return (__m128i)__builtin_shufflevector(
7481 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7482 3, 3, 3, 3);
7483}
7484
/* Thin wrapper: forwards __A (viewed as __v2di), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovqw128_mask.
 * NOTE(review): by name this is the merge-masked truncating
 * 64-bit -> 16-bit narrowing (VPMOVQW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7485static __inline__ __m128i __DEFAULT_FN_ATTRS128
7486_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7487{
7488 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7489 (__v8hi)__O,
7490 __M);
7491}
7492
7493static __inline__ __m128i __DEFAULT_FN_ATTRS128
7495{
7496 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7497 (__v8hi) _mm_setzero_si128 (),
7498 __M);
7499}
7500
7501static __inline__ void __DEFAULT_FN_ATTRS128
7503{
7504 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7505}
7506
7507static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7509 return (__m128i)__builtin_shufflevector(
7510 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7511 2, 3, 4, 5, 6, 7);
7512}
7513
/* Thin wrapper: forwards __A (viewed as __v4di), __O (viewed as __v8hi) and
 * the mask __M, in that order, to __builtin_ia32_pmovqw256_mask.
 * NOTE(review): by name this is the merge-masked truncating
 * 64-bit -> 16-bit narrowing (VPMOVQW), elements with a clear mask bit
 * presumably coming from __O -- confirm against the Intel Intrinsics Guide. */
7514static __inline__ __m128i __DEFAULT_FN_ATTRS256
7515_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7516{
7517 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7518 (__v8hi) __O, __M);
7519}
7520
7521static __inline__ __m128i __DEFAULT_FN_ATTRS256
7523{
7524 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7525 (__v8hi) _mm_setzero_si128 (),
7526 __M);
7527}
7528
7529static __inline__ void __DEFAULT_FN_ATTRS256
7531{
7532 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7533}
7534
/* Extract a __m128 from a __m256, in plain / mask / maskz forms.
 * All three expand to __builtin_ia32_extractf32x4_256_mask(A as __v8sf,
 * (int)imm, pass-through, mask) and differ only in the last two operands:
 *   plain - zero vector as pass-through, all-ones mask ((__mmask8)-1)
 *   mask  - W as pass-through, U as mask
 *   maskz - zero vector as pass-through, U as mask
 * NOTE(review): imm presumably selects the 128-bit half and must be a
 * compile-time constant, which would explain the macro form -- confirm. */
7535#define _mm256_extractf32x4_ps(A, imm) \
7536 ((__m128)__builtin_ia32_extractf32x4_256_mask( \
7537 (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
7538 (__mmask8) - 1))
7539
7540#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
7541 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7542 (int)(imm), \
7543 (__v4sf)(__m128)(W), \
7544 (__mmask8)(U)))
7545
7546#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
7547 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7548 (int)(imm), \
7549 (__v4sf)_mm_setzero_ps(), \
7550 (__mmask8)(U)))
7551
/* Extract a __m128i from a __m256i, in plain / mask / maskz forms.
 * All three expand to __builtin_ia32_extracti32x4_256_mask(A as __v8si,
 * (int)imm, pass-through, mask) and differ only in the last two operands:
 *   plain - zero vector as pass-through, all-ones mask ((__mmask8)-1)
 *   mask  - W as pass-through, U as mask
 *   maskz - zero vector as pass-through, U as mask
 * NOTE(review): imm presumably selects the 128-bit half and must be a
 * compile-time constant, which would explain the macro form -- confirm. */
7552#define _mm256_extracti32x4_epi32(A, imm) \
7553 ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
7554 (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
7555 (__mmask8) - 1))
7556
7557#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
7558 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7559 (int)(imm), \
7560 (__v4si)(__m128i)(W), \
7561 (__mmask8)(U)))
7562
7563#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
7564 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7565 (int)(imm), \
7566 (__v4si)_mm_setzero_si128(), \
7567 (__mmask8)(U)))
7568
/* Insert a __m128 (B) into a __m256 (A), in plain / mask / maskz forms.
 * The plain macro expands to __builtin_ia32_insertf32x4_256(A as __v8sf,
 * B as __v4sf, (int)imm). The masked forms do not call a masked builtin:
 * they wrap the plain macro's result in __builtin_ia32_selectps_256 with U as
 * the mask and either W (mask) or a zero vector (maskz) as the other operand.
 * NOTE(review): imm presumably selects which 128-bit half of A is replaced
 * and must be a compile-time constant -- confirm. */
7569#define _mm256_insertf32x4(A, B, imm) \
7570 ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
7571 (__v4sf)(__m128)(B), (int)(imm)))
7572
7573#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
7574 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7575 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
7576 (__v8sf)(__m256)(W)))
7577
7578#define _mm256_maskz_insertf32x4(U, A, B, imm) \
7579 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7580 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
7581 (__v8sf)_mm256_setzero_ps()))
7582
/* Insert a __m128i (B) into a __m256i (A), in plain / mask / maskz forms.
 * The plain macro expands to __builtin_ia32_inserti32x4_256(A as __v8si,
 * B as __v4si, (int)imm). The masked forms do not call a masked builtin:
 * they wrap the plain macro's result in __builtin_ia32_selectd_256 with U as
 * the mask and either W (mask) or a zero vector (maskz) as the other operand.
 * NOTE(review): imm presumably selects which 128-bit half of A is replaced
 * and must be a compile-time constant -- confirm. */
7583#define _mm256_inserti32x4(A, B, imm) \
7584 ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
7585 (__v4si)(__m128i)(B), (int)(imm)))
7586
7587#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
7588 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7589 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
7590 (__v8si)(__m256i)(W)))
7591
7592#define _mm256_maskz_inserti32x4(U, A, B, imm) \
7593 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7594 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
7595 (__v8si)_mm256_setzero_si256()))
7596
/* getmant on a __m128d, in plain / mask / maskz forms.
 * All three expand to __builtin_ia32_getmantpd128_mask(A as __v2df,
 * immediate, pass-through, mask). The immediate packs the two selector
 * arguments into one int: C shifted left by 2, OR'ed with B.
 *   plain - zero vector as pass-through, all-ones mask ((__mmask8)-1)
 *   mask  - W as pass-through, U as mask
 *   maskz - zero vector as pass-through, U as mask
 * NOTE(review): B is presumably the normalization-interval selector and C the
 * sign-control selector of VGETMANTPD -- confirm against the Intel docs. */
7597#define _mm_getmant_pd(A, B, C) \
7598 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7599 (int)(((C)<<2) | (B)), \
7600 (__v2df)_mm_setzero_pd(), \
7601 (__mmask8)-1))
7602
7603#define _mm_mask_getmant_pd(W, U, A, B, C) \
7604 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7605 (int)(((C)<<2) | (B)), \
7606 (__v2df)(__m128d)(W), \
7607 (__mmask8)(U)))
7608
7609#define _mm_maskz_getmant_pd(U, A, B, C) \
7610 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7611 (int)(((C)<<2) | (B)), \
7612 (__v2df)_mm_setzero_pd(), \
7613 (__mmask8)(U)))
7614
/* getmant on a __m256d, in plain / mask / maskz forms.
 * All three expand to __builtin_ia32_getmantpd256_mask(A as __v4df,
 * immediate, pass-through, mask). The immediate packs the two selector
 * arguments into one int: C shifted left by 2, OR'ed with B.
 *   plain - zero vector as pass-through, all-ones mask ((__mmask8)-1)
 *   mask  - W as pass-through, U as mask
 *   maskz - zero vector as pass-through, U as mask
 * NOTE(review): B is presumably the normalization-interval selector and C the
 * sign-control selector of VGETMANTPD -- confirm against the Intel docs. */
7615#define _mm256_getmant_pd(A, B, C) \
7616 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7617 (int)(((C)<<2) | (B)), \
7618 (__v4df)_mm256_setzero_pd(), \
7619 (__mmask8)-1))
7620
7621#define _mm256_mask_getmant_pd(W, U, A, B, C) \
7622 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7623 (int)(((C)<<2) | (B)), \
7624 (__v4df)(__m256d)(W), \
7625 (__mmask8)(U)))
7626
7627#define _mm256_maskz_getmant_pd(U, A, B, C) \
7628 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7629 (int)(((C)<<2) | (B)), \
7630 (__v4df)_mm256_setzero_pd(), \
7631 (__mmask8)(U)))
7632
/* getmant on a __m128, in plain / mask / maskz forms.
 * All three expand to __builtin_ia32_getmantps128_mask(A as __v4sf,
 * immediate, pass-through, mask). The immediate packs the two selector
 * arguments into one int: C shifted left by 2, OR'ed with B.
 *   plain - zero vector as pass-through, all-ones mask ((__mmask8)-1)
 *   mask  - W as pass-through, U as mask
 *   maskz - zero vector as pass-through, U as mask
 * NOTE(review): B is presumably the normalization-interval selector and C the
 * sign-control selector of VGETMANTPS -- confirm against the Intel docs. */
7633#define _mm_getmant_ps(A, B, C) \
7634 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7635 (int)(((C)<<2) | (B)), \
7636 (__v4sf)_mm_setzero_ps(), \
7637 (__mmask8)-1))
7638
7639#define _mm_mask_getmant_ps(W, U, A, B, C) \
7640 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7641 (int)(((C)<<2) | (B)), \
7642 (__v4sf)(__m128)(W), \
7643 (__mmask8)(U)))
7644
7645#define _mm_maskz_getmant_ps(U, A, B, C) \
7646 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7647 (int)(((C)<<2) | (B)), \
7648 (__v4sf)_mm_setzero_ps(), \
7649 (__mmask8)(U)))
7650
/* getmant on a __m256, in plain / mask / maskz forms.
 * All three expand to __builtin_ia32_getmantps256_mask(A as __v8sf,
 * immediate, pass-through, mask). The immediate packs the two selector
 * arguments into one int: C shifted left by 2, OR'ed with B.
 *   plain - zero vector as pass-through, all-ones mask ((__mmask8)-1)
 *   mask  - W as pass-through, U as mask
 *   maskz - zero vector as pass-through, U as mask
 * NOTE(review): B is presumably the normalization-interval selector and C the
 * sign-control selector of VGETMANTPS -- confirm against the Intel docs. */
7651#define _mm256_getmant_ps(A, B, C) \
7652 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7653 (int)(((C)<<2) | (B)), \
7654 (__v8sf)_mm256_setzero_ps(), \
7655 (__mmask8)-1))
7656
7657#define _mm256_mask_getmant_ps(W, U, A, B, C) \
7658 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7659 (int)(((C)<<2) | (B)), \
7660 (__v8sf)(__m256)(W), \
7661 (__mmask8)(U)))
7662
7663#define _mm256_maskz_getmant_ps(U, A, B, C) \
7664 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7665 (int)(((C)<<2) | (B)), \
7666 (__v8sf)_mm256_setzero_ps(), \
7667 (__mmask8)(U)))
7668
/* Masked gathers with 64-bit indices and 64-bit elements (pd / epi64), for
 * 128-bit and 256-bit vectors. Each macro expands to one
 * __builtin_ia32_gather3div* call with the operands in this order: v1_old
 * cast to the result's element vector type, addr cast to void const *, index
 * cast to __v2di or __v4di, mask cast to __mmask8 and scale cast to int.
 * NOTE(review): presumably v1_old supplies the elements whose mask bit is
 * clear and scale must be a compile-time constant -- confirm against the
 * Intel Intrinsics Guide. */
7669#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
7670 ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
7671 (void const *)(addr), \
7672 (__v2di)(__m128i)(index), \
7673 (__mmask8)(mask), (int)(scale)))
7674
7675#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7676 ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
7677 (void const *)(addr), \
7678 (__v2di)(__m128i)(index), \
7679 (__mmask8)(mask), (int)(scale)))
7680
7681#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
7682 ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
7683 (void const *)(addr), \
7684 (__v4di)(__m256i)(index), \
7685 (__mmask8)(mask), (int)(scale)))
7686
7687#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7688 ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
7689 (void const *)(addr), \
7690 (__v4di)(__m256i)(index), \
7691 (__mmask8)(mask), (int)(scale)))
7692
/* Masked gathers with 64-bit indices and 32-bit elements (ps / epi32). Each
 * macro expands to one __builtin_ia32_gather3div* call with the operands in
 * this order: v1_old, addr cast to void const *, index cast to __v2di or
 * __v4di, mask cast to __mmask8 and scale cast to int.
 * The _mm256_ forms take a __m256i index but still use a 128-bit v1_old and
 * result (__v4sf / __v4si); only the index operand is 256 bits wide.
 * NOTE(review): presumably v1_old supplies the elements whose mask bit is
 * clear and scale must be a compile-time constant -- confirm against the
 * Intel Intrinsics Guide. */
7693#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
7694 ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
7695 (void const *)(addr), \
7696 (__v2di)(__m128i)(index), \
7697 (__mmask8)(mask), (int)(scale)))
7698
7699#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
7700 ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
7701 (void const *)(addr), \
7702 (__v2di)(__m128i)(index), \
7703 (__mmask8)(mask), (int)(scale)))
7704
7705#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
7706 ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
7707 (void const *)(addr), \
7708 (__v4di)(__m256i)(index), \
7709 (__mmask8)(mask), (int)(scale)))
7710
7711#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
7712 ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
7713 (void const *)(addr), \
7714 (__v4di)(__m256i)(index), \
7715 (__mmask8)(mask), (int)(scale)))
7716
/* Masked gather wrappers whose index vector is always cast to __v4si (a
 * 128-bit __m128i, also for the _mm256_ forms) and whose results are
 * __v2df / __v2di / __v4df / __v4di sized.
 * Macro arguments (v1_old, mask, index, addr, scale) are reordered into the
 * builtin's (v1_old, addr, index, mask, scale) operand order. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7740
/* Masked gather wrappers with __v4si / __v8si index vectors and equally
 * wide __v4sf / __v4si / __v8sf / __v8si results.
 * Macro arguments (v1_old, mask, index, addr, scale) are reordered into the
 * builtin's (v1_old, addr, index, mask, scale) operand order. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8si)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7764
/* Immediate-controlled permute of a __v4df vector: forwards X and the
 * integer control C to __builtin_ia32_permdf256. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))

/* Masked form: the permute result and W go to __builtin_ia32_selectpd_256
 * under mask U. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))

/* Zero-masked form: same select, with _mm256_setzero_pd() instead of W. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
7777
/* Immediate-controlled permute of a __v4di vector: forwards X and the
 * integer control C to __builtin_ia32_permdi256. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))

/* Masked form: the permute result and W go to __builtin_ia32_selectq_256
 * under mask U. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)(__m256i)(W)))

/* Zero-masked form: same select, with _mm256_setzero_si256() instead of W. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)_mm256_setzero_si256()))
7790
7791static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
7792_mm256_permutexvar_pd(__m256i __X, __m256d __Y) {
7793 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
7794}
7795
7796static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
7797_mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X,
7798 __m256d __Y) {
7799 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
7800 (__v4df)_mm256_permutexvar_pd(__X, __Y),
7801 (__v4df)__W);
7802}
7803
7804static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
7805_mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y) {
7806 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
7807 (__v4df)_mm256_permutexvar_pd(__X, __Y),
7808 (__v4df)_mm256_setzero_pd());
7809}
7810
7811static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7812_mm256_permutexvar_epi64(__m256i __X, __m256i __Y) {
7813 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
7814}
7815
7816static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7817_mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y) {
7818 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
7819 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
7820 (__v4di)_mm256_setzero_si256());
7821}
7822
7823static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7824_mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X,
7825 __m256i __Y) {
7826 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
7827 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
7828 (__v4di)__W);
7829}
7830
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped. */
#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
7832
7833static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7834_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) {
7835 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7836 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
7837 (__v8sf)__W);
7838}
7839
7840static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7841_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) {
7842 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7843 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
7844 (__v8sf)_mm256_setzero_ps());
7845}
7846
/* Alias for _mm256_permutevar8x32_epi32 with the two arguments swapped. */
#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
7848
7849static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7850_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
7851 __m256i __Y) {
7852 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7853 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
7854 (__v8si)__W);
7855}
7856
7857static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7858_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) {
7859 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7860 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
7861 (__v8si)_mm256_setzero_si256());
7862}
7863
/* Forwards A, B (as __v4si) and the integer imm to
 * __builtin_ia32_alignd128. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
                                     (__v4si)(__m128i)(B), (int)(imm)))

/* Masked form: the alignr result and W go to __builtin_ia32_selectd_128
 * under mask U. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                 (__v4si)(__m128i)(W)))

/* Zero-masked form: same select, with _mm_setzero_si128() instead of W. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                 (__v4si)_mm_setzero_si128()))
7877
/* Forwards A, B (as __v8si) and the integer imm to
 * __builtin_ia32_alignd256. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
                                     (__v8si)(__m256i)(B), (int)(imm)))

/* Masked form: the alignr result and W go to __builtin_ia32_selectd_256
 * under mask U. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)(__m256i)(W)))

/* Zero-masked form: same select, with _mm256_setzero_si256() instead of W. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)_mm256_setzero_si256()))
7891
/* Forwards A, B (as __v2di) and the integer imm to
 * __builtin_ia32_alignq128. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
                                     (__v2di)(__m128i)(B), (int)(imm)))

/* Masked form: the alignr result and W go to __builtin_ia32_selectq_128
 * under mask U. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                 (__v2di)(__m128i)(W)))

/* Zero-masked form: same select, with _mm_setzero_si128() instead of W. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                 (__v2di)_mm_setzero_si128()))
7905
/* Forwards A, B (as __v4di) and the integer imm to
 * __builtin_ia32_alignq256. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), (int)(imm)))

/* Masked form: the alignr result and W go to __builtin_ia32_selectq_256
 * under mask U. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)(__m256i)(W)))

/* Zero-masked form: same select, with _mm256_setzero_si256() instead of W. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)_mm256_setzero_si256()))
7919
7920static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
7921_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
7922 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
7923 (__v4sf)_mm_movehdup_ps(__A),
7924 (__v4sf)__W);
7925}
7926
7927static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
7929 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
7930 (__v4sf)_mm_movehdup_ps(__A),
7931 (__v4sf)_mm_setzero_ps());
7932}
7933
7934static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7935_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
7936 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7937 (__v8sf)_mm256_movehdup_ps(__A),
7938 (__v8sf)__W);
7939}
7940
7941static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7943 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7944 (__v8sf)_mm256_movehdup_ps(__A),
7945 (__v8sf)_mm256_setzero_ps());
7946}
7947
7948static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
7949_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
7950 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
7951 (__v4sf)_mm_moveldup_ps(__A),
7952 (__v4sf)__W);
7953}
7954
7955static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
7957 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
7958 (__v4sf)_mm_moveldup_ps(__A),
7959 (__v4sf)_mm_setzero_ps());
7960}
7961
7962static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7963_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
7964 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7965 (__v8sf)_mm256_moveldup_ps(__A),
7966 (__v8sf)__W);
7967}
7968
7969static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7971 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7972 (__v8sf)_mm256_moveldup_ps(__A),
7973 (__v8sf)_mm256_setzero_ps());
7974}
7975
/* Masked form of _mm256_shuffle_epi32: the shuffle of (A, I) and W go to
 * __builtin_ia32_selectd_256 under mask U. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)(__m256i)(W)))

/* Zero-masked form: same select, with _mm256_setzero_si256() instead of W. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)_mm256_setzero_si256()))
7985
/* Masked form of _mm_shuffle_epi32: the shuffle of (A, I) and W go to
 * __builtin_ia32_selectd_128 under mask U. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)(__m128i)(W)))

/* Zero-masked form: same select, with _mm_setzero_si128() instead of W. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)_mm_setzero_si128()))
7995
7996static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
7997_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) {
7998 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A,
7999 (__v2df)__W);
8000}
8001
8002static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8003_mm_maskz_mov_pd(__mmask8 __U, __m128d __A) {
8004 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A,
8005 (__v2df)_mm_setzero_pd());
8006}
8007
8008static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
8009_mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A) {
8010 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A,
8011 (__v4df)__W);
8012}
8013
8014static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
8016 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A,
8017 (__v4df)_mm256_setzero_pd());
8018}
8019
8020static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8021_mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A) {
8022 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A,
8023 (__v4sf)__W);
8024}
8025
8026static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8027_mm_maskz_mov_ps(__mmask8 __U, __m128 __A) {
8028 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A,
8029 (__v4sf)_mm_setzero_ps());
8030}
8031
8032static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
8033_mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A) {
8034 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A,
8035 (__v8sf)__W);
8036}
8037
8038static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
8040 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A,
8041 (__v8sf)_mm256_setzero_ps());
8042}
8043
8044static __inline__ __m128 __DEFAULT_FN_ATTRS128
8045_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8046{
8047 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8048 (__v4sf) __W,
8049 (__mmask8) __U);
8050}
8051
8052static __inline__ __m128 __DEFAULT_FN_ATTRS128
8054{
8055 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8056 (__v4sf)
8057 _mm_setzero_ps (),
8058 (__mmask8) __U);
8059}
8060
8061static __inline__ __m256 __DEFAULT_FN_ATTRS256
8062_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8063{
8064 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8065 (__v8sf) __W,
8066 (__mmask8) __U);
8067}
8068
8069static __inline__ __m256 __DEFAULT_FN_ATTRS256
8071{
8072 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8073 (__v8sf)
8075 (__mmask8) __U);
8076}
8077
/* Masked cvt_roundps_ph: forwards A (as __v4sf), the integer I, W (as
 * __v8hi) and the mask U to __builtin_ia32_vcvtps2ph_mask. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)(__m128i)(W), \
                                          (__mmask8)(U)))

/* Zero-masked form: _mm_setzero_si128() takes the place of W. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)_mm_setzero_si128(), \
                                          (__mmask8)(U)))

/* The cvtps_ph names are plain aliases of the cvt_roundps_ph macros. */
#define _mm_mask_cvtps_ph  _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
8090
/* Masked cvt_roundps_ph for a 256-bit source: forwards A (as __v8sf), the
 * integer I, W (as __v8hi) and the mask U to
 * __builtin_ia32_vcvtps2ph256_mask; the result is a 128-bit __m128i. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)(__m128i)(W), \
                                             (__mmask8)(U)))

/* Zero-masked form: _mm_setzero_si128() takes the place of W. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

/* The cvtps_ph names are plain aliases of the cvt_roundps_ph macros. */
#define _mm256_mask_cvtps_ph  _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
8103
8104#undef __DEFAULT_FN_ATTRS128
8105#undef __DEFAULT_FN_ATTRS256
8106#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
8107#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
8108
8109#endif /* __AVX512VLINTRIN_H */
static __inline__ vector float vector float __b
Definition altivec.h:578
return __v
Definition arm_acle.h:88
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X left by the number of bits given...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi32_epi64(__m128i __V)
Sign-extends 32-bit elements from the 128-bit vector of [4 x i32] in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epi32(__m256i __a, __m256i __b)
Compares the corresponding signed 32-bit integers in the two 256-bit vectors of [8 x i32] in __a and ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mul_epu32(__m256i __a, __m256i __b)
Multiplies unsigned 32-bit integers from even-numbered elements of two 256-bit vectors of [8 x i32] an...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a left by the number of bits given...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors of [8 x i32] in __a and __b...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a left by the number of bits given...
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastq_epi64(__m128i __X)
Broadcasts the low element from the 128-bit vector of [2 x i64] in __X to both elements of the result...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_slli_epi64(__m256i __a, int __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a left by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi32(__m256i __a)
Computes the absolute value of each signed 32-bit element in the 256-bit vector of [8 x i32] in __a a...
Definition avx2intrin.h:139
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu8_epi64(__m128i __V)
Zero-extends the first four bytes from the 128-bit integer vector in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu32_epi64(__m128i __V)
Zero-extends 32-bit elements from the 128-bit vector of [4 x i32] in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu8_epi32(__m128i __V)
Zero-extends bytes from the lower half of the 128-bit integer vector in __V and returns the 32-bit va...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X right by the number of bits give...
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastss_ps(__m128 __X)
Broadcasts the 32-bit floating-point value from the low element of the 128-bit vector of [4 x float] ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srli_epi64(__m256i __a, int __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors of [4 x i64] in __a and __b...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi8_epi32(__m128i __V)
Sign-extends bytes from the lower half of the 128-bit integer vector in __V and returns the 32-bit va...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastq_epi64(__m128i __X)
Broadcasts the low element from the 128-bit vector of [2 x i64] in __X to all elements of the result'...
#define __DEFAULT_FN_ATTRS256_CONSTEXPR
Definition avx2intrin.h:29
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mul_epi32(__m256i __a, __m256i __b)
Multiplies signed 32-bit integers from even-numbered elements of two 256-bit vectors of [8 x i32] and...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mullo_epi32(__m256i __a, __m256i __b)
Multiplies signed 32-bit integer elements of two 256-bit vectors of [8 x i32], and returns the lower ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epi32(__m256i __a, __m256i __b)
Compares the corresponding signed 32-bit integers in the two 256-bit vectors of [8 x i32] in __a and ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi32(__m256i __a, __m256i __b)
Subtracts 32-bit integers from corresponding elements of two 256-bit vectors of [8 x i32].
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors of [4 x i64] in __a and __b...
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastss_ps(__m128 __X)
Broadcasts the 32-bit floating-point value from the low element of the 128-bit vector of [4 x float] ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by the number of bits give...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi64(__m128i __X, __m128i __Y)
Shifts each 64-bit element of the 128-bit vector of [2 x i64] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu16_epi64(__m128i __V)
Zero-extends 16-bit elements from the lower half of the 128-bit vector of [8 x i16] in __V and return...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors of [8 x i32] in __a and __b...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi8_epi64(__m128i __V)
Sign-extends the first four bytes from the 128-bit integer vector in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srli_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi64(__m256i __a, __m256i __b)
Adds 64-bit integers from corresponding elements of two 256-bit vectors of [4 x i64] and returns the ...
Definition avx2intrin.h:333
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi16_epi64(__m128i __V)
Sign-extends 16-bit elements from the lower half of the 128-bit vector of [8 x i16] in __V and return...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_si256(__m256i __a, __m256i __b)
Computes the bitwise AND of the 256-bit integer vectors in __a and __b.
Definition avx2intrin.h:448
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_slli_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a left by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srai_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi32(__m256i __a, __m256i __b)
Adds 32-bit integers from corresponding elements of two 256-bit vectors of [8 x i32] and returns the ...
Definition avx2intrin.h:315
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastsd_pd(__m128d __X)
Broadcasts the 64-bit floating-point value from the low element of the 128-bit vector of [2 x double]...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu16_epi32(__m128i __V)
Zero-extends 16-bit elements from the 128-bit vector of [8 x i16] in __V and returns the 32-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epu32(__m256i __a, __m256i __b)
Compares the corresponding unsigned 32-bit integers in the two 256-bit vectors of [8 x i32] in __a an...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi64(__m128i __X, __m128i __Y)
Shifts each 64-bit element of the 128-bit vector of [2 x i64] in __X left by the number of bits given...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X left by the number of bits given...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epu32(__m256i __a, __m256i __b)
Compares the corresponding unsigned 32-bit integers in the two 256-bit vectors of [8 x i32] in __a an...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi16_epi32(__m128i __V)
Sign-extends 16-bit elements from the 128-bit vector of [8 x i16] in __V and returns the 32-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi64(__m256i __a, __m256i __b)
Subtracts 64-bit integers from corresponding elements of two 256-bit vectors of [4 x i64].
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastd_epi32(__m128i __X)
Broadcasts the low element from the 128-bit vector of [4 x i32] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastd_epi32(__m128i __X)
Broadcasts the low element from the 128-bit vector of [4 x i32] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi64(__m256i __X, __m256i __Y)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi64(__m256i __X, __m256i __Y)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __X left by the number of bits given...
unsigned char __mmask8
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu32(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
#define _mm_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi32_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask(__m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi32(void *__P, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi32(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epi64(__m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi64(void *__P, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi64(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epi64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi64(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepu32_pd(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi64_epi16(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B)
#define _mm256_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rorv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepu32_ps(__m128i __A)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi64(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_compress_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B)
#define _mm_cmpeq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi32(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi64(void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu32(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_getexp_pd(__m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu32(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi32(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
#define _mm256_permutexvar_epi32(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srai_epi64(__m128i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi32(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_pd(__mmask8 __U, __m256d __A)
#define _mm256_cmpeq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_scalef_pd(__m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expand_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi64(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcast_i32x4(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_min_epu64(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi64(void *__P, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
#define _mm_cmpneq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
#define _mm256_cmpeq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi64_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
#define _mm_cmpeq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ps(__m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_min_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rolv_epi32(__m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu32_ps(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu32(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A)
short __v2hi __attribute__((__vector_size__(4)))
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi32(__m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rolv_epi32(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_scalef_ps(__m256 __A, __m256 __B)
#define _mm256_permutexvar_ps(A, B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi16(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_abs_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rorv_epi32(__m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi64(__mmask8 __U, __m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps(__m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi64(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps(__m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A)
#define _mm_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi64(__m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi32(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi32_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rorv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epu64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rolv_epi64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi32(__m256i __a, __m256i __b)
#define _mm_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_max_epu64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expand_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sra_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi64(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
#define _mm_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi64(__m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi64_epi32(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi32(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi64(void *__P, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi32(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ps(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutexvar_pd(__m256i __X, __m256d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_getexp_ps(__m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu32_pd(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srai_epi64(__m256i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_max_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu32(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_compress_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epu64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi32(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi32_epi8(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rorv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi32(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi64(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi64(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_cmpneq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi32(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rolv_epi64(__m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
#define _mm_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutexvar_epi64(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16(__m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_permutevar_pd(__m256d __a, __m256i __c)
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector oper...
Definition avxintrin.h:829
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32].
Definition avxintrin.h:2269
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a)
Calculates the square roots of the values in a 256-bit vector of [8 x float].
Definition avxintrin.h:354
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_pd(__m256d __a, __m256d __b)
Subtracts two 256-bit vectors of [4 x double].
Definition avxintrin.h:116
static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].
Definition avxintrin.h:2194
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_pd(__m256d __a, __m256d __b)
Multiplies two 256-bit vectors of [4 x double].
Definition avxintrin.h:304
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_permutevar_ps(__m256 __a, __m256i __c)
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vecto...
Definition avxintrin.h:973
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_pd(__m256d __a, __m256d __b)
Divides two 256-bit vectors of [4 x double].
Definition avxintrin.h:186
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movehdup_ps(__m256 __a)
Moves and duplicates odd-indexed values from a 256-bit vector of [8 x float] to float values in a 256...
Definition avxintrin.h:2361
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, __m256 __b)
Subtracts two 256-bit vectors of [8 x float].
Definition avxintrin.h:132
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtepi32_ps(__m256i __a)
Converts a vector of [8 x i32] into a vector of [8 x float].
Definition avxintrin.h:2179
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpacklo_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] ...
Definition avxintrin.h:2504
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into four signed truncated (rounded toward zero) 32-bit int...
Definition avxintrin.h:2249
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtps_pd(__m128 __a)
Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double].
Definition avxintrin.h:2229
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32(__m256 __a)
Converts a vector of [8 x float] into a vector of [8 x i32].
Definition avxintrin.h:2213
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpackhi_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] ...
Definition avxintrin.h:2478
static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutevar_pd(__m128d __a, __m128i __c)
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector oper...
Definition avxintrin.h:791
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.
Definition avxintrin.h:286
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtepi32_pd(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x double].
Definition avxintrin.h:2165
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a)
Converts a vector of [8 x float] into eight signed truncated (rounded toward zero) 32-bit integers re...
Definition avxintrin.h:2289
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.
Definition avxintrin.h:244
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4298
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements se...
Definition avxintrin.h:4215
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_pd(__m256d __a, __m256d __b)
Adds two 256-bit vectors of [4 x double].
Definition avxintrin.h:82
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a)
Calculates the square roots of the values in a 256-bit vector of [4 x double].
Definition avxintrin.h:337
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi64x(long long __q)
Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements se...
Definition avxintrin.h:4272
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_ps(__m256 __a, __m256 __b)
Adds two 256-bit vectors of [8 x float].
Definition avxintrin.h:98
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_moveldup_ps(__m256 __a)
Moves and duplicates even-indexed values from a 256-bit vector of [8 x float] to float values in a 25...
Definition avxintrin.h:2386
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movedup_pd(__m256d __a)
Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to ...
Definition avxintrin.h:2408
static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutevar_ps(__m128 __a, __m128i __c)
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vecto...
Definition avxintrin.h:883
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4286
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a, __m256 __b)
Multiplies two 256-bit vectors of [8 x float].
Definition avxintrin.h:320
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
Definition avxintrin.h:265
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4310
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_ps(__m256 __a, __m256 __b)
Divides two 256-bit vectors of [8 x float].
Definition avxintrin.h:202
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpacklo_pd(__m256d __a, __m256d __b)
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves the...
Definition avxintrin.h:2452
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpackhi_pd(__m256d __a, __m256d __b)
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them...
Definition avxintrin.h:2431
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
Definition avxintrin.h:223
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
Definition emmintrin.h:218
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3050
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2822
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
Definition emmintrin.h:304
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3031
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
Definition emmintrin.h:2536
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2841
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
Definition emmintrin.h:1323
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
Definition emmintrin.h:2571
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double].
Definition emmintrin.h:259
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
Definition emmintrin.h:177
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value.
Definition emmintrin.h:3674
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
Definition emmintrin.h:4576
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3014
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2858
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
Definition emmintrin.h:4597
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
Definition emmintrin.h:2464
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
Definition emmintrin.h:2105
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3067
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
Definition emmintrin.h:1301
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into four signed truncated (rounded toward zero) 32-bit integers,...
Definition emmintrin.h:3362
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
Definition emmintrin.h:4469
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
Definition emmintrin.h:98
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
Definition emmintrin.h:4666
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
Definition emmintrin.h:2674
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
Definition emmintrin.h:350
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
Definition emmintrin.h:4686
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
Definition emmintrin.h:4490
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2805
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
Definition emmintrin.h:2143
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
Definition emmintrin.h:3325
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition emmintrin.h:3709
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2934
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
Definition emmintrin.h:138
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2916
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
Definition emmintrin.h:3343
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
Computes a negated multiply-add of 128-bit vectors of [4 x float].
Definition fmaintrin.h:248
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
Computes a multiply-add of 128-bit vectors of [4 x float].
Definition fmaintrin.h:48
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
Computes a negated multiply-add of 128-bit vectors of [2 x double].
Definition fmaintrin.h:269
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
Computes a multiply-subtract of 256-bit vectors of [4 x double].
Definition fmaintrin.h:615
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
Computes a multiply-subtract of 256-bit vectors of [8 x float].
Definition fmaintrin.h:594
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
Computes a multiply-subtract of 128-bit vectors of [4 x float].
Definition fmaintrin.h:148
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
Computes a negated multiply-add of 256-bit vectors of [4 x double].
Definition fmaintrin.h:657
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
Computes a multiply-add of 256-bit vectors of [8 x float].
Definition fmaintrin.h:552
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
Computes a negated multiply-subtract of 256-bit vectors of [4 x double].
Definition fmaintrin.h:699
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
Computes a negated multiply-subtract of 256-bit vectors of [8 x float].
Definition fmaintrin.h:678
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
Computes a multiply-add of 128-bit vectors of [2 x double].
Definition fmaintrin.h:69
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
Computes a multiply-add of 256-bit vectors of [4 x double].
Definition fmaintrin.h:573
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
Computes a negated multiply-subtract of 128-bit vectors of [4 x float].
Definition fmaintrin.h:348
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
Computes a negated multiply-add of 256-bit vectors of [8 x float].
Definition fmaintrin.h:636
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
Computes a multiply-subtract of 128-bit vectors of [2 x double].
Definition fmaintrin.h:169
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
Computes a negated multiply-subtract of 128-bit vectors of [2 x double].
Definition fmaintrin.h:369
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double]...
Definition pmmintrin.h:248
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-b...
Definition pmmintrin.h:150
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored i...
Definition pmmintrin.h:129
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition bmi2intrin.h:19
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding even-indexed elements of two 128-bit vectors of [4 x i32] and returns a 128-...
Definition smmintrin.h:562
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
Definition smmintrin.h:760
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu16_epi64(__m128i __V)
Zero-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
Definition smmintrin.h:1426
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
Definition smmintrin.h:796
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu32_epi64(__m128i __V)
Zero-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
Definition smmintrin.h:1445
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi8_epi32(__m128i __V)
Sign-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
Definition smmintrin.h:1248
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi16_epi64(__m128i __V)
Sign-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
Definition smmintrin.h:1309
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu8_epi32(__m128i __V)
Zero-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
Definition smmintrin.h:1369
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi8_epi64(__m128i __V)
Sign-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
Definition smmintrin.h:1269
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
Definition smmintrin.h:742
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mullo_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding elements of two 128-bit vectors of [4 x i32] and returns the lower 32 bits of...
Definition smmintrin.h:543
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi16_epi32(__m128i __V)
Sign-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
Definition smmintrin.h:1290
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
Definition smmintrin.h:778
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_epi64(__m128i __V)
Sign-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
Definition smmintrin.h:1328
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu16_epi32(__m128i __V)
Zero-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
Definition smmintrin.h:1407
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu8_epi64(__m128i __V)
Zero-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
Definition smmintrin.h:1388
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition tmmintrin.h:131
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...
Definition xmmintrin.h:2783
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition.
Definition xmmintrin.h:98
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
Definition xmmintrin.h:2762
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
Definition xmmintrin.h:218
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values.
Definition xmmintrin.h:415
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values.
Definition xmmintrin.h:369
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
Definition xmmintrin.h:252
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2021
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
Definition xmmintrin.h:139
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
Definition xmmintrin.h:179