clang 22.0.0git
avx512vlintrin.h
Go to the documentation of this file.
1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLINTRIN_H
15#define __AVX512VLINTRIN_H
16
/* Attribute bundles applied to the intrinsics defined in this header; the
 * 128/256 suffix is the value handed to __min_vector_width__. */
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), \
                 __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), \
                 __min_vector_width__(128)))

/* The _CONSTEXPR flavours append `constexpr` when compiled as C++11 or newer
 * and are otherwise identical to the plain bundles. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif
31
/* Sub-128-bit helper vector types: 2-byte and 4-byte vectors of char, and a
 * 4-byte vector of short. */
typedef char __v2qi __attribute__((__vector_size__(2)));
typedef char __v4qi __attribute__((__vector_size__(4)));
typedef short __v2hi __attribute__((__vector_size__(4)));
35
36/* Integer compare */
37
/* 128-bit epi32 compare shorthands: each predicate-named macro forwards to
 * _mm_cmp_epi32_mask / _mm_mask_cmp_epi32_mask with the matching
 * _MM_CMPINT_* constant. */
#define _mm_cmpeq_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
62
/* 256-bit epi32 compare shorthands: each predicate-named macro forwards to
 * _mm256_cmp_epi32_mask / _mm256_mask_cmp_epi32_mask with the matching
 * _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
87
/* 128-bit epu32 compare shorthands: each predicate-named macro forwards to
 * _mm_cmp_epu32_mask / _mm_mask_cmp_epu32_mask with the matching
 * _MM_CMPINT_* constant. */
#define _mm_cmpeq_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
112
/* 256-bit epu32 compare shorthands: each predicate-named macro forwards to
 * _mm256_cmp_epu32_mask / _mm256_mask_cmp_epu32_mask with the matching
 * _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
137
/* 128-bit epi64 compare shorthands: each predicate-named macro forwards to
 * _mm_cmp_epi64_mask / _mm_mask_cmp_epi64_mask with the matching
 * _MM_CMPINT_* constant. */
#define _mm_cmpeq_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
162
/* 256-bit epi64 compare shorthands: each predicate-named macro forwards to
 * _mm256_cmp_epi64_mask / _mm256_mask_cmp_epi64_mask with the matching
 * _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
187
/* 128-bit epu64 compare shorthands: each predicate-named macro forwards to
 * _mm_cmp_epu64_mask / _mm_mask_cmp_epu64_mask with the matching
 * _MM_CMPINT_* constant. */
#define _mm_cmpeq_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
212
/* 256-bit epu64 compare shorthands: each predicate-named macro forwards to
 * _mm256_cmp_epu64_mask / _mm256_mask_cmp_epu64_mask with the matching
 * _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
237
238static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
239_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
240 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
241 (__v8si)_mm256_add_epi32(__A, __B),
242 (__v8si)__W);
243}
244
245static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
246_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
247 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
248 (__v8si)_mm256_add_epi32(__A, __B),
249 (__v8si)_mm256_setzero_si256());
250}
251
252static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
253_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
254 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
255 (__v4di)_mm256_add_epi64(__A, __B),
256 (__v4di)__W);
257}
258
259static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
260_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
261 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
262 (__v4di)_mm256_add_epi64(__A, __B),
263 (__v4di)_mm256_setzero_si256());
264}
265
266static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
267_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
268 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
269 (__v8si)_mm256_sub_epi32(__A, __B),
270 (__v8si)__W);
271}
272
273static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
274_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
275 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
276 (__v8si)_mm256_sub_epi32(__A, __B),
277 (__v8si)_mm256_setzero_si256());
278}
279
280static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
281_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
282 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
283 (__v4di)_mm256_sub_epi64(__A, __B),
284 (__v4di)__W);
285}
286
287static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
288_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
289 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
290 (__v4di)_mm256_sub_epi64(__A, __B),
291 (__v4di)_mm256_setzero_si256());
292}
293
294static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
295_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
296 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
297 (__v4si)_mm_add_epi32(__A, __B),
298 (__v4si)__W);
299}
300
301static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
302_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
303 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
304 (__v4si)_mm_add_epi32(__A, __B),
305 (__v4si)_mm_setzero_si128());
306}
307
308static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
309_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
310 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
311 (__v2di)_mm_add_epi64(__A, __B),
312 (__v2di)__W);
313}
314
315static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
316_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
317 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
318 (__v2di)_mm_add_epi64(__A, __B),
319 (__v2di)_mm_setzero_si128());
320}
321
322static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
323_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
324 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
325 (__v4si)_mm_sub_epi32(__A, __B),
326 (__v4si)__W);
327}
328
329static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
330_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
331 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
332 (__v4si)_mm_sub_epi32(__A, __B),
333 (__v4si)_mm_setzero_si128());
334}
335
336static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
337_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
338 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
339 (__v2di)_mm_sub_epi64(__A, __B),
340 (__v2di)__W);
341}
342
343static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
344_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
345 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
346 (__v2di)_mm_sub_epi64(__A, __B),
347 (__v2di)_mm_setzero_si128());
348}
349
350static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
351_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
352 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
353 (__v4di)_mm256_mul_epi32(__X, __Y),
354 (__v4di)__W);
355}
356
357static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
358_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) {
359 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
360 (__v4di)_mm256_mul_epi32(__X, __Y),
361 (__v4di)_mm256_setzero_si256());
362}
363
364static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
365_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
366 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
367 (__v2di)_mm_mul_epi32(__X, __Y),
368 (__v2di)__W);
369}
370
371static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
372_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) {
373 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
374 (__v2di)_mm_mul_epi32(__X, __Y),
375 (__v2di)_mm_setzero_si128());
376}
377
378static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
379_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
380 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
381 (__v4di)_mm256_mul_epu32(__X, __Y),
382 (__v4di)__W);
383}
384
385static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
386_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) {
387 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
388 (__v4di)_mm256_mul_epu32(__X, __Y),
389 (__v4di)_mm256_setzero_si256());
390}
391
392static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
393_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
394 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
395 (__v2di)_mm_mul_epu32(__X, __Y),
396 (__v2di)__W);
397}
398
399static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
400_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) {
401 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
402 (__v2di)_mm_mul_epu32(__X, __Y),
403 (__v2di)_mm_setzero_si128());
404}
405
406static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
407_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
408 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
409 (__v8si)_mm256_mullo_epi32(__A, __B),
410 (__v8si)_mm256_setzero_si256());
411}
412
413static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
414_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
415 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
416 (__v8si)_mm256_mullo_epi32(__A, __B),
417 (__v8si)__W);
418}
419
420static __inline__ __m128i __DEFAULT_FN_ATTRS128
421_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
422{
423 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
424 (__v4si)_mm_mullo_epi32(__A, __B),
425 (__v4si)_mm_setzero_si128());
426}
427
428static __inline__ __m128i __DEFAULT_FN_ATTRS128
429_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
430{
431 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
432 (__v4si)_mm_mullo_epi32(__A, __B),
433 (__v4si)__W);
434}
435
436static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
437_mm256_and_epi32(__m256i __a, __m256i __b) {
438 return (__m256i)((__v8su)__a & (__v8su)__b);
439}
440
441static __inline__ __m256i __DEFAULT_FN_ATTRS256
442_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
443{
444 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
445 (__v8si)_mm256_and_epi32(__A, __B),
446 (__v8si)__W);
447}
448
449static __inline__ __m256i __DEFAULT_FN_ATTRS256
450_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
451{
452 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
453}
454
455static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
456_mm_and_epi32(__m128i __a, __m128i __b) {
457 return (__m128i)((__v4su)__a & (__v4su)__b);
458}
459
460static __inline__ __m128i __DEFAULT_FN_ATTRS128
461_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
462{
463 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
464 (__v4si)_mm_and_epi32(__A, __B),
465 (__v4si)__W);
466}
467
468static __inline__ __m128i __DEFAULT_FN_ATTRS128
469_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
470{
471 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
472}
473
474static __inline__ __m256i __DEFAULT_FN_ATTRS256
475_mm256_andnot_epi32(__m256i __A, __m256i __B)
476{
477 return (__m256i)(~(__v8su)__A & (__v8su)__B);
478}
479
480static __inline__ __m256i __DEFAULT_FN_ATTRS256
481_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
482{
483 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
484 (__v8si)_mm256_andnot_epi32(__A, __B),
485 (__v8si)__W);
486}
487
488static __inline__ __m256i __DEFAULT_FN_ATTRS256
489_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
490{
492 __U, __A, __B);
493}
494
495static __inline__ __m128i __DEFAULT_FN_ATTRS128
496_mm_andnot_epi32(__m128i __A, __m128i __B)
497{
498 return (__m128i)(~(__v4su)__A & (__v4su)__B);
499}
500
501static __inline__ __m128i __DEFAULT_FN_ATTRS128
502_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
503{
504 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
505 (__v4si)_mm_andnot_epi32(__A, __B),
506 (__v4si)__W);
507}
508
509static __inline__ __m128i __DEFAULT_FN_ATTRS128
510_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
511{
512 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
513}
514
515static __inline__ __m256i __DEFAULT_FN_ATTRS256
516_mm256_or_epi32(__m256i __a, __m256i __b)
517{
518 return (__m256i)((__v8su)__a | (__v8su)__b);
519}
520
521static __inline__ __m256i __DEFAULT_FN_ATTRS256
522_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
523{
524 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
525 (__v8si)_mm256_or_epi32(__A, __B),
526 (__v8si)__W);
527}
528
529static __inline__ __m256i __DEFAULT_FN_ATTRS256
530_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
531{
532 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
533}
534
535static __inline__ __m128i __DEFAULT_FN_ATTRS128
536_mm_or_epi32(__m128i __a, __m128i __b)
537{
538 return (__m128i)((__v4su)__a | (__v4su)__b);
539}
540
541static __inline__ __m128i __DEFAULT_FN_ATTRS128
542_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
543{
544 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
545 (__v4si)_mm_or_epi32(__A, __B),
546 (__v4si)__W);
547}
548
549static __inline__ __m128i __DEFAULT_FN_ATTRS128
550_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
551{
552 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
553}
554
555static __inline__ __m256i __DEFAULT_FN_ATTRS256
556_mm256_xor_epi32(__m256i __a, __m256i __b)
557{
558 return (__m256i)((__v8su)__a ^ (__v8su)__b);
559}
560
561static __inline__ __m256i __DEFAULT_FN_ATTRS256
562_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
563{
564 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
565 (__v8si)_mm256_xor_epi32(__A, __B),
566 (__v8si)__W);
567}
568
569static __inline__ __m256i __DEFAULT_FN_ATTRS256
570_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
571{
572 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
573}
574
575static __inline__ __m128i __DEFAULT_FN_ATTRS128
576_mm_xor_epi32(__m128i __a, __m128i __b)
577{
578 return (__m128i)((__v4su)__a ^ (__v4su)__b);
579}
580
581static __inline__ __m128i __DEFAULT_FN_ATTRS128
582_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
583{
584 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
585 (__v4si)_mm_xor_epi32(__A, __B),
586 (__v4si)__W);
587}
588
589static __inline__ __m128i __DEFAULT_FN_ATTRS128
590_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
591{
592 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
593}
594
595static __inline__ __m256i __DEFAULT_FN_ATTRS256
596_mm256_and_epi64(__m256i __a, __m256i __b)
597{
598 return (__m256i)((__v4du)__a & (__v4du)__b);
599}
600
601static __inline__ __m256i __DEFAULT_FN_ATTRS256
602_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
603{
604 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
605 (__v4di)_mm256_and_epi64(__A, __B),
606 (__v4di)__W);
607}
608
609static __inline__ __m256i __DEFAULT_FN_ATTRS256
610_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
611{
612 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
613}
614
615static __inline__ __m128i __DEFAULT_FN_ATTRS128
616_mm_and_epi64(__m128i __a, __m128i __b)
617{
618 return (__m128i)((__v2du)__a & (__v2du)__b);
619}
620
621static __inline__ __m128i __DEFAULT_FN_ATTRS128
622_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
623{
624 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
625 (__v2di)_mm_and_epi64(__A, __B),
626 (__v2di)__W);
627}
628
629static __inline__ __m128i __DEFAULT_FN_ATTRS128
630_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
631{
632 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
633}
634
635static __inline__ __m256i __DEFAULT_FN_ATTRS256
636_mm256_andnot_epi64(__m256i __A, __m256i __B)
637{
638 return (__m256i)(~(__v4du)__A & (__v4du)__B);
639}
640
641static __inline__ __m256i __DEFAULT_FN_ATTRS256
642_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
643{
644 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
645 (__v4di)_mm256_andnot_epi64(__A, __B),
646 (__v4di)__W);
647}
648
649static __inline__ __m256i __DEFAULT_FN_ATTRS256
650_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
651{
653 __U, __A, __B);
654}
655
656static __inline__ __m128i __DEFAULT_FN_ATTRS128
657_mm_andnot_epi64(__m128i __A, __m128i __B)
658{
659 return (__m128i)(~(__v2du)__A & (__v2du)__B);
660}
661
662static __inline__ __m128i __DEFAULT_FN_ATTRS128
663_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
664{
665 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
666 (__v2di)_mm_andnot_epi64(__A, __B),
667 (__v2di)__W);
668}
669
670static __inline__ __m128i __DEFAULT_FN_ATTRS128
671_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
672{
673 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
674}
675
676static __inline__ __m256i __DEFAULT_FN_ATTRS256
677_mm256_or_epi64(__m256i __a, __m256i __b)
678{
679 return (__m256i)((__v4du)__a | (__v4du)__b);
680}
681
682static __inline__ __m256i __DEFAULT_FN_ATTRS256
683_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
684{
685 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
686 (__v4di)_mm256_or_epi64(__A, __B),
687 (__v4di)__W);
688}
689
690static __inline__ __m256i __DEFAULT_FN_ATTRS256
691_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
692{
693 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
694}
695
696static __inline__ __m128i __DEFAULT_FN_ATTRS128
697_mm_or_epi64(__m128i __a, __m128i __b)
698{
699 return (__m128i)((__v2du)__a | (__v2du)__b);
700}
701
702static __inline__ __m128i __DEFAULT_FN_ATTRS128
703_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
704{
705 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
706 (__v2di)_mm_or_epi64(__A, __B),
707 (__v2di)__W);
708}
709
710static __inline__ __m128i __DEFAULT_FN_ATTRS128
711_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
712{
713 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
714}
715
716static __inline__ __m256i __DEFAULT_FN_ATTRS256
717_mm256_xor_epi64(__m256i __a, __m256i __b)
718{
719 return (__m256i)((__v4du)__a ^ (__v4du)__b);
720}
721
722static __inline__ __m256i __DEFAULT_FN_ATTRS256
723_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
724{
725 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
726 (__v4di)_mm256_xor_epi64(__A, __B),
727 (__v4di)__W);
728}
729
730static __inline__ __m256i __DEFAULT_FN_ATTRS256
731_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
732{
733 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
734}
735
736static __inline__ __m128i __DEFAULT_FN_ATTRS128
737_mm_xor_epi64(__m128i __a, __m128i __b)
738{
739 return (__m128i)((__v2du)__a ^ (__v2du)__b);
740}
741
742static __inline__ __m128i __DEFAULT_FN_ATTRS128
743_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
744 __m128i __B)
745{
746 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
747 (__v2di)_mm_xor_epi64(__A, __B),
748 (__v2di)__W);
749}
750
751static __inline__ __m128i __DEFAULT_FN_ATTRS128
752_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
753{
754 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
755}
756
/* Generic 128-bit compares on 32-bit lanes, yielding an __mmask8.  `p` is the
 * comparison predicate and `m` the incoming mask; each unmasked form is the
 * masked form invoked with an all-ones mask. */
#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), \
                                         (int)(p), (__mmask8)(m)))

#define _mm_cmp_epi32_mask(a, b, p) \
  _mm_mask_cmp_epi32_mask((__mmask8)-1, (a), (b), (p))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), \
                                          (int)(p), (__mmask8)(m)))

#define _mm_cmp_epu32_mask(a, b, p) \
  _mm_mask_cmp_epu32_mask((__mmask8)-1, (a), (b), (p))
776
/* Generic 256-bit compares on 32-bit lanes, yielding an __mmask8.  `p` is the
 * comparison predicate and `m` the incoming mask; each unmasked form is the
 * masked form invoked with an all-ones mask. */
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), \
                                         (int)(p), (__mmask8)(m)))

#define _mm256_cmp_epi32_mask(a, b, p) \
  _mm256_mask_cmp_epi32_mask((__mmask8)-1, (a), (b), (p))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), \
                                          (int)(p), (__mmask8)(m)))

#define _mm256_cmp_epu32_mask(a, b, p) \
  _mm256_mask_cmp_epu32_mask((__mmask8)-1, (a), (b), (p))
796
/* Generic 128-bit compares on 64-bit lanes, yielding an __mmask8.  `p` is the
 * comparison predicate and `m` the incoming mask; each unmasked form is the
 * masked form invoked with an all-ones mask. */
#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), \
                                         (int)(p), (__mmask8)(m)))

#define _mm_cmp_epi64_mask(a, b, p) \
  _mm_mask_cmp_epi64_mask((__mmask8)-1, (a), (b), (p))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), \
                                          (int)(p), (__mmask8)(m)))

#define _mm_cmp_epu64_mask(a, b, p) \
  _mm_mask_cmp_epu64_mask((__mmask8)-1, (a), (b), (p))
816
/* Generic 256-bit compares on 64-bit lanes, yielding an __mmask8.  `p` is the
 * comparison predicate and `m` the incoming mask; each unmasked form is the
 * masked form invoked with an all-ones mask. */
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), \
                                         (int)(p), (__mmask8)(m)))

#define _mm256_cmp_epi64_mask(a, b, p) \
  _mm256_mask_cmp_epi64_mask((__mmask8)-1, (a), (b), (p))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), \
                                          (int)(p), (__mmask8)(m)))

#define _mm256_cmp_epu64_mask(a, b, p) \
  _mm256_mask_cmp_epu64_mask((__mmask8)-1, (a), (b), (p))
836
/* Generic 256-bit floating-point compares (float and double lanes), yielding
 * an __mmask8.  `p` is the comparison predicate and `m` the incoming mask;
 * each unmasked form is the masked form invoked with an all-ones mask. */
#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), \
                                          (int)(p), (__mmask8)(m)))

#define _mm256_cmp_ps_mask(a, b, p) \
  _mm256_mask_cmp_ps_mask((__mmask8)-1, (a), (b), (p))

#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), \
                                          (int)(p), (__mmask8)(m)))

#define _mm256_cmp_pd_mask(a, b, p) \
  _mm256_mask_cmp_pd_mask((__mmask8)-1, (a), (b), (p))
856
/* Calls __builtin_ia32_cmpps128_mask on A and B (viewed as __v4sf) with
 * predicate P and an all-ones input mask; yields an __mmask8. */
#define _mm_cmp_ps_mask(A, B, P) \
  ((__mmask8)__builtin_ia32_cmpps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(P), (__mmask8)-1))
861
/* Calls __builtin_ia32_cmpps128_mask on A and B (viewed as __v4sf) with
 * predicate P and input mask M; yields an __mmask8. */
#define _mm_mask_cmp_ps_mask(M, A, B, P) \
  ((__mmask8)__builtin_ia32_cmpps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(P), (__mmask8)(M)))
866
/* Calls __builtin_ia32_cmppd128_mask on A and B (viewed as __v2df) with
 * predicate P and an all-ones input mask; yields an __mmask8. */
#define _mm_cmp_pd_mask(A, B, P) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(P), (__mmask8)-1))
871
/* Calls __builtin_ia32_cmppd128_mask on A and B (viewed as __v2df) with
 * predicate P and input mask M; yields an __mmask8. */
#define _mm_mask_cmp_pd_mask(M, A, B, P) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(P), (__mmask8)(M)))
876
877static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
878_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
879 return (__m128d)__builtin_ia32_selectpd_128(
880 (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C), (__v2df)__A);
881}
882
883static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
884_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
885 return (__m128d)__builtin_ia32_selectpd_128(
886 (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C), (__v2df)__C);
887}
888
889static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
890_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
891 return (__m128d)__builtin_ia32_selectpd_128(
892 (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C),
893 (__v2df)_mm_setzero_pd());
894}
895
896static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
897_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
898 return (__m128d)__builtin_ia32_selectpd_128(
899 (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__A);
900}
901
902static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
903_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
904 return (__m128d)__builtin_ia32_selectpd_128(
905 (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__C);
906}
907
908static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
909_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
910 return (__m128d)__builtin_ia32_selectpd_128(
911 (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C),
912 (__v2df)_mm_setzero_pd());
913}
914
915static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
916_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
917 return (__m128d)__builtin_ia32_selectpd_128(
918 (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__A);
919}
920
921static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
922_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
923 return (__m128d)__builtin_ia32_selectpd_128(
924 (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__C);
925}
926
927static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
928_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
929 return (__m128d)__builtin_ia32_selectpd_128(
930 (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C),
931 (__v2df)_mm_setzero_pd());
932}
933
934static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
935_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
936 return (__m128d)__builtin_ia32_selectpd_128(
937 (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__A);
938}
939
940static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
941_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
942 return (__m128d)__builtin_ia32_selectpd_128(
943 (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__C);
944}
945
946static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
947_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
948 return (__m128d)__builtin_ia32_selectpd_128(
949 (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C),
950 (__v2df)_mm_setzero_pd());
951}
952
953static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
954_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
955 return (__m256d)__builtin_ia32_selectpd_256(
956 (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C), (__v4df)__A);
957}
958
959static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
960_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
961 return (__m256d)__builtin_ia32_selectpd_256(
962 (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C), (__v4df)__C);
963}
964
965static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
966_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
967 return (__m256d)__builtin_ia32_selectpd_256(
968 (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C),
969 (__v4df)_mm256_setzero_pd());
970}
971
972static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
973_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
974 return (__m256d)__builtin_ia32_selectpd_256(
975 (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__A);
976}
977
978static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
979_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
980 return (__m256d)__builtin_ia32_selectpd_256(
981 (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__C);
982}
983
984static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
985_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
986 return (__m256d)__builtin_ia32_selectpd_256(
987 (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C),
988 (__v4df)_mm256_setzero_pd());
989}
990
991static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
992_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
993 return (__m256d)__builtin_ia32_selectpd_256(
994 (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__A);
995}
996
997static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
998_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
999 return (__m256d)__builtin_ia32_selectpd_256(
1000 (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__C);
1001}
1002
1003static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1004_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
1005 return (__m256d)__builtin_ia32_selectpd_256(
1006 (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C),
1007 (__v4df)_mm256_setzero_pd());
1008}
1009
1010static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1011_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
1012 return (__m256d)__builtin_ia32_selectpd_256(
1013 (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__A);
1014}
1015
1016static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1017_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
1018 return (__m256d)__builtin_ia32_selectpd_256(
1019 (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__C);
1020}
1021
1022static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1023_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
1024 return (__m256d)__builtin_ia32_selectpd_256(
1025 (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C),
1026 (__v4df)_mm256_setzero_pd());
1027}
1028
1029static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1030_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1031 return (__m128)__builtin_ia32_selectps_128(
1032 (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C), (__v4sf)__A);
1033}
1034
1035static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1036_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1037 return (__m128)__builtin_ia32_selectps_128(
1038 (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C), (__v4sf)__C);
1039}
1040
1041static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1042_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
1043 return (__m128)__builtin_ia32_selectps_128(
1044 (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C),
1045 (__v4sf)_mm_setzero_ps());
1046}
1047
1048static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1049_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1050 return (__m128)__builtin_ia32_selectps_128(
1051 (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__A);
1052}
1053
1054static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1055_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1056 return (__m128)__builtin_ia32_selectps_128(
1057 (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__C);
1058}
1059
1060static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1061_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
1062 return (__m128)__builtin_ia32_selectps_128(
1063 (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C),
1064 (__v4sf)_mm_setzero_ps());
1065}
1066
1067static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1068_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1069 return (__m128)__builtin_ia32_selectps_128(
1070 (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__A);
1071}
1072
1073static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1074_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1075 return (__m128)__builtin_ia32_selectps_128(
1076 (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__C);
1077}
1078
1079static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1080_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
1081 return (__m128)__builtin_ia32_selectps_128(
1082 (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C),
1083 (__v4sf)_mm_setzero_ps());
1084}
1085
1086static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1087_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1088 return (__m128)__builtin_ia32_selectps_128(
1089 (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__A);
1090}
1091
1092static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1093_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1094 return (__m128)__builtin_ia32_selectps_128(
1095 (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__C);
1096}
1097
1098static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1099_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
1100 return (__m128)__builtin_ia32_selectps_128(
1101 (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C),
1102 (__v4sf)_mm_setzero_ps());
1103}
1104
1105static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1106_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1107 return (__m256)__builtin_ia32_selectps_256(
1108 (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C), (__v8sf)__A);
1109}
1110
1111static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1112_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1113 return (__m256)__builtin_ia32_selectps_256(
1114 (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C), (__v8sf)__C);
1115}
1116
1117static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1118_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
1119 return (__m256)__builtin_ia32_selectps_256(
1120 (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C),
1121 (__v8sf)_mm256_setzero_ps());
1122}
1123
1124static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1125_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1126 return (__m256)__builtin_ia32_selectps_256(
1127 (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__A);
1128}
1129
1130static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1131_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1132 return (__m256)__builtin_ia32_selectps_256(
1133 (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__C);
1134}
1135
1136static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1137_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
1138 return (__m256)__builtin_ia32_selectps_256(
1139 (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C),
1140 (__v8sf)_mm256_setzero_ps());
1141}
1142
1143static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1144_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1145 return (__m256)__builtin_ia32_selectps_256(
1146 (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__A);
1147}
1148
1149static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1150_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1151 return (__m256)__builtin_ia32_selectps_256(
1152 (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__C);
1153}
1154
1155static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1156_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
1157 return (__m256)__builtin_ia32_selectps_256(
1158 (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C),
1159 (__v8sf)_mm256_setzero_ps());
1160}
1161
1162static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1163_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1164 return (__m256)__builtin_ia32_selectps_256(
1165 (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__A);
1166}
1167
1168static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1169_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1170 return (__m256)__builtin_ia32_selectps_256(
1171 (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__C);
1172}
1173
1174static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1175_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
1176 return (__m256)__builtin_ia32_selectps_256(
1177 (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C),
1178 (__v8sf)_mm256_setzero_ps());
1179}
1180
1181static __inline__ __m128d __DEFAULT_FN_ATTRS128
1182_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
1183 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1184 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1185 (__v2df) __B,
1186 (__v2df) __C),
1187 (__v2df) __A);
1188}
1189
1190static __inline__ __m128d __DEFAULT_FN_ATTRS128
1191_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1192{
1193 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1194 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1195 (__v2df) __B,
1196 (__v2df) __C),
1197 (__v2df) __C);
1198}
1199
1200static __inline__ __m128d __DEFAULT_FN_ATTRS128
1201_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1202{
1203 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1204 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1205 (__v2df) __B,
1206 (__v2df) __C),
1207 (__v2df)_mm_setzero_pd());
1208}
1209
1210static __inline__ __m128d __DEFAULT_FN_ATTRS128
1211_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1212{
1213 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1214 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1215 (__v2df) __B,
1216 -(__v2df) __C),
1217 (__v2df) __A);
1218}
1219
1220static __inline__ __m128d __DEFAULT_FN_ATTRS128
1221_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1222{
1223 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1224 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1225 (__v2df) __B,
1226 -(__v2df) __C),
1227 (__v2df)_mm_setzero_pd());
1228}
1229
1230static __inline__ __m256d __DEFAULT_FN_ATTRS256
1231_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1232{
1233 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1234 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1235 (__v4df) __B,
1236 (__v4df) __C),
1237 (__v4df) __A);
1238}
1239
1240static __inline__ __m256d __DEFAULT_FN_ATTRS256
1241_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1242{
1243 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1244 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1245 (__v4df) __B,
1246 (__v4df) __C),
1247 (__v4df) __C);
1248}
1249
1250static __inline__ __m256d __DEFAULT_FN_ATTRS256
1251_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1252{
1253 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1254 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1255 (__v4df) __B,
1256 (__v4df) __C),
1257 (__v4df)_mm256_setzero_pd());
1258}
1259
1260static __inline__ __m256d __DEFAULT_FN_ATTRS256
1261_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1262{
1263 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1264 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1265 (__v4df) __B,
1266 -(__v4df) __C),
1267 (__v4df) __A);
1268}
1269
1270static __inline__ __m256d __DEFAULT_FN_ATTRS256
1271_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1272{
1273 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1274 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1275 (__v4df) __B,
1276 -(__v4df) __C),
1277 (__v4df)_mm256_setzero_pd());
1278}
1279
1280static __inline__ __m128 __DEFAULT_FN_ATTRS128
1281_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1282{
1283 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1284 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1285 (__v4sf) __B,
1286 (__v4sf) __C),
1287 (__v4sf) __A);
1288}
1289
1290static __inline__ __m128 __DEFAULT_FN_ATTRS128
1291_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1292{
1293 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1294 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1295 (__v4sf) __B,
1296 (__v4sf) __C),
1297 (__v4sf) __C);
1298}
1299
1300static __inline__ __m128 __DEFAULT_FN_ATTRS128
1301_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1302{
1303 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1304 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1305 (__v4sf) __B,
1306 (__v4sf) __C),
1307 (__v4sf)_mm_setzero_ps());
1308}
1309
1310static __inline__ __m128 __DEFAULT_FN_ATTRS128
1311_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1312{
1313 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1314 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1315 (__v4sf) __B,
1316 -(__v4sf) __C),
1317 (__v4sf) __A);
1318}
1319
1320static __inline__ __m128 __DEFAULT_FN_ATTRS128
1321_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1322{
1323 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1324 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1325 (__v4sf) __B,
1326 -(__v4sf) __C),
1327 (__v4sf)_mm_setzero_ps());
1328}
1329
1330static __inline__ __m256 __DEFAULT_FN_ATTRS256
1331_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1332 __m256 __C)
1333{
1334 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1335 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1336 (__v8sf) __B,
1337 (__v8sf) __C),
1338 (__v8sf) __A);
1339}
1340
1341static __inline__ __m256 __DEFAULT_FN_ATTRS256
1342_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1343{
1344 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1345 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1346 (__v8sf) __B,
1347 (__v8sf) __C),
1348 (__v8sf) __C);
1349}
1350
1351static __inline__ __m256 __DEFAULT_FN_ATTRS256
1352_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1353{
1354 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1355 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1356 (__v8sf) __B,
1357 (__v8sf) __C),
1358 (__v8sf)_mm256_setzero_ps());
1359}
1360
1361static __inline__ __m256 __DEFAULT_FN_ATTRS256
1362_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1363{
1364 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1365 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1366 (__v8sf) __B,
1367 -(__v8sf) __C),
1368 (__v8sf) __A);
1369}
1370
1371static __inline__ __m256 __DEFAULT_FN_ATTRS256
1372_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1373{
1374 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1375 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1376 (__v8sf) __B,
1377 -(__v8sf) __C),
1378 (__v8sf)_mm256_setzero_ps());
1379}
1380
1381static __inline__ __m128d __DEFAULT_FN_ATTRS128
1382_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1383{
1384 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1385 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1386 (__v2df) __B,
1387 -(__v2df) __C),
1388 (__v2df) __C);
1389}
1390
1391static __inline__ __m256d __DEFAULT_FN_ATTRS256
1392_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1393{
1394 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1395 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1396 (__v4df) __B,
1397 -(__v4df) __C),
1398 (__v4df) __C);
1399}
1400
1401static __inline__ __m128 __DEFAULT_FN_ATTRS128
1402_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1403{
1404 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1405 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1406 (__v4sf) __B,
1407 -(__v4sf) __C),
1408 (__v4sf) __C);
1409}
1410
1411static __inline__ __m256 __DEFAULT_FN_ATTRS256
1412_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1413{
1414 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1415 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1416 (__v8sf) __B,
1417 -(__v8sf) __C),
1418 (__v8sf) __C);
1419}
1420
1421static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1422_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1423 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1424 (__v2df)_mm_add_pd(__A, __B),
1425 (__v2df)__W);
1426}
1427
1428static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1429_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1430 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1431 (__v2df)_mm_add_pd(__A, __B),
1432 (__v2df)_mm_setzero_pd());
1433}
1434
1435static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1436_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1437 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1438 (__v4df)_mm256_add_pd(__A, __B),
1439 (__v4df)__W);
1440}
1441
1442static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1443_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1444 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1445 (__v4df)_mm256_add_pd(__A, __B),
1446 (__v4df)_mm256_setzero_pd());
1447}
1448
1449static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1450_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1451 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1452 (__v4sf)_mm_add_ps(__A, __B),
1453 (__v4sf)__W);
1454}
1455
1456static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1457_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1458 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1459 (__v4sf)_mm_add_ps(__A, __B),
1460 (__v4sf)_mm_setzero_ps());
1461}
1462
1463static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1464_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1465 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1466 (__v8sf)_mm256_add_ps(__A, __B),
1467 (__v8sf)__W);
1468}
1469
1470static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1471_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1472 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1473 (__v8sf)_mm256_add_ps(__A, __B),
1474 (__v8sf)_mm256_setzero_ps());
1475}
1476
1477static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
1478_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
1479 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1480 (__v4si) __W,
1481 (__v4si) __A);
1482}
1483
1484static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1485_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
1486 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1487 (__v8si) __W,
1488 (__v8si) __A);
1489}
1490
1491static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1492_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
1493 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1494 (__v2df) __W,
1495 (__v2df) __A);
1496}
1497
1498static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1499_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
1500 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1501 (__v4df) __W,
1502 (__v4df) __A);
1503}
1504
1505static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1506_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
1507 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1508 (__v4sf) __W,
1509 (__v4sf) __A);
1510}
1511
1512static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1513_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
1514 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1515 (__v8sf) __W,
1516 (__v8sf) __A);
1517}
1518
1519static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
1520_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
1521 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1522 (__v2di) __W,
1523 (__v2di) __A);
1524}
1525
1526static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1527_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
1528 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1529 (__v4di) __W,
1530 (__v4di) __A);
1531}
1532
1533static __inline__ __m128d __DEFAULT_FN_ATTRS128
1534_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1535 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1536 (__v2df) __W,
1537 (__mmask8) __U);
1538}
1539
1540static __inline__ __m128d __DEFAULT_FN_ATTRS128
1542 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1543 (__v2df)
1544 _mm_setzero_pd (),
1545 (__mmask8) __U);
1546}
1547
1548static __inline__ __m256d __DEFAULT_FN_ATTRS256
1549_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1550 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1551 (__v4df) __W,
1552 (__mmask8) __U);
1553}
1554
1555static __inline__ __m256d __DEFAULT_FN_ATTRS256
1557 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1558 (__v4df)
1560 (__mmask8) __U);
1561}
1562
1563static __inline__ __m128i __DEFAULT_FN_ATTRS128
1564_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1565 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1566 (__v2di) __W,
1567 (__mmask8) __U);
1568}
1569
1570static __inline__ __m128i __DEFAULT_FN_ATTRS128
1572 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1573 (__v2di)
1575 (__mmask8) __U);
1576}
1577
1578static __inline__ __m256i __DEFAULT_FN_ATTRS256
1579_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1580 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1581 (__v4di) __W,
1582 (__mmask8) __U);
1583}
1584
1585static __inline__ __m256i __DEFAULT_FN_ATTRS256
1587 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1588 (__v4di)
1590 (__mmask8) __U);
1591}
1592
1593static __inline__ __m128 __DEFAULT_FN_ATTRS128
1594_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1595 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1596 (__v4sf) __W,
1597 (__mmask8) __U);
1598}
1599
1600static __inline__ __m128 __DEFAULT_FN_ATTRS128
1602 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1603 (__v4sf)
1604 _mm_setzero_ps (),
1605 (__mmask8) __U);
1606}
1607
1608static __inline__ __m256 __DEFAULT_FN_ATTRS256
1609_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1610 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1611 (__v8sf) __W,
1612 (__mmask8) __U);
1613}
1614
1615static __inline__ __m256 __DEFAULT_FN_ATTRS256
1617 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1618 (__v8sf)
1620 (__mmask8) __U);
1621}
1622
1623static __inline__ __m128i __DEFAULT_FN_ATTRS128
1624_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1625 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1626 (__v4si) __W,
1627 (__mmask8) __U);
1628}
1629
1630static __inline__ __m128i __DEFAULT_FN_ATTRS128
1632 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1633 (__v4si)
1635 (__mmask8) __U);
1636}
1637
1638static __inline__ __m256i __DEFAULT_FN_ATTRS256
1639_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1640 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1641 (__v8si) __W,
1642 (__mmask8) __U);
1643}
1644
1645static __inline__ __m256i __DEFAULT_FN_ATTRS256
1647 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1648 (__v8si)
1650 (__mmask8) __U);
1651}
1652
1653static __inline__ void __DEFAULT_FN_ATTRS128
1654_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1655 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1656 (__v2df) __A,
1657 (__mmask8) __U);
1658}
1659
1660static __inline__ void __DEFAULT_FN_ATTRS256
1661_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1662 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1663 (__v4df) __A,
1664 (__mmask8) __U);
1665}
1666
1667static __inline__ void __DEFAULT_FN_ATTRS128
1668_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1669 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1670 (__v2di) __A,
1671 (__mmask8) __U);
1672}
1673
1674static __inline__ void __DEFAULT_FN_ATTRS256
1676 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1677 (__v4di) __A,
1678 (__mmask8) __U);
1679}
1680
1681static __inline__ void __DEFAULT_FN_ATTRS128
1682_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1683 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1684 (__v4sf) __A,
1685 (__mmask8) __U);
1686}
1687
1688static __inline__ void __DEFAULT_FN_ATTRS256
1690 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1691 (__v8sf) __A,
1692 (__mmask8) __U);
1693}
1694
1695static __inline__ void __DEFAULT_FN_ATTRS128
1696_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1697 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1698 (__v4si) __A,
1699 (__mmask8) __U);
1700}
1701
1702static __inline__ void __DEFAULT_FN_ATTRS256
1704 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1705 (__v8si) __A,
1706 (__mmask8) __U);
1707}
1708
1709static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1710_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
1711 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1712 (__v2df)_mm_cvtepi32_pd(__A),
1713 (__v2df)__W);
1714}
1715
1716static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1718 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1719 (__v2df)_mm_cvtepi32_pd(__A),
1720 (__v2df)_mm_setzero_pd());
1721}
1722
1723static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1724_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
1725 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1726 (__v4df)_mm256_cvtepi32_pd(__A),
1727 (__v4df)__W);
1728}
1729
1730static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1732 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1733 (__v4df)_mm256_cvtepi32_pd(__A),
1734 (__v4df)_mm256_setzero_pd());
1735}
1736
1737static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1738_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
1739 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1740 (__v4sf)_mm_cvtepi32_ps(__A),
1741 (__v4sf)__W);
1742}
1743
1744static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1746 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1747 (__v4sf)_mm_cvtepi32_ps(__A),
1748 (__v4sf)_mm_setzero_ps());
1749}
1750
1751static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1752_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
1753 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1754 (__v8sf)_mm256_cvtepi32_ps(__A),
1755 (__v8sf)__W);
1756}
1757
1758static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1760 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1761 (__v8sf)_mm256_cvtepi32_ps(__A),
1762 (__v8sf)_mm256_setzero_ps());
1763}
1764
1765static __inline__ __m128i __DEFAULT_FN_ATTRS128
1766_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1767 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1768 (__v4si) __W,
1769 (__mmask8) __U);
1770}
1771
1772static __inline__ __m128i __DEFAULT_FN_ATTRS128
1774 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1775 (__v4si)
1777 (__mmask8) __U);
1778}
1779
1780static __inline__ __m128i __DEFAULT_FN_ATTRS256
1781_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1782 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1783 (__v4si)_mm256_cvtpd_epi32(__A),
1784 (__v4si)__W);
1785}
1786
1787static __inline__ __m128i __DEFAULT_FN_ATTRS256
1789 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1790 (__v4si)_mm256_cvtpd_epi32(__A),
1791 (__v4si)_mm_setzero_si128());
1792}
1793
1794static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1795_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) {
1796 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1797 (__v4sf) __W,
1798 (__mmask8) __U);
1799}
1800
1801static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1802_mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A) {
1803 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1804 (__v4sf)
1805 _mm_setzero_ps (),
1806 (__mmask8) __U);
1807}
1808
1809static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
1810_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) {
1811 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1812 (__v4sf)_mm256_cvtpd_ps(__A),
1813 (__v4sf)__W);
1814}
1815
1816static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
1818 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1819 (__v4sf)_mm256_cvtpd_ps(__A),
1820 (__v4sf)_mm_setzero_ps());
1821}
1822
1823static __inline__ __m128i __DEFAULT_FN_ATTRS128
1824_mm_cvtpd_epu32 (__m128d __A) {
1825 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1826 (__v4si)
1828 (__mmask8) -1);
1829}
1830
1831static __inline__ __m128i __DEFAULT_FN_ATTRS128
1832_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
1833 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1834 (__v4si) __W,
1835 (__mmask8) __U);
1836}
1837
1838static __inline__ __m128i __DEFAULT_FN_ATTRS128
1840 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1841 (__v4si)
1843 (__mmask8) __U);
1844}
1845
1846static __inline__ __m128i __DEFAULT_FN_ATTRS256
1847_mm256_cvtpd_epu32 (__m256d __A) {
1848 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1849 (__v4si)
1851 (__mmask8) -1);
1852}
1853
1854static __inline__ __m128i __DEFAULT_FN_ATTRS256
1855_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
1856 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1857 (__v4si) __W,
1858 (__mmask8) __U);
1859}
1860
1861static __inline__ __m128i __DEFAULT_FN_ATTRS256
1863 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1864 (__v4si)
1866 (__mmask8) __U);
1867}
1868
1869static __inline__ __m128i __DEFAULT_FN_ATTRS128
1870_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
1871 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1872 (__v4si)_mm_cvtps_epi32(__A),
1873 (__v4si)__W);
1874}
1875
1876static __inline__ __m128i __DEFAULT_FN_ATTRS128
1878 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1879 (__v4si)_mm_cvtps_epi32(__A),
1880 (__v4si)_mm_setzero_si128());
1881}
1882
1883static __inline__ __m256i __DEFAULT_FN_ATTRS256
1884_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
1885 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1886 (__v8si)_mm256_cvtps_epi32(__A),
1887 (__v8si)__W);
1888}
1889
1890static __inline__ __m256i __DEFAULT_FN_ATTRS256
1892 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1893 (__v8si)_mm256_cvtps_epi32(__A),
1894 (__v8si)_mm256_setzero_si256());
1895}
1896
1897static __inline__ __m128d __DEFAULT_FN_ATTRS128
1898_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
1899 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1900 (__v2df)_mm_cvtps_pd(__A),
1901 (__v2df)__W);
1902}
1903
1904static __inline__ __m128d __DEFAULT_FN_ATTRS128
1905_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
1906 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1907 (__v2df)_mm_cvtps_pd(__A),
1908 (__v2df)_mm_setzero_pd());
1909}
1910
1911static __inline__ __m256d __DEFAULT_FN_ATTRS256
1912_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
1913 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1914 (__v4df)_mm256_cvtps_pd(__A),
1915 (__v4df)__W);
1916}
1917
1918static __inline__ __m256d __DEFAULT_FN_ATTRS256
1920 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1921 (__v4df)_mm256_cvtps_pd(__A),
1922 (__v4df)_mm256_setzero_pd());
1923}
1924
1925static __inline__ __m128i __DEFAULT_FN_ATTRS128
1926_mm_cvtps_epu32 (__m128 __A) {
1927 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
1928 (__v4si)
1930 (__mmask8) -1);
1931}
1932
1933static __inline__ __m128i __DEFAULT_FN_ATTRS128
1934_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
1935 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
1936 (__v4si) __W,
1937 (__mmask8) __U);
1938}
1939
1940static __inline__ __m128i __DEFAULT_FN_ATTRS128
1942 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
1943 (__v4si)
1945 (__mmask8) __U);
1946}
1947
1948static __inline__ __m256i __DEFAULT_FN_ATTRS256
1950 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
1951 (__v8si)
1953 (__mmask8) -1);
1954}
1955
1956static __inline__ __m256i __DEFAULT_FN_ATTRS256
1957_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
1958 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
1959 (__v8si) __W,
1960 (__mmask8) __U);
1961}
1962
1963static __inline__ __m256i __DEFAULT_FN_ATTRS256
1965 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
1966 (__v8si)
1968 (__mmask8) __U);
1969}
1970
1971static __inline__ __m128i __DEFAULT_FN_ATTRS128
1972_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1973 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1974 (__v4si) __W,
1975 (__mmask8) __U);
1976}
1977
1978static __inline__ __m128i __DEFAULT_FN_ATTRS128
1980 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1981 (__v4si)
1983 (__mmask8) __U);
1984}
1985
1986static __inline__ __m128i __DEFAULT_FN_ATTRS256
1987_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1988 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1989 (__v4si)_mm256_cvttpd_epi32(__A),
1990 (__v4si)__W);
1991}
1992
1993static __inline__ __m128i __DEFAULT_FN_ATTRS256
1995 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1996 (__v4si)_mm256_cvttpd_epi32(__A),
1997 (__v4si)_mm_setzero_si128());
1998}
1999
2000static __inline__ __m128i __DEFAULT_FN_ATTRS128
2001_mm_cvttpd_epu32 (__m128d __A) {
2002 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2003 (__v4si)
2005 (__mmask8) -1);
2006}
2007
2008static __inline__ __m128i __DEFAULT_FN_ATTRS128
2009_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2010 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2011 (__v4si) __W,
2012 (__mmask8) __U);
2013}
2014
2015static __inline__ __m128i __DEFAULT_FN_ATTRS128
2017 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2018 (__v4si)
2020 (__mmask8) __U);
2021}
2022
2023static __inline__ __m128i __DEFAULT_FN_ATTRS256
2024_mm256_cvttpd_epu32 (__m256d __A) {
2025 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2026 (__v4si)
2028 (__mmask8) -1);
2029}
2030
2031static __inline__ __m128i __DEFAULT_FN_ATTRS256
2032_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2033 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2034 (__v4si) __W,
2035 (__mmask8) __U);
2036}
2037
2038static __inline__ __m128i __DEFAULT_FN_ATTRS256
2040 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2041 (__v4si)
2043 (__mmask8) __U);
2044}
2045
2046static __inline__ __m128i __DEFAULT_FN_ATTRS128
2047_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2048 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2049 (__v4si)_mm_cvttps_epi32(__A),
2050 (__v4si)__W);
2051}
2052
2053static __inline__ __m128i __DEFAULT_FN_ATTRS128
2055 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2056 (__v4si)_mm_cvttps_epi32(__A),
2057 (__v4si)_mm_setzero_si128());
2058}
2059
2060static __inline__ __m256i __DEFAULT_FN_ATTRS256
2061_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2062 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2063 (__v8si)_mm256_cvttps_epi32(__A),
2064 (__v8si)__W);
2065}
2066
2067static __inline__ __m256i __DEFAULT_FN_ATTRS256
2069 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2070 (__v8si)_mm256_cvttps_epi32(__A),
2071 (__v8si)_mm256_setzero_si256());
2072}
2073
2074static __inline__ __m128i __DEFAULT_FN_ATTRS128
2075_mm_cvttps_epu32 (__m128 __A) {
2076 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2077 (__v4si)
2079 (__mmask8) -1);
2080}
2081
2082static __inline__ __m128i __DEFAULT_FN_ATTRS128
2083_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2084 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2085 (__v4si) __W,
2086 (__mmask8) __U);
2087}
2088
2089static __inline__ __m128i __DEFAULT_FN_ATTRS128
2091 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2092 (__v4si)
2094 (__mmask8) __U);
2095}
2096
2097static __inline__ __m256i __DEFAULT_FN_ATTRS256
2099 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2100 (__v8si)
2102 (__mmask8) -1);
2103}
2104
2105static __inline__ __m256i __DEFAULT_FN_ATTRS256
2106_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2107 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2108 (__v8si) __W,
2109 (__mmask8) __U);
2110}
2111
2112static __inline__ __m256i __DEFAULT_FN_ATTRS256
2114 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2115 (__v8si)
2117 (__mmask8) __U);
2118}
2119
2120static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2121_mm_cvtepu32_pd(__m128i __A) {
2122 return (__m128d) __builtin_convertvector(
2123 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2124}
2125
2126static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2127_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
2128 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2129 (__v2df)_mm_cvtepu32_pd(__A),
2130 (__v2df)__W);
2131}
2132
2133static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2135 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2136 (__v2df)_mm_cvtepu32_pd(__A),
2137 (__v2df)_mm_setzero_pd());
2138}
2139
2140static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2142 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2143}
2144
2145static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2146_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
2147 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2148 (__v4df)_mm256_cvtepu32_pd(__A),
2149 (__v4df)__W);
2150}
2151
2152static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2154 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2155 (__v4df)_mm256_cvtepu32_pd(__A),
2156 (__v4df)_mm256_setzero_pd());
2157}
2158
2159static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2160_mm_cvtepu32_ps(__m128i __A) {
2161 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2162}
2163
2164static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2165_mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
2166 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2167 (__v4sf)_mm_cvtepu32_ps(__A),
2168 (__v4sf)__W);
2169}
2170
2171static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2173 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2174 (__v4sf)_mm_cvtepu32_ps(__A),
2175 (__v4sf)_mm_setzero_ps());
2176}
2177
2178static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2180 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2181}
2182
2183static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2184_mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
2185 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2186 (__v8sf)_mm256_cvtepu32_ps(__A),
2187 (__v8sf)__W);
2188}
2189
2190static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2192 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2193 (__v8sf)_mm256_cvtepu32_ps(__A),
2194 (__v8sf)_mm256_setzero_ps());
2195}
2196
2197static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2198_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2199 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2200 (__v2df)_mm_div_pd(__A, __B),
2201 (__v2df)__W);
2202}
2203
2204static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2205_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2206 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2207 (__v2df)_mm_div_pd(__A, __B),
2208 (__v2df)_mm_setzero_pd());
2209}
2210
2211static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2212_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2213 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2214 (__v4df)_mm256_div_pd(__A, __B),
2215 (__v4df)__W);
2216}
2217
2218static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2219_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2220 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2221 (__v4df)_mm256_div_pd(__A, __B),
2222 (__v4df)_mm256_setzero_pd());
2223}
2224
2225static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2226_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2227 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2228 (__v4sf)_mm_div_ps(__A, __B),
2229 (__v4sf)__W);
2230}
2231
2232static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2233_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2234 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2235 (__v4sf)_mm_div_ps(__A, __B),
2236 (__v4sf)_mm_setzero_ps());
2237}
2238
2239static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2240_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2241 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2242 (__v8sf)_mm256_div_ps(__A, __B),
2243 (__v8sf)__W);
2244}
2245
2246static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2247_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2248 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2249 (__v8sf)_mm256_div_ps(__A, __B),
2250 (__v8sf)_mm256_setzero_ps());
2251}
2252
2253static __inline__ __m128d __DEFAULT_FN_ATTRS128
2254_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2255 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2256 (__v2df) __W,
2257 (__mmask8) __U);
2258}
2259
2260static __inline__ __m128d __DEFAULT_FN_ATTRS128
2261_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2262 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2263 (__v2df)
2264 _mm_setzero_pd (),
2265 (__mmask8) __U);
2266}
2267
2268static __inline__ __m256d __DEFAULT_FN_ATTRS256
2269_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2270 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2271 (__v4df) __W,
2272 (__mmask8) __U);
2273}
2274
2275static __inline__ __m256d __DEFAULT_FN_ATTRS256
2277 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2278 (__v4df)
2280 (__mmask8) __U);
2281}
2282
2283static __inline__ __m128i __DEFAULT_FN_ATTRS128
2284_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2285 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2286 (__v2di) __W,
2287 (__mmask8) __U);
2288}
2289
2290static __inline__ __m128i __DEFAULT_FN_ATTRS128
2292 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2293 (__v2di)
2295 (__mmask8) __U);
2296}
2297
2298static __inline__ __m256i __DEFAULT_FN_ATTRS256
2299_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2300 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2301 (__v4di) __W,
2302 (__mmask8) __U);
2303}
2304
2305static __inline__ __m256i __DEFAULT_FN_ATTRS256
2307 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2308 (__v4di)
2310 (__mmask8) __U);
2311}
2312
2313static __inline__ __m128d __DEFAULT_FN_ATTRS128
2314_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2315 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2316 (__v2df) __W,
2317 (__mmask8)
2318 __U);
2319}
2320
2321static __inline__ __m128d __DEFAULT_FN_ATTRS128
2323 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2324 (__v2df)
2325 _mm_setzero_pd (),
2326 (__mmask8)
2327 __U);
2328}
2329
2330static __inline__ __m256d __DEFAULT_FN_ATTRS256
2331_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2332 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2333 (__v4df) __W,
2334 (__mmask8)
2335 __U);
2336}
2337
2338static __inline__ __m256d __DEFAULT_FN_ATTRS256
2340 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2341 (__v4df)
2343 (__mmask8)
2344 __U);
2345}
2346
2347static __inline__ __m128i __DEFAULT_FN_ATTRS128
2348_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2349 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2350 (__v2di) __W,
2351 (__mmask8)
2352 __U);
2353}
2354
2355static __inline__ __m128i __DEFAULT_FN_ATTRS128
2357 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2358 (__v2di)
2360 (__mmask8)
2361 __U);
2362}
2363
2364static __inline__ __m256i __DEFAULT_FN_ATTRS256
2366 void const *__P) {
2367 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2368 (__v4di) __W,
2369 (__mmask8)
2370 __U);
2371}
2372
2373static __inline__ __m256i __DEFAULT_FN_ATTRS256
2375 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2376 (__v4di)
2378 (__mmask8)
2379 __U);
2380}
2381
2382static __inline__ __m128 __DEFAULT_FN_ATTRS128
2383_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2384 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2385 (__v4sf) __W,
2386 (__mmask8) __U);
2387}
2388
2389static __inline__ __m128 __DEFAULT_FN_ATTRS128
2391 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2392 (__v4sf)
2393 _mm_setzero_ps (),
2394 (__mmask8)
2395 __U);
2396}
2397
2398static __inline__ __m256 __DEFAULT_FN_ATTRS256
2399_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2400 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2401 (__v8sf) __W,
2402 (__mmask8) __U);
2403}
2404
2405static __inline__ __m256 __DEFAULT_FN_ATTRS256
2407 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2408 (__v8sf)
2410 (__mmask8)
2411 __U);
2412}
2413
2414static __inline__ __m128i __DEFAULT_FN_ATTRS128
2415_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2416 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2417 (__v4si) __W,
2418 (__mmask8)
2419 __U);
2420}
2421
2422static __inline__ __m128i __DEFAULT_FN_ATTRS128
2424 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2425 (__v4si)
2427 (__mmask8) __U);
2428}
2429
2430static __inline__ __m256i __DEFAULT_FN_ATTRS256
2432 void const *__P) {
2433 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2434 (__v8si) __W,
2435 (__mmask8)
2436 __U);
2437}
2438
2439static __inline__ __m256i __DEFAULT_FN_ATTRS256
2441 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2442 (__v8si)
2444 (__mmask8)
2445 __U);
2446}
2447
2448static __inline__ __m128 __DEFAULT_FN_ATTRS128
2449_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2450 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2451 (__v4sf) __W,
2452 (__mmask8) __U);
2453}
2454
2455static __inline__ __m128 __DEFAULT_FN_ATTRS128
2457 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2458 (__v4sf)
2459 _mm_setzero_ps (),
2460 (__mmask8) __U);
2461}
2462
2463static __inline__ __m256 __DEFAULT_FN_ATTRS256
2464_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2465 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2466 (__v8sf) __W,
2467 (__mmask8) __U);
2468}
2469
2470static __inline__ __m256 __DEFAULT_FN_ATTRS256
2472 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2473 (__v8sf)
2475 (__mmask8) __U);
2476}
2477
2478static __inline__ __m128i __DEFAULT_FN_ATTRS128
2479_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2480 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2481 (__v4si) __W,
2482 (__mmask8) __U);
2483}
2484
2485static __inline__ __m128i __DEFAULT_FN_ATTRS128
2487 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2488 (__v4si)
2490 (__mmask8) __U);
2491}
2492
2493static __inline__ __m256i __DEFAULT_FN_ATTRS256
2494_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2495 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2496 (__v8si) __W,
2497 (__mmask8) __U);
2498}
2499
2500static __inline__ __m256i __DEFAULT_FN_ATTRS256
2502 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2503 (__v8si)
2505 (__mmask8) __U);
2506}
2507
2508static __inline__ __m128d __DEFAULT_FN_ATTRS128
2509_mm_getexp_pd (__m128d __A) {
2510 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2511 (__v2df)
2512 _mm_setzero_pd (),
2513 (__mmask8) -1);
2514}
2515
2516static __inline__ __m128d __DEFAULT_FN_ATTRS128
2517_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2518 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2519 (__v2df) __W,
2520 (__mmask8) __U);
2521}
2522
2523static __inline__ __m128d __DEFAULT_FN_ATTRS128
2524_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2525 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2526 (__v2df)
2527 _mm_setzero_pd (),
2528 (__mmask8) __U);
2529}
2530
2531static __inline__ __m256d __DEFAULT_FN_ATTRS256
2532_mm256_getexp_pd (__m256d __A) {
2533 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2534 (__v4df)
2536 (__mmask8) -1);
2537}
2538
2539static __inline__ __m256d __DEFAULT_FN_ATTRS256
2540_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2541 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2542 (__v4df) __W,
2543 (__mmask8) __U);
2544}
2545
2546static __inline__ __m256d __DEFAULT_FN_ATTRS256
2548 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2549 (__v4df)
2551 (__mmask8) __U);
2552}
2553
2554static __inline__ __m128 __DEFAULT_FN_ATTRS128
2555_mm_getexp_ps (__m128 __A) {
2556 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2557 (__v4sf)
2558 _mm_setzero_ps (),
2559 (__mmask8) -1);
2560}
2561
2562static __inline__ __m128 __DEFAULT_FN_ATTRS128
2563_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2564 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2565 (__v4sf) __W,
2566 (__mmask8) __U);
2567}
2568
2569static __inline__ __m128 __DEFAULT_FN_ATTRS128
2571 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2572 (__v4sf)
2573 _mm_setzero_ps (),
2574 (__mmask8) __U);
2575}
2576
2577static __inline__ __m256 __DEFAULT_FN_ATTRS256
2578_mm256_getexp_ps (__m256 __A) {
2579 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2580 (__v8sf)
2582 (__mmask8) -1);
2583}
2584
2585static __inline__ __m256 __DEFAULT_FN_ATTRS256
2586_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2587 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2588 (__v8sf) __W,
2589 (__mmask8) __U);
2590}
2591
2592static __inline__ __m256 __DEFAULT_FN_ATTRS256
2594 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2595 (__v8sf)
2597 (__mmask8) __U);
2598}
2599
2600static __inline__ __m128d __DEFAULT_FN_ATTRS128
2601_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2602 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2603 (__v2df)_mm_max_pd(__A, __B),
2604 (__v2df)__W);
2605}
2606
2607static __inline__ __m128d __DEFAULT_FN_ATTRS128
2608_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2609 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2610 (__v2df)_mm_max_pd(__A, __B),
2611 (__v2df)_mm_setzero_pd());
2612}
2613
2614static __inline__ __m256d __DEFAULT_FN_ATTRS256
2615_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2616 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2617 (__v4df)_mm256_max_pd(__A, __B),
2618 (__v4df)__W);
2619}
2620
2621static __inline__ __m256d __DEFAULT_FN_ATTRS256
2622_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2623 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2624 (__v4df)_mm256_max_pd(__A, __B),
2625 (__v4df)_mm256_setzero_pd());
2626}
2627
2628static __inline__ __m128 __DEFAULT_FN_ATTRS128
2629_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2630 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2631 (__v4sf)_mm_max_ps(__A, __B),
2632 (__v4sf)__W);
2633}
2634
2635static __inline__ __m128 __DEFAULT_FN_ATTRS128
2636_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2637 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2638 (__v4sf)_mm_max_ps(__A, __B),
2639 (__v4sf)_mm_setzero_ps());
2640}
2641
2642static __inline__ __m256 __DEFAULT_FN_ATTRS256
2643_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2644 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2645 (__v8sf)_mm256_max_ps(__A, __B),
2646 (__v8sf)__W);
2647}
2648
2649static __inline__ __m256 __DEFAULT_FN_ATTRS256
2650_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2651 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2652 (__v8sf)_mm256_max_ps(__A, __B),
2653 (__v8sf)_mm256_setzero_ps());
2654}
2655
2656static __inline__ __m128d __DEFAULT_FN_ATTRS128
2657_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2658 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2659 (__v2df)_mm_min_pd(__A, __B),
2660 (__v2df)__W);
2661}
2662
2663static __inline__ __m128d __DEFAULT_FN_ATTRS128
2664_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2665 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2666 (__v2df)_mm_min_pd(__A, __B),
2667 (__v2df)_mm_setzero_pd());
2668}
2669
2670static __inline__ __m256d __DEFAULT_FN_ATTRS256
2671_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2672 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2673 (__v4df)_mm256_min_pd(__A, __B),
2674 (__v4df)__W);
2675}
2676
2677static __inline__ __m256d __DEFAULT_FN_ATTRS256
2678_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2679 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2680 (__v4df)_mm256_min_pd(__A, __B),
2681 (__v4df)_mm256_setzero_pd());
2682}
2683
2684static __inline__ __m128 __DEFAULT_FN_ATTRS128
2685_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2686 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2687 (__v4sf)_mm_min_ps(__A, __B),
2688 (__v4sf)__W);
2689}
2690
2691static __inline__ __m128 __DEFAULT_FN_ATTRS128
2692_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2693 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2694 (__v4sf)_mm_min_ps(__A, __B),
2695 (__v4sf)_mm_setzero_ps());
2696}
2697
2698static __inline__ __m256 __DEFAULT_FN_ATTRS256
2699_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2700 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2701 (__v8sf)_mm256_min_ps(__A, __B),
2702 (__v8sf)__W);
2703}
2704
2705static __inline__ __m256 __DEFAULT_FN_ATTRS256
2706_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2707 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2708 (__v8sf)_mm256_min_ps(__A, __B),
2709 (__v8sf)_mm256_setzero_ps());
2710}
2711
2712static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2713_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2714 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2715 (__v2df)_mm_mul_pd(__A, __B),
2716 (__v2df)__W);
2717}
2718
2719static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2720_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2721 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2722 (__v2df)_mm_mul_pd(__A, __B),
2723 (__v2df)_mm_setzero_pd());
2724}
2725
2726static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2727_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2728 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2729 (__v4df)_mm256_mul_pd(__A, __B),
2730 (__v4df)__W);
2731}
2732
2733static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2734_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2735 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2736 (__v4df)_mm256_mul_pd(__A, __B),
2737 (__v4df)_mm256_setzero_pd());
2738}
2739
2740static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2741_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2742 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2743 (__v4sf)_mm_mul_ps(__A, __B),
2744 (__v4sf)__W);
2745}
2746
2747static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2748_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2749 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2750 (__v4sf)_mm_mul_ps(__A, __B),
2751 (__v4sf)_mm_setzero_ps());
2752}
2753
2754static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2755_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2756 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2757 (__v8sf)_mm256_mul_ps(__A, __B),
2758 (__v8sf)__W);
2759}
2760
2761static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2762_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2763 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2764 (__v8sf)_mm256_mul_ps(__A, __B),
2765 (__v8sf)_mm256_setzero_ps());
2766}
2767
2768static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2769_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2770 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2771 (__v4si)_mm_abs_epi32(__A),
2772 (__v4si)__W);
2773}
2774
2775static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2777 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2778 (__v4si)_mm_abs_epi32(__A),
2779 (__v4si)_mm_setzero_si128());
2780}
2781
2782static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2783_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2784 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2785 (__v8si)_mm256_abs_epi32(__A),
2786 (__v8si)__W);
2787}
2788
2789static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2791 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2792 (__v8si)_mm256_abs_epi32(__A),
2793 (__v8si)_mm256_setzero_si256());
2794}
2795
2796static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2797_mm_abs_epi64(__m128i __A) {
2798 return (__m128i)__builtin_elementwise_abs((__v2di)__A);
2799}
2800
2801static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2802_mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
2803 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2804 (__v2di)_mm_abs_epi64(__A),
2805 (__v2di)__W);
2806}
2807
2808static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2810 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2811 (__v2di)_mm_abs_epi64(__A),
2812 (__v2di)_mm_setzero_si128());
2813}
2814
2815static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2816_mm256_abs_epi64(__m256i __A) {
2817 return (__m256i)__builtin_elementwise_abs((__v4di)__A);
2818}
2819
2820static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2821_mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
2822 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2823 (__v4di)_mm256_abs_epi64(__A),
2824 (__v4di)__W);
2825}
2826
2827static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2829 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2830 (__v4di)_mm256_abs_epi64(__A),
2831 (__v4di)_mm256_setzero_si256());
2832}
2833
2834static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2835_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
2836 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2837 (__v4si)_mm_max_epi32(__A, __B),
2838 (__v4si)_mm_setzero_si128());
2839}
2840
2841static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2842_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2843 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2844 (__v4si)_mm_max_epi32(__A, __B),
2845 (__v4si)__W);
2846}
2847
2848static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2849_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
2850 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2851 (__v8si)_mm256_max_epi32(__A, __B),
2852 (__v8si)_mm256_setzero_si256());
2853}
2854
2855static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2856_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2857 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2858 (__v8si)_mm256_max_epi32(__A, __B),
2859 (__v8si)__W);
2860}
2861
2862static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2863_mm_max_epi64(__m128i __A, __m128i __B) {
2864 return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
2865}
2866
2867static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2868_mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B) {
2869 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2870 (__v2di)_mm_max_epi64(__A, __B),
2871 (__v2di)_mm_setzero_si128());
2872}
2873
2874static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2875_mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2876 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2877 (__v2di)_mm_max_epi64(__A, __B),
2878 (__v2di)__W);
2879}
2880
2881static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2882_mm256_max_epi64(__m256i __A, __m256i __B) {
2883 return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
2884}
2885
2886static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2887_mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B) {
2888 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
2889 (__v4di)_mm256_max_epi64(__A, __B),
2890 (__v4di)_mm256_setzero_si256());
2891}
2892
2893static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2894_mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2895 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
2896 (__v4di)_mm256_max_epi64(__A, __B),
2897 (__v4di)__W);
2898}
2899
2900static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2901_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
2902 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2903 (__v4si)_mm_max_epu32(__A, __B),
2904 (__v4si)_mm_setzero_si128());
2905}
2906
2907static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2908_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2909 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2910 (__v4si)_mm_max_epu32(__A, __B),
2911 (__v4si)__W);
2912}
2913
2914static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2915_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
2916 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2917 (__v8si)_mm256_max_epu32(__A, __B),
2918 (__v8si)_mm256_setzero_si256());
2919}
2920
2921static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2922_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2923 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2924 (__v8si)_mm256_max_epu32(__A, __B),
2925 (__v8si)__W);
2926}
2927
2928static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2929_mm_max_epu64(__m128i __A, __m128i __B) {
2930 return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
2931}
2932
2933static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2934_mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B) {
2935 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2936 (__v2di)_mm_max_epu64(__A, __B),
2937 (__v2di)_mm_setzero_si128());
2938}
2939
2940static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2941_mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2942 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2943 (__v2di)_mm_max_epu64(__A, __B),
2944 (__v2di)__W);
2945}
2946
2947static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2948_mm256_max_epu64(__m256i __A, __m256i __B) {
2949 return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
2950}
2951
2952static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2953_mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B) {
2954 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
2955 (__v4di)_mm256_max_epu64(__A, __B),
2956 (__v4di)_mm256_setzero_si256());
2957}
2958
2959static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2960_mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2961 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
2962 (__v4di)_mm256_max_epu64(__A, __B),
2963 (__v4di)__W);
2964}
2965
2966static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2967_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
2968 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2969 (__v4si)_mm_min_epi32(__A, __B),
2970 (__v4si)_mm_setzero_si128());
2971}
2972
2973static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2974_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2976 (__v4si)_mm_min_epi32(__A, __B),
2977 (__v4si)__W);
2978}
2979
2980static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2981_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
2982 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2983 (__v8si)_mm256_min_epi32(__A, __B),
2984 (__v8si)_mm256_setzero_si256());
2985}
2986
2987static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2988_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2989 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2990 (__v8si)_mm256_min_epi32(__A, __B),
2991 (__v8si)__W);
2992}
2993
2994static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2995_mm_min_epi64(__m128i __A, __m128i __B) {
2996 return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
2997}
2998
2999static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3000_mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3001 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3002 (__v2di)_mm_min_epi64(__A, __B),
3003 (__v2di)__W);
3004}
3005
3006static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3007_mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B) {
3008 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3009 (__v2di)_mm_min_epi64(__A, __B),
3010 (__v2di)_mm_setzero_si128());
3011}
3012
3013static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3014_mm256_min_epi64(__m256i __A, __m256i __B) {
3015 return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
3016}
3017
3018static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3019_mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3020 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3021 (__v4di)_mm256_min_epi64(__A, __B),
3022 (__v4di)__W);
3023}
3024
3025static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3026_mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B) {
3027 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3028 (__v4di)_mm256_min_epi64(__A, __B),
3029 (__v4di)_mm256_setzero_si256());
3030}
3031
3032static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3033_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3034 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3035 (__v4si)_mm_min_epu32(__A, __B),
3036 (__v4si)_mm_setzero_si128());
3037}
3038
3039static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3040_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3041 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3042 (__v4si)_mm_min_epu32(__A, __B),
3043 (__v4si)__W);
3044}
3045
3046static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3047_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3048 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3049 (__v8si)_mm256_min_epu32(__A, __B),
3050 (__v8si)_mm256_setzero_si256());
3051}
3052
3053static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3054_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3055 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3056 (__v8si)_mm256_min_epu32(__A, __B),
3057 (__v8si)__W);
3058}
3059
3060static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3061_mm_min_epu64(__m128i __A, __m128i __B) {
3062 return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
3063}
3064
3065static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3066_mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3067 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3068 (__v2di)_mm_min_epu64(__A, __B),
3069 (__v2di)__W);
3070}
3071
3072static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3073_mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B) {
3074 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3075 (__v2di)_mm_min_epu64(__A, __B),
3076 (__v2di)_mm_setzero_si128());
3077}
3078
3079static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3080_mm256_min_epu64(__m256i __A, __m256i __B) {
3081 return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
3082}
3083
3084static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3085_mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3086 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3087 (__v4di)_mm256_min_epu64(__A, __B),
3088 (__v4di)__W);
3089}
3090
3091static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3092_mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B) {
3093 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3094 (__v4di)_mm256_min_epu64(__A, __B),
3095 (__v4di)_mm256_setzero_si256());
3096}
3097
/* Round-scale wrappers over __builtin_ia32_rndscalepd_128_mask. All three
   pass A and (int)imm; they differ only in the pass-through vector and mask:
   plain = zero vector with an all-ones mask, mask = W with U,
   maskz = zero vector with U. */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1))

#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)(__m128d)(W), \
      (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)))
3117
3118
/* Round-scale wrappers over __builtin_ia32_rndscalepd_256_mask. All three
   pass A and (int)imm; they differ only in the pass-through vector and mask:
   plain = zero vector with an all-ones mask, mask = W with U,
   maskz = zero vector with U. */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)-1))

#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_roundscale_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)))
3138
/* Round-scale wrappers over __builtin_ia32_rndscaleps_128_mask. All three
   pass A and (int)imm; they differ only in the pass-through vector and mask:
   plain = zero vector with an all-ones mask, mask = W with U,
   maskz = zero vector with U. */
#define _mm_roundscale_ps(A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1))

#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_roundscale_ps(U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
3155
/* Round-scale wrappers over __builtin_ia32_rndscaleps_256_mask. All three
   pass A and (int)imm; they differ only in the pass-through vector and mask:
   plain = zero vector with an all-ones mask, mask = W with U,
   maskz = zero vector with U. */
#define _mm256_roundscale_ps(A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1))

#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_roundscale_ps(U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)))
3171
3172static __inline__ __m128d __DEFAULT_FN_ATTRS128
3173_mm_scalef_pd (__m128d __A, __m128d __B) {
3174 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3175 (__v2df) __B,
3176 (__v2df)
3177 _mm_setzero_pd (),
3178 (__mmask8) -1);
3179}
3180
3181static __inline__ __m128d __DEFAULT_FN_ATTRS128
3182_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3183 __m128d __B) {
3184 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3185 (__v2df) __B,
3186 (__v2df) __W,
3187 (__mmask8) __U);
3188}
3189
3190static __inline__ __m128d __DEFAULT_FN_ATTRS128
3191_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3192 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3193 (__v2df) __B,
3194 (__v2df)
3195 _mm_setzero_pd (),
3196 (__mmask8) __U);
3197}
3198
3199static __inline__ __m256d __DEFAULT_FN_ATTRS256
3200_mm256_scalef_pd (__m256d __A, __m256d __B) {
3201 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3202 (__v4df) __B,
3203 (__v4df)
3205 (__mmask8) -1);
3206}
3207
3208static __inline__ __m256d __DEFAULT_FN_ATTRS256
3209_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3210 __m256d __B) {
3211 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3212 (__v4df) __B,
3213 (__v4df) __W,
3214 (__mmask8) __U);
3215}
3216
3217static __inline__ __m256d __DEFAULT_FN_ATTRS256
3218_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3219 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3220 (__v4df) __B,
3221 (__v4df)
3223 (__mmask8) __U);
3224}
3225
3226static __inline__ __m128 __DEFAULT_FN_ATTRS128
3227_mm_scalef_ps (__m128 __A, __m128 __B) {
3228 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3229 (__v4sf) __B,
3230 (__v4sf)
3231 _mm_setzero_ps (),
3232 (__mmask8) -1);
3233}
3234
3235static __inline__ __m128 __DEFAULT_FN_ATTRS128
3236_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3237 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3238 (__v4sf) __B,
3239 (__v4sf) __W,
3240 (__mmask8) __U);
3241}
3242
3243static __inline__ __m128 __DEFAULT_FN_ATTRS128
3244_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3245 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3246 (__v4sf) __B,
3247 (__v4sf)
3248 _mm_setzero_ps (),
3249 (__mmask8) __U);
3250}
3251
3252static __inline__ __m256 __DEFAULT_FN_ATTRS256
3253_mm256_scalef_ps (__m256 __A, __m256 __B) {
3254 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3255 (__v8sf) __B,
3256 (__v8sf)
3258 (__mmask8) -1);
3259}
3260
3261static __inline__ __m256 __DEFAULT_FN_ATTRS256
3262_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3263 __m256 __B) {
3264 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3265 (__v8sf) __B,
3266 (__v8sf) __W,
3267 (__mmask8) __U);
3268}
3269
3270static __inline__ __m256 __DEFAULT_FN_ATTRS256
3271_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3272 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3273 (__v8sf) __B,
3274 (__v8sf)
3276 (__mmask8) __U);
3277}
3278
/* 64-bit-index scatters of 64-bit elements from 128-bit vectors. `index` is
   viewed as __v2di; `v1` as __v2df (pd) or __v2di (epi64). The unmasked forms
   pass an all-ones mask; the mask forms forward `mask` cast to __mmask8. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))
3298
/* 64-bit-index scatters of 64-bit elements from 256-bit vectors. `index` is
   viewed as __v4di; `v1` as __v4df (pd) or __v4di (epi64). The unmasked forms
   pass an all-ones mask; the mask forms forward `mask` cast to __mmask8. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
3318
/* 64-bit-index scatters of 32-bit elements with a 128-bit index vector.
   `index` is viewed as __v2di; `v1` as __v4sf (ps) or __v4si (epi32). The
   unmasked forms pass an all-ones mask; the mask forms forward `mask` cast to
   __mmask8. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), (int)(scale))

#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), (int)(scale))

#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
3338
/* 64-bit-index scatters of 32-bit elements with a 256-bit index vector.
   `index` is viewed as __v4di while `v1` is only a 128-bit vector, __v4sf (ps)
   or __v4si (epi32), because four indices address four 32-bit elements. The
   unmasked forms pass an all-ones mask; the mask forms forward `mask` cast to
   __mmask8. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4sf)(__m128)(v1), (int)(scale))

#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4sf)(__m128)(v1), (int)(scale))

#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
3358
/* 32-bit-index scatters of 64-bit elements from 128-bit vectors. `index` is
   viewed as __v4si; `v1` as __v2df (pd) or __v2di (epi64). The unmasked forms
   pass an all-ones mask; the mask forms forward `mask` cast to __mmask8. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))
3378
/* 32-bit-index scatters of 64-bit elements from 256-bit vectors. `index` is
   a 128-bit vector viewed as __v4si (four indices for four 64-bit elements);
   `v1` is __v4df (pd) or __v4di (epi64). The unmasked forms pass an all-ones
   mask; the mask forms forward `mask` cast to __mmask8. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
3398
/* 32-bit-index scatters of 32-bit elements from 128-bit vectors. `index` is
   viewed as __v4si; `v1` as __v4sf (ps) or __v4si (epi32). The unmasked forms
   pass an all-ones mask; the mask forms forward `mask` cast to __mmask8. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), (int)(scale))

#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), (int)(scale))

#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
3418
/* 32-bit-index scatters of 32-bit elements from 256-bit vectors. `index` is
   viewed as __v8si; `v1` as __v8sf (ps) or __v8si (epi32). The unmasked forms
   pass an all-ones mask; the mask forms forward `mask` cast to __mmask8. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8sf)(__m256)(v1), (int)(scale))

#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8sf)(__m256)(v1), (int)(scale))

#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale))
3438
3439 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3440 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3441 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3442 (__v2df)_mm_sqrt_pd(__A),
3443 (__v2df)__W);
3444 }
3445
3446 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3447 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3448 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3449 (__v2df)_mm_sqrt_pd(__A),
3450 (__v2df)_mm_setzero_pd());
3451 }
3452
3453 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3454 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3455 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3456 (__v4df)_mm256_sqrt_pd(__A),
3457 (__v4df)__W);
3458 }
3459
3460 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3461 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3462 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3463 (__v4df)_mm256_sqrt_pd(__A),
3464 (__v4df)_mm256_setzero_pd());
3465 }
3466
3467 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3468 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3469 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3470 (__v4sf)_mm_sqrt_ps(__A),
3471 (__v4sf)__W);
3472 }
3473
3474 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3475 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3476 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3477 (__v4sf)_mm_sqrt_ps(__A),
3478 (__v4sf)_mm_setzero_ps());
3479 }
3480
3481 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3482 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3483 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3484 (__v8sf)_mm256_sqrt_ps(__A),
3485 (__v8sf)__W);
3486 }
3487
3488 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3490 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3491 (__v8sf)_mm256_sqrt_ps(__A),
3492 (__v8sf)_mm256_setzero_ps());
3493 }
3494
3495 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3496 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3497 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3498 (__v2df)_mm_sub_pd(__A, __B),
3499 (__v2df)__W);
3500 }
3501
3502 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3503 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3504 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3505 (__v2df)_mm_sub_pd(__A, __B),
3506 (__v2df)_mm_setzero_pd());
3507 }
3508
3509 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3510 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3511 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3512 (__v4df)_mm256_sub_pd(__A, __B),
3513 (__v4df)__W);
3514 }
3515
3516 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3517 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3518 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3519 (__v4df)_mm256_sub_pd(__A, __B),
3520 (__v4df)_mm256_setzero_pd());
3521 }
3522
3523 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3524 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3525 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3526 (__v4sf)_mm_sub_ps(__A, __B),
3527 (__v4sf)__W);
3528 }
3529
3530 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3531 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3532 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3533 (__v4sf)_mm_sub_ps(__A, __B),
3534 (__v4sf)_mm_setzero_ps());
3535 }
3536
3537 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3538 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3539 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3540 (__v8sf)_mm256_sub_ps(__A, __B),
3541 (__v8sf)__W);
3542 }
3543
3544 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3545 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3546 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3547 (__v8sf)_mm256_sub_ps(__A, __B),
3548 (__v8sf)_mm256_setzero_ps());
3549 }
3550
3551 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3552 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3553 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3554 (__v4si)__B);
3555 }
3556
3557 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3558 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3559 __m128i __B) {
3560 return (__m128i)__builtin_ia32_selectd_128(__U,
3561 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3562 (__v4si)__A);
3563 }
3564
3565 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3566 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3567 __m128i __B) {
3568 return (__m128i)__builtin_ia32_selectd_128(__U,
3569 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3570 (__v4si)__I);
3571 }
3572
3573 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3574 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3575 __m128i __B) {
3576 return (__m128i)__builtin_ia32_selectd_128(__U,
3577 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3578 (__v4si)_mm_setzero_si128());
3579 }
3580
3581 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3582 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3583 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3584 (__v8si) __B);
3585 }
3586
3587 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3588 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3589 __m256i __B) {
3590 return (__m256i)__builtin_ia32_selectd_256(__U,
3591 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3592 (__v8si)__A);
3593 }
3594
3595 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3596 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3597 __m256i __B) {
3598 return (__m256i)__builtin_ia32_selectd_256(__U,
3599 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3600 (__v8si)__I);
3601 }
3602
3603 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3604 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3605 __m256i __B) {
3606 return (__m256i)__builtin_ia32_selectd_256(__U,
3607 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3608 (__v8si)_mm256_setzero_si256());
3609 }
3610
3611 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3612 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3613 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3614 (__v2df)__B);
3615 }
3616
3617 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3618 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I,
3619 __m128d __B) {
3620 return (__m128d)__builtin_ia32_selectpd_128(__U,
3621 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3622 (__v2df)__A);
3623 }
3624
3625 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3626 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U,
3627 __m128d __B) {
3628 return (__m128d)__builtin_ia32_selectpd_128(__U,
3629 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3630 (__v2df)(__m128d)__I);
3631 }
3632
3633 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3634 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I,
3635 __m128d __B) {
3636 return (__m128d)__builtin_ia32_selectpd_128(__U,
3637 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3638 (__v2df)_mm_setzero_pd());
3639 }
3640
3641 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3642 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3643 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3644 (__v4df)__B);
3645 }
3646
3647 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3648 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3649 __m256d __B) {
3650 return (__m256d)__builtin_ia32_selectpd_256(__U,
3651 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3652 (__v4df)__A);
3653 }
3654
3655 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3656 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3657 __m256d __B) {
3658 return (__m256d)__builtin_ia32_selectpd_256(__U,
3659 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3660 (__v4df)(__m256d)__I);
3661 }
3662
3663 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
3664 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3665 __m256d __B) {
3666 return (__m256d)__builtin_ia32_selectpd_256(__U,
3667 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3668 (__v4df)_mm256_setzero_pd());
3669 }
3670
3671 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3672 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3673 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3674 (__v4sf)__B);
3675 }
3676
3677 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3678 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3679 return (__m128)__builtin_ia32_selectps_128(__U,
3680 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3681 (__v4sf)__A);
3682 }
3683
3684 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3685 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3686 return (__m128)__builtin_ia32_selectps_128(__U,
3687 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3688 (__v4sf)(__m128)__I);
3689 }
3690
3691 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3692 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3693 return (__m128)__builtin_ia32_selectps_128(__U,
3694 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3695 (__v4sf)_mm_setzero_ps());
3696 }
3697
3698 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3699 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3700 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3701 (__v8sf) __B);
3702 }
3703
3704 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3705 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I,
3706 __m256 __B) {
3707 return (__m256)__builtin_ia32_selectps_256(__U,
3708 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3709 (__v8sf)__A);
3710 }
3711
3712 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3713 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3714 __m256 __B) {
3715 return (__m256)__builtin_ia32_selectps_256(__U,
3716 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3717 (__v8sf)(__m256)__I);
3718 }
3719
3720 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
3721 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3722 __m256 __B) {
3723 return (__m256)__builtin_ia32_selectps_256(__U,
3724 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3725 (__v8sf)_mm256_setzero_ps());
3726 }
3727
3728 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3729 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3730 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3731 (__v2di)__B);
3732 }
3733
3734 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3735 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3736 __m128i __B) {
3737 return (__m128i)__builtin_ia32_selectq_128(__U,
3738 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3739 (__v2di)__A);
3740 }
3741
3742 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3743 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3744 __m128i __B) {
3745 return (__m128i)__builtin_ia32_selectq_128(__U,
3746 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3747 (__v2di)__I);
3748 }
3749
3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
3751 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3752 __m128i __B) {
3753 return (__m128i)__builtin_ia32_selectq_128(__U,
3754 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3755 (__v2di)_mm_setzero_si128());
3756 }
3757
3758 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3759 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3760 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3761 (__v4di) __B);
3762 }
3763
3764 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3765 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3766 __m256i __B) {
3767 return (__m256i)__builtin_ia32_selectq_256(__U,
3768 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3769 (__v4di)__A);
3770 }
3771
3772 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3773 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3774 __m256i __B) {
3775 return (__m256i)__builtin_ia32_selectq_256(__U,
3776 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3777 (__v4di)__I);
3778 }
3779
3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
3781 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3782 __m256i __B) {
3783 return (__m256i)__builtin_ia32_selectq_256(__U,
3784 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3785 (__v4di)_mm256_setzero_si256());
3786 }
3787
3788 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3789 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3790 {
3791 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3792 (__v4si)_mm_cvtepi8_epi32(__A),
3793 (__v4si)__W);
3794 }
3795
3796 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3798 {
3799 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3800 (__v4si)_mm_cvtepi8_epi32(__A),
3801 (__v4si)_mm_setzero_si128());
3802 }
3803
3804 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3805 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3806 {
3807 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3808 (__v8si)_mm256_cvtepi8_epi32(__A),
3809 (__v8si)__W);
3810 }
3811
3812 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3814 {
3815 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3816 (__v8si)_mm256_cvtepi8_epi32(__A),
3817 (__v8si)_mm256_setzero_si256());
3818 }
3819
3820 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3821 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
3822 {
3823 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3824 (__v2di)_mm_cvtepi8_epi64(__A),
3825 (__v2di)__W);
3826 }
3827
3828 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3830 {
3831 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3832 (__v2di)_mm_cvtepi8_epi64(__A),
3833 (__v2di)_mm_setzero_si128());
3834 }
3835
3836 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3837 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
3838 {
3839 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3840 (__v4di)_mm256_cvtepi8_epi64(__A),
3841 (__v4di)__W);
3842 }
3843
3844 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3846 {
3847 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3848 (__v4di)_mm256_cvtepi8_epi64(__A),
3849 (__v4di)_mm256_setzero_si256());
3850 }
3851
3852 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3853 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
3854 {
3855 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3856 (__v2di)_mm_cvtepi32_epi64(__X),
3857 (__v2di)__W);
3858 }
3859
3860 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3862 {
3863 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3864 (__v2di)_mm_cvtepi32_epi64(__X),
3865 (__v2di)_mm_setzero_si128());
3866 }
3867
3868 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3869 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
3870 {
3871 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3872 (__v4di)_mm256_cvtepi32_epi64(__X),
3873 (__v4di)__W);
3874 }
3875
3876 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3878 {
3879 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3880 (__v4di)_mm256_cvtepi32_epi64(__X),
3881 (__v4di)_mm256_setzero_si256());
3882 }
3883
3884 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3885 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3886 {
3887 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3888 (__v4si)_mm_cvtepi16_epi32(__A),
3889 (__v4si)__W);
3890 }
3891
3892 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3894 {
3895 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3896 (__v4si)_mm_cvtepi16_epi32(__A),
3897 (__v4si)_mm_setzero_si128());
3898 }
3899
3900 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3901 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
3902 {
3903 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3904 (__v8si)_mm256_cvtepi16_epi32(__A),
3905 (__v8si)__W);
3906 }
3907
3908 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3910 {
3911 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3912 (__v8si)_mm256_cvtepi16_epi32(__A),
3913 (__v8si)_mm256_setzero_si256());
3914 }
3915
3916 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3917 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
3918 {
3919 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3920 (__v2di)_mm_cvtepi16_epi64(__A),
3921 (__v2di)__W);
3922 }
3923
3924 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3926 {
3927 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3928 (__v2di)_mm_cvtepi16_epi64(__A),
3929 (__v2di)_mm_setzero_si128());
3930 }
3931
3932 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3933 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
3934 {
3935 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3936 (__v4di)_mm256_cvtepi16_epi64(__A),
3937 (__v4di)__W);
3938 }
3939
3940 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3942 {
3943 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3944 (__v4di)_mm256_cvtepi16_epi64(__A),
3945 (__v4di)_mm256_setzero_si256());
3946 }
3947
3948
3949 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3950 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3951 {
3952 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3953 (__v4si)_mm_cvtepu8_epi32(__A),
3954 (__v4si)__W);
3955 }
3956
3957 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3959 {
3960 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3961 (__v4si)_mm_cvtepu8_epi32(__A),
3962 (__v4si)_mm_setzero_si128());
3963 }
3964
3965 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3966 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
3967 {
3968 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3969 (__v8si)_mm256_cvtepu8_epi32(__A),
3970 (__v8si)__W);
3971 }
3972
3973 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3975 {
3976 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3977 (__v8si)_mm256_cvtepu8_epi32(__A),
3978 (__v8si)_mm256_setzero_si256());
3979 }
3980
3981 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3982 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
3983 {
3984 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3985 (__v2di)_mm_cvtepu8_epi64(__A),
3986 (__v2di)__W);
3987 }
3988
3989 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3991 {
3992 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3993 (__v2di)_mm_cvtepu8_epi64(__A),
3994 (__v2di)_mm_setzero_si128());
3995 }
3996
3997 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3998 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
3999 {
4000 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4001 (__v4di)_mm256_cvtepu8_epi64(__A),
4002 (__v4di)__W);
4003 }
4004
4005 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4007 {
4008 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4009 (__v4di)_mm256_cvtepu8_epi64(__A),
4010 (__v4di)_mm256_setzero_si256());
4011 }
4012
4013 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4014 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4015 {
4016 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4017 (__v2di)_mm_cvtepu32_epi64(__X),
4018 (__v2di)__W);
4019 }
4020
4021 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4023 {
4024 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4025 (__v2di)_mm_cvtepu32_epi64(__X),
4026 (__v2di)_mm_setzero_si128());
4027 }
4028
4029 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4030 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4031 {
4032 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4033 (__v4di)_mm256_cvtepu32_epi64(__X),
4034 (__v4di)__W);
4035 }
4036
4037 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4039 {
4040 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4041 (__v4di)_mm256_cvtepu32_epi64(__X),
4042 (__v4di)_mm256_setzero_si256());
4043 }
4044
4045 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4046 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4047 {
4048 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4049 (__v4si)_mm_cvtepu16_epi32(__A),
4050 (__v4si)__W);
4051 }
4052
4053 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4055 {
4056 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4057 (__v4si)_mm_cvtepu16_epi32(__A),
4058 (__v4si)_mm_setzero_si128());
4059 }
4060
4061 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4062 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4063 {
4064 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4065 (__v8si)_mm256_cvtepu16_epi32(__A),
4066 (__v8si)__W);
4067 }
4068
4069 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4071 {
4072 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4073 (__v8si)_mm256_cvtepu16_epi32(__A),
4074 (__v8si)_mm256_setzero_si256());
4075 }
4076
4077 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4078 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4079 {
4080 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4081 (__v2di)_mm_cvtepu16_epi64(__A),
4082 (__v2di)__W);
4083 }
4084
4085 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4087 {
4088 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4089 (__v2di)_mm_cvtepu16_epi64(__A),
4090 (__v2di)_mm_setzero_si128());
4091 }
4092
4093 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4094 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4095 {
4096 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4097 (__v4di)_mm256_cvtepu16_epi64(__A),
4098 (__v4di)__W);
4099 }
4100
4101 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4103 {
4104 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4105 (__v4di)_mm256_cvtepu16_epi64(__A),
4106 (__v4di)_mm256_setzero_si256());
4107 }
4108
4109
/* Expands to the prold128 builtin applied to a (viewed as __v4si) with the
 * count b cast to int; the result is cast back to __m128i. */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
4112
/* Merge-masked _mm_rol_epi32: a set bit in u takes the lane of
 * _mm_rol_epi32(a, b); a clear bit keeps the corresponding lane of w. */
#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4117
/* Zero-masked _mm_rol_epi32: a set bit in u takes the lane of
 * _mm_rol_epi32(a, b); a clear bit yields zero. */
#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4122
/* Expands to the prold256 builtin applied to a (viewed as __v8si) with the
 * count b cast to int; the result is cast back to __m256i. */
#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
4125
/* Merge-masked _mm256_rol_epi32: a set bit in u takes the lane of
 * _mm256_rol_epi32(a, b); a clear bit keeps the corresponding lane of w. */
#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4130
/* Zero-masked _mm256_rol_epi32: a set bit in u takes the lane of
 * _mm256_rol_epi32(a, b); a clear bit yields zero. */
#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4135
/* Expands to the prolq128 builtin applied to a (viewed as __v2di) with the
 * count b cast to int; the result is cast back to __m128i. */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
4138
/* Merge-masked _mm_rol_epi64: a set bit in u takes the lane of
 * _mm_rol_epi64(a, b); a clear bit keeps the corresponding lane of w. */
#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4143
/* Zero-masked _mm_rol_epi64: a set bit in u takes the lane of
 * _mm_rol_epi64(a, b); a clear bit yields zero. */
#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4148
/* Expands to the prolq256 builtin applied to a (viewed as __v4di) with the
 * count b cast to int; the result is cast back to __m256i. */
#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
4151
/* Merge-masked _mm256_rol_epi64: a set bit in u takes the lane of
 * _mm256_rol_epi64(a, b); a clear bit keeps the corresponding lane of w. */
#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4156
/* Zero-masked _mm256_rol_epi64: a set bit in u takes the lane of
 * _mm256_rol_epi64(a, b); a clear bit yields zero. */
#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4161
4162static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4163_mm_rolv_epi32 (__m128i __A, __m128i __B)
4164{
4165 return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
4166}
4167
4168static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4169_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4170{
4171 return (__m128i)__builtin_ia32_selectd_128(__U,
4172 (__v4si)_mm_rolv_epi32(__A, __B),
4173 (__v4si)__W);
4174}
4175
4176static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4177_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4178{
4179 return (__m128i)__builtin_ia32_selectd_128(__U,
4180 (__v4si)_mm_rolv_epi32(__A, __B),
4181 (__v4si)_mm_setzero_si128());
4182}
4183
4184static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4185_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4186{
4187 return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__A, (__v8su)__B);
4188}
4189
4190static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4191_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4192{
4193 return (__m256i)__builtin_ia32_selectd_256(__U,
4194 (__v8si)_mm256_rolv_epi32(__A, __B),
4195 (__v8si)__W);
4196}
4197
4198static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4199_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4200{
4201 return (__m256i)__builtin_ia32_selectd_256(__U,
4202 (__v8si)_mm256_rolv_epi32(__A, __B),
4203 (__v8si)_mm256_setzero_si256());
4204}
4205
4206static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4207_mm_rolv_epi64 (__m128i __A, __m128i __B)
4208{
4209 return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
4210}
4211
4212static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4213_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4214{
4215 return (__m128i)__builtin_ia32_selectq_128(__U,
4216 (__v2di)_mm_rolv_epi64(__A, __B),
4217 (__v2di)__W);
4218}
4219
4220static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4221_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4222{
4223 return (__m128i)__builtin_ia32_selectq_128(__U,
4224 (__v2di)_mm_rolv_epi64(__A, __B),
4225 (__v2di)_mm_setzero_si128());
4226}
4227
4228static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4229_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4230{
4231 return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__A, (__v4du)__B);
4232}
4233
4234static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4235_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4236{
4237 return (__m256i)__builtin_ia32_selectq_256(__U,
4238 (__v4di)_mm256_rolv_epi64(__A, __B),
4239 (__v4di)__W);
4240}
4241
4242static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4243_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4244{
4245 return (__m256i)__builtin_ia32_selectq_256(__U,
4246 (__v4di)_mm256_rolv_epi64(__A, __B),
4247 (__v4di)_mm256_setzero_si256());
4248}
4249
/* Expands to the prord128 builtin applied to a (viewed as __v4si) with the
 * count b cast to int; the result is cast back to __m128i. */
#define _mm_ror_epi32(a, b) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
4252
/* Merge-masked _mm_ror_epi32: a set bit in u takes the lane of
 * _mm_ror_epi32(a, b); a clear bit keeps the corresponding lane of w. */
#define _mm_mask_ror_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4257
/* Zero-masked _mm_ror_epi32: a set bit in u takes the lane of
 * _mm_ror_epi32(a, b); a clear bit yields zero. */
#define _mm_maskz_ror_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4262
/* Expands to the prord256 builtin applied to a (viewed as __v8si) with the
 * count b cast to int; the result is cast back to __m256i. */
#define _mm256_ror_epi32(a, b) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
4265
/* Merge-masked _mm256_ror_epi32: a set bit in u takes the lane of
 * _mm256_ror_epi32(a, b); a clear bit keeps the corresponding lane of w. */
#define _mm256_mask_ror_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4270
/* Zero-masked _mm256_ror_epi32: a set bit in u takes the lane of
 * _mm256_ror_epi32(a, b); a clear bit yields zero. */
#define _mm256_maskz_ror_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4275
/* Expands to the prorq128 builtin applied to a (viewed as __v2di) with the
 * count b cast to int; the result is cast back to __m128i. */
#define _mm_ror_epi64(a, b) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
4278
/* Merge-masked _mm_ror_epi64: a set bit in u takes the lane of
 * _mm_ror_epi64(a, b); a clear bit keeps the corresponding lane of w. */
#define _mm_mask_ror_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4283
/* Zero-masked _mm_ror_epi64: a set bit in u takes the lane of
 * _mm_ror_epi64(a, b); a clear bit yields zero. */
#define _mm_maskz_ror_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4288
/* Expands to the prorq256 builtin applied to a (viewed as __v4di) with the
 * count b cast to int; the result is cast back to __m256i. */
#define _mm256_ror_epi64(a, b) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
4291
/* Merge-masked _mm256_ror_epi64: a set bit in u takes the lane of
 * _mm256_ror_epi64(a, b); a clear bit keeps the corresponding lane of w. */
#define _mm256_mask_ror_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4296
/* Zero-masked _mm256_ror_epi64: a set bit in u takes the lane of
 * _mm256_ror_epi64(a, b); a clear bit yields zero. */
#define _mm256_maskz_ror_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4301
4302static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4303_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
4304 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4305 (__v4si)_mm_sll_epi32(__A, __B),
4306 (__v4si)__W);
4307}
4308
4309static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4310_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
4311 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4312 (__v4si)_mm_sll_epi32(__A, __B),
4313 (__v4si)_mm_setzero_si128());
4314}
4315
4316static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4317_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
4318 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4319 (__v8si)_mm256_sll_epi32(__A, __B),
4320 (__v8si)__W);
4321}
4322
4323static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4324_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
4325 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4326 (__v8si)_mm256_sll_epi32(__A, __B),
4327 (__v8si)_mm256_setzero_si256());
4328}
4329
4330static __inline__ __m128i __DEFAULT_FN_ATTRS128
4331_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4332{
4333 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4334 (__v4si)_mm_slli_epi32(__A, (int)__B),
4335 (__v4si)__W);
4336}
4337
4338static __inline__ __m128i __DEFAULT_FN_ATTRS128
4339_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4340{
4341 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4342 (__v4si)_mm_slli_epi32(__A, (int)__B),
4343 (__v4si)_mm_setzero_si128());
4344}
4345
4346static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4347_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A,
4348 unsigned int __B) {
4349 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4350 (__v8si)_mm256_slli_epi32(__A, (int)__B),
4351 (__v8si)__W);
4352}
4353
4354static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4355_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
4356 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4357 (__v8si)_mm256_slli_epi32(__A, (int)__B),
4358 (__v8si)_mm256_setzero_si256());
4359}
4360
4361static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4362_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
4363 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4364 (__v2di)_mm_sll_epi64(__A, __B),
4365 (__v2di)__W);
4366}
4367
4368static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4369_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
4370 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4371 (__v2di)_mm_sll_epi64(__A, __B),
4372 (__v2di)_mm_setzero_si128());
4373}
4374
4375static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4376_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
4377 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4378 (__v4di)_mm256_sll_epi64(__A, __B),
4379 (__v4di)__W);
4380}
4381
4382static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4383_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
4384 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4385 (__v4di)_mm256_sll_epi64(__A, __B),
4386 (__v4di)_mm256_setzero_si256());
4387}
4388
4389static __inline__ __m128i __DEFAULT_FN_ATTRS128
4390_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4391{
4392 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4393 (__v2di)_mm_slli_epi64(__A, (int)__B),
4394 (__v2di)__W);
4395}
4396
4397static __inline__ __m128i __DEFAULT_FN_ATTRS128
4398_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4399{
4400 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4401 (__v2di)_mm_slli_epi64(__A, (int)__B),
4402 (__v2di)_mm_setzero_si128());
4403}
4404
4405static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4406_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A,
4407 unsigned int __B) {
4408 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4409 (__v4di)_mm256_slli_epi64(__A, (int)__B),
4410 (__v4di)__W);
4411}
4412
4413static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4414_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) {
4415 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4416 (__v4di)_mm256_slli_epi64(__A, (int)__B),
4417 (__v4di)_mm256_setzero_si256());
4418}
4419
4420static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4421_mm_rorv_epi32 (__m128i __A, __m128i __B)
4422{
4423 return (__m128i)__builtin_elementwise_fshr((__v4su)__A, (__v4su)__A, (__v4su)__B);
4424}
4425
4426static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4427_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4428{
4429 return (__m128i)__builtin_ia32_selectd_128(__U,
4430 (__v4si)_mm_rorv_epi32(__A, __B),
4431 (__v4si)__W);
4432}
4433
4434static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4435_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4436{
4437 return (__m128i)__builtin_ia32_selectd_128(__U,
4438 (__v4si)_mm_rorv_epi32(__A, __B),
4439 (__v4si)_mm_setzero_si128());
4440}
4441
4442static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4443_mm256_rorv_epi32 (__m256i __A, __m256i __B)
4444{
4445 return (__m256i)__builtin_elementwise_fshr((__v8su)__A, (__v8su)__A, (__v8su)__B);
4446}
4447
4448static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4449_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4450{
4451 return (__m256i)__builtin_ia32_selectd_256(__U,
4452 (__v8si)_mm256_rorv_epi32(__A, __B),
4453 (__v8si)__W);
4454}
4455
4456static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4457_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4458{
4459 return (__m256i)__builtin_ia32_selectd_256(__U,
4460 (__v8si)_mm256_rorv_epi32(__A, __B),
4461 (__v8si)_mm256_setzero_si256());
4462}
4463
4464static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4465_mm_rorv_epi64 (__m128i __A, __m128i __B)
4466{
4467 return (__m128i)__builtin_elementwise_fshr((__v2du)__A, (__v2du)__A, (__v2du)__B);
4468}
4469
4470static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4471_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4472{
4473 return (__m128i)__builtin_ia32_selectq_128(__U,
4474 (__v2di)_mm_rorv_epi64(__A, __B),
4475 (__v2di)__W);
4476}
4477
4478static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4479_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4480{
4481 return (__m128i)__builtin_ia32_selectq_128(__U,
4482 (__v2di)_mm_rorv_epi64(__A, __B),
4483 (__v2di)_mm_setzero_si128());
4484}
4485
4486static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4487_mm256_rorv_epi64 (__m256i __A, __m256i __B)
4488{
4489 return (__m256i)__builtin_elementwise_fshr((__v4du)__A, (__v4du)__A, (__v4du)__B);
4490}
4491
4492static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4493_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4494{
4495 return (__m256i)__builtin_ia32_selectq_256(__U,
4496 (__v4di)_mm256_rorv_epi64(__A, __B),
4497 (__v4di)__W);
4498}
4499
4500static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4501_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4502{
4503 return (__m256i)__builtin_ia32_selectq_256(__U,
4504 (__v4di)_mm256_rorv_epi64(__A, __B),
4505 (__v4di)_mm256_setzero_si256());
4506}
4507
4508static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4509_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4510{
4511 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4512 (__v2di)_mm_sllv_epi64(__X, __Y),
4513 (__v2di)__W);
4514}
4515
4516static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4517_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4518{
4519 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4520 (__v2di)_mm_sllv_epi64(__X, __Y),
4521 (__v2di)_mm_setzero_si128());
4522}
4523
4524static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4525_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4526{
4527 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4528 (__v4di)_mm256_sllv_epi64(__X, __Y),
4529 (__v4di)__W);
4530}
4531
4532static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4533_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4534{
4535 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4536 (__v4di)_mm256_sllv_epi64(__X, __Y),
4537 (__v4di)_mm256_setzero_si256());
4538}
4539
4540static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4541_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4542{
4543 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4544 (__v4si)_mm_sllv_epi32(__X, __Y),
4545 (__v4si)__W);
4546}
4547
4548static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4549_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4550{
4551 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4552 (__v4si)_mm_sllv_epi32(__X, __Y),
4553 (__v4si)_mm_setzero_si128());
4554}
4555
4556static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4557_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4558{
4559 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4560 (__v8si)_mm256_sllv_epi32(__X, __Y),
4561 (__v8si)__W);
4562}
4563
4564static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4565_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4566{
4567 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4568 (__v8si)_mm256_sllv_epi32(__X, __Y),
4569 (__v8si)_mm256_setzero_si256());
4570}
4571
4572static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4573_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4574{
4575 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4576 (__v2di)_mm_srlv_epi64(__X, __Y),
4577 (__v2di)__W);
4578}
4579
4580static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4581_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4582{
4583 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4584 (__v2di)_mm_srlv_epi64(__X, __Y),
4585 (__v2di)_mm_setzero_si128());
4586}
4587
4588static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4589_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4590{
4591 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4592 (__v4di)_mm256_srlv_epi64(__X, __Y),
4593 (__v4di)__W);
4594}
4595
4596static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4597_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4598{
4599 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4600 (__v4di)_mm256_srlv_epi64(__X, __Y),
4601 (__v4di)_mm256_setzero_si256());
4602}
4603
4604static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4605_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4606{
4607 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4608 (__v4si)_mm_srlv_epi32(__X, __Y),
4609 (__v4si)__W);
4610}
4611
4612static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4613_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4614{
4615 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4616 (__v4si)_mm_srlv_epi32(__X, __Y),
4617 (__v4si)_mm_setzero_si128());
4618}
4619
4620static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4621_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4622{
4623 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4624 (__v8si)_mm256_srlv_epi32(__X, __Y),
4625 (__v8si)__W);
4626}
4627
4628static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4629_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4630{
4631 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4632 (__v8si)_mm256_srlv_epi32(__X, __Y),
4633 (__v8si)_mm256_setzero_si256());
4634}
4635
4636static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4637_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
4638 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4639 (__v4si)_mm_srl_epi32(__A, __B),
4640 (__v4si)__W);
4641}
4642
4643static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4644_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
4645 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4646 (__v4si)_mm_srl_epi32(__A, __B),
4647 (__v4si)_mm_setzero_si128());
4648}
4649
4650static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4651_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
4652 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4653 (__v8si)_mm256_srl_epi32(__A, __B),
4654 (__v8si)__W);
4655}
4656
4657static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4658_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
4659 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4660 (__v8si)_mm256_srl_epi32(__A, __B),
4661 (__v8si)_mm256_setzero_si256());
4662}
4663
4664static __inline__ __m128i __DEFAULT_FN_ATTRS128
4665_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4666{
4667 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4668 (__v4si)_mm_srli_epi32(__A, (int)__B),
4669 (__v4si)__W);
4670}
4671
4672static __inline__ __m128i __DEFAULT_FN_ATTRS128
4673_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4674{
4675 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4676 (__v4si)_mm_srli_epi32(__A, (int)__B),
4677 (__v4si)_mm_setzero_si128());
4678}
4679
4680static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4681_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A,
4682 unsigned int __B) {
4683 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4684 (__v8si)_mm256_srli_epi32(__A, (int)__B),
4685 (__v8si)__W);
4686}
4687
4688static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4689_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
4690 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4691 (__v8si)_mm256_srli_epi32(__A, (int)__B),
4692 (__v8si)_mm256_setzero_si256());
4693}
4694
4695static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4696_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
4697 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4698 (__v2di)_mm_srl_epi64(__A, __B),
4699 (__v2di)__W);
4700}
4701
4702static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4703_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
4704 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4705 (__v2di)_mm_srl_epi64(__A, __B),
4706 (__v2di)_mm_setzero_si128());
4707}
4708
4709static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4710_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
4711 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4712 (__v4di)_mm256_srl_epi64(__A, __B),
4713 (__v4di)__W);
4714}
4715
4716static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4717_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
4718 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4719 (__v4di)_mm256_srl_epi64(__A, __B),
4720 (__v4di)_mm256_setzero_si256());
4721}
4722
4723static __inline__ __m128i __DEFAULT_FN_ATTRS128
4724_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4725{
4726 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4727 (__v2di)_mm_srli_epi64(__A, (int)__B),
4728 (__v2di)__W);
4729}
4730
4731static __inline__ __m128i __DEFAULT_FN_ATTRS128
4732_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4733{
4734 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4735 (__v2di)_mm_srli_epi64(__A, (int)__B),
4736 (__v2di)_mm_setzero_si128());
4737}
4738
4739static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4740_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A,
4741 unsigned int __B) {
4742 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4743 (__v4di)_mm256_srli_epi64(__A, (int)__B),
4744 (__v4di)__W);
4745}
4746
4747static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4748_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) {
4749 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4750 (__v4di)_mm256_srli_epi64(__A, (int)__B),
4751 (__v4di)_mm256_setzero_si256());
4752}
4753
4754static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4755_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4756{
4757 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4758 (__v4si)_mm_srav_epi32(__X, __Y),
4759 (__v4si)__W);
4760}
4761
4762static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4763_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4764{
4765 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4766 (__v4si)_mm_srav_epi32(__X, __Y),
4767 (__v4si)_mm_setzero_si128());
4768}
4769
4770static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4771_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4772{
4773 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4774 (__v8si)_mm256_srav_epi32(__X, __Y),
4775 (__v8si)__W);
4776}
4777
4778static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4779_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4780{
4781 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4782 (__v8si)_mm256_srav_epi32(__X, __Y),
4783 (__v8si)_mm256_setzero_si256());
4784}
4785
4786static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4787_mm_srav_epi64(__m128i __X, __m128i __Y)
4788{
4789 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
4790}
4791
4792static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4793_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4794{
4795 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4796 (__v2di)_mm_srav_epi64(__X, __Y),
4797 (__v2di)__W);
4798}
4799
4800static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4801_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4802{
4803 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4804 (__v2di)_mm_srav_epi64(__X, __Y),
4805 (__v2di)_mm_setzero_si128());
4806}
4807
4808static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4809_mm256_srav_epi64(__m256i __X, __m256i __Y)
4810{
4811 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
4812}
4813
4814static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4815_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4816{
4817 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4818 (__v4di)_mm256_srav_epi64(__X, __Y),
4819 (__v4di)__W);
4820}
4821
4822static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4823_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
4824{
4825 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4826 (__v4di)_mm256_srav_epi64(__X, __Y),
4827 (__v4di)_mm256_setzero_si256());
4828}
4829
4830static __inline__ __m128i __DEFAULT_FN_ATTRS128
4831_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
4832{
4833 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
4834 (__v4si) __A,
4835 (__v4si) __W);
4836}
4837
4838static __inline__ __m128i __DEFAULT_FN_ATTRS128
4840{
4841 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
4842 (__v4si) __A,
4843 (__v4si) _mm_setzero_si128 ());
4844}
4845
4846
4847static __inline__ __m256i __DEFAULT_FN_ATTRS256
4848_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
4849{
4850 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
4851 (__v8si) __A,
4852 (__v8si) __W);
4853}
4854
4855static __inline__ __m256i __DEFAULT_FN_ATTRS256
4857{
4858 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
4859 (__v8si) __A,
4860 (__v8si) _mm256_setzero_si256 ());
4861}
4862
4863static __inline __m128i __DEFAULT_FN_ATTRS128
4864_mm_load_epi32 (void const *__P)
4865{
4866 return *(const __m128i *) __P;
4867}
4868
4869static __inline__ __m128i __DEFAULT_FN_ATTRS128
4870_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
4871{
4872 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
4873 (__v4si) __W,
4874 (__mmask8)
4875 __U);
4876}
4877
4878static __inline__ __m128i __DEFAULT_FN_ATTRS128
4880{
4881 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
4882 (__v4si)
4884 (__mmask8)
4885 __U);
4886}
4887
4888static __inline __m256i __DEFAULT_FN_ATTRS256
4890{
4891 return *(const __m256i *) __P;
4892}
4893
4894static __inline__ __m256i __DEFAULT_FN_ATTRS256
4895_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
4896{
4897 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
4898 (__v8si) __W,
4899 (__mmask8)
4900 __U);
4901}
4902
4903static __inline__ __m256i __DEFAULT_FN_ATTRS256
4905{
4906 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
4907 (__v8si)
4909 (__mmask8)
4910 __U);
4911}
4912
4913static __inline void __DEFAULT_FN_ATTRS128
4914_mm_store_epi32 (void *__P, __m128i __A)
4915{
4916 *(__m128i *) __P = __A;
4917}
4918
4919static __inline__ void __DEFAULT_FN_ATTRS128
4920_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
4921{
4922 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
4923 (__v4si) __A,
4924 (__mmask8) __U);
4925}
4926
4927static __inline void __DEFAULT_FN_ATTRS256
4928_mm256_store_epi32 (void *__P, __m256i __A)
4929{
4930 *(__m256i *) __P = __A;
4931}
4932
4933static __inline__ void __DEFAULT_FN_ATTRS256
4934_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
4935{
4936 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
4937 (__v8si) __A,
4938 (__mmask8) __U);
4939}
4940
4941static __inline__ __m128i __DEFAULT_FN_ATTRS128
4942_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
4943{
4944 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
4945 (__v2di) __A,
4946 (__v2di) __W);
4947}
4948
4949static __inline__ __m128i __DEFAULT_FN_ATTRS128
4951{
4952 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
4953 (__v2di) __A,
4954 (__v2di) _mm_setzero_si128 ());
4955}
4956
4957static __inline__ __m256i __DEFAULT_FN_ATTRS256
4958_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
4959{
4960 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
4961 (__v4di) __A,
4962 (__v4di) __W);
4963}
4964
4965static __inline__ __m256i __DEFAULT_FN_ATTRS256
4967{
4968 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
4969 (__v4di) __A,
4970 (__v4di) _mm256_setzero_si256 ());
4971}
4972
4973static __inline __m128i __DEFAULT_FN_ATTRS128
4974_mm_load_epi64 (void const *__P)
4975{
4976 return *(const __m128i *) __P;
4977}
4978
4979static __inline__ __m128i __DEFAULT_FN_ATTRS128
4980_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
4981{
4982 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
4983 (__v2di) __W,
4984 (__mmask8)
4985 __U);
4986}
4987
4988static __inline__ __m128i __DEFAULT_FN_ATTRS128
4990{
4991 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
4992 (__v2di)
4994 (__mmask8)
4995 __U);
4996}
4997
4998static __inline __m256i __DEFAULT_FN_ATTRS256
5000{
5001 return *(const __m256i *) __P;
5002}
5003
5004static __inline__ __m256i __DEFAULT_FN_ATTRS256
5005_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5006{
5007 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5008 (__v4di) __W,
5009 (__mmask8)
5010 __U);
5011}
5012
5013static __inline__ __m256i __DEFAULT_FN_ATTRS256
5015{
5016 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5017 (__v4di)
5019 (__mmask8)
5020 __U);
5021}
5022
5023static __inline void __DEFAULT_FN_ATTRS128
5024_mm_store_epi64 (void *__P, __m128i __A)
5025{
5026 *(__m128i *) __P = __A;
5027}
5028
5029static __inline__ void __DEFAULT_FN_ATTRS128
5030_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5031{
5032 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5033 (__v2di) __A,
5034 (__mmask8) __U);
5035}
5036
5037static __inline void __DEFAULT_FN_ATTRS256
5038_mm256_store_epi64 (void *__P, __m256i __A)
5039{
5040 *(__m256i *) __P = __A;
5041}
5042
5043static __inline__ void __DEFAULT_FN_ATTRS256
5044_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5045{
5046 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5047 (__v4di) __A,
5048 (__mmask8) __U);
5049}
5050
5051static __inline__ __m128d __DEFAULT_FN_ATTRS128
5052_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5053{
5054 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5055 (__v2df)_mm_movedup_pd(__A),
5056 (__v2df)__W);
5057}
5058
5059static __inline__ __m128d __DEFAULT_FN_ATTRS128
5061{
5062 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5063 (__v2df)_mm_movedup_pd(__A),
5064 (__v2df)_mm_setzero_pd());
5065}
5066
5067static __inline__ __m256d __DEFAULT_FN_ATTRS256
5068_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5069{
5070 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5071 (__v4df)_mm256_movedup_pd(__A),
5072 (__v4df)__W);
5073}
5074
5075static __inline__ __m256d __DEFAULT_FN_ATTRS256
5077{
5078 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5079 (__v4df)_mm256_movedup_pd(__A),
5080 (__v4df)_mm256_setzero_pd());
5081}
5082
5083static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
5084_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) {
5085 return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A),
5086 (__v4si)__O);
5087}
5088
5089static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
5091 return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A),
5092 (__v4si)_mm_setzero_si128());
5093}
5094
5095static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
5096_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) {
5097 return (__m256i)__builtin_ia32_selectd_256(
5098 __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)__O);
5099}
5100
5101static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
5103 return (__m256i)__builtin_ia32_selectd_256(
5104 __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)_mm256_setzero_si256());
5105}
5106
5107static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
5108_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) {
5109 return (__m128i) __builtin_ia32_selectq_128(__M,
5110 (__v2di) _mm_set1_epi64x(__A),
5111 (__v2di) __O);
5112}
5113
5114static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
5115_mm_maskz_set1_epi64(__mmask8 __M, long long __A) {
5116 return (__m128i) __builtin_ia32_selectq_128(__M,
5117 (__v2di) _mm_set1_epi64x(__A),
5118 (__v2di) _mm_setzero_si128());
5119}
5120
5121static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
5122_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) {
5123 return (__m256i) __builtin_ia32_selectq_256(__M,
5124 (__v4di) _mm256_set1_epi64x(__A),
5125 (__v4di) __O) ;
5126}
5127
5128static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
5130 return (__m256i)__builtin_ia32_selectq_256(
5131 __M, (__v4di)_mm256_set1_epi64x(__A), (__v4di)_mm256_setzero_si256());
5132}
5133
/* Unmasked form: forwards A, B, C and imm to the fixupimmpd128 mask builtin
   with an all-ones mask. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1))
5139
/* Masked form: forwards A, B, C and imm to the fixupimmpd128 mask builtin
   with U as the element mask. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5145
/* Zero-masking form: forwards A, B, C and imm to the fixupimmpd128 maskz
   builtin with U as the element mask. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5151
/* Unmasked form: forwards A, B, C and imm to the fixupimmpd256 mask builtin
   with an all-ones mask. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)-1))
5157
/* Masked form: forwards A, B, C and imm to the fixupimmpd256 mask builtin
   with U as the element mask. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5163
/* Zero-masking form: forwards A, B, C and imm to the fixupimmpd256 maskz
   builtin with U as the element mask. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5169
/* Unmasked form: forwards A, B, C and imm to the fixupimmps128 mask builtin
   with an all-ones mask. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1))
5175
/* Masked form: forwards A, B, C and imm to the fixupimmps128 mask builtin
   with U as the element mask. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5181
/* Zero-masking form: forwards A, B, C and imm to the fixupimmps128 maskz
   builtin with U as the element mask. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5187
/* Unmasked form: forwards A, B, C and imm to the fixupimmps256 mask builtin
   with an all-ones mask. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)-1))
5193
/* Masked form: forwards A, B, C and imm to the fixupimmps256 mask builtin
   with U as the element mask. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5199
/* Zero-masking form: forwards A, B, C and imm to the fixupimmps256 maskz
   builtin with U as the element mask. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5205
5206static __inline__ __m128d __DEFAULT_FN_ATTRS128
5207_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5208{
5209 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5210 (__v2df) __W,
5211 (__mmask8) __U);
5212}
5213
5214static __inline__ __m128d __DEFAULT_FN_ATTRS128
5216{
5217 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5218 (__v2df)
5219 _mm_setzero_pd (),
5220 (__mmask8) __U);
5221}
5222
5223static __inline__ __m256d __DEFAULT_FN_ATTRS256
5224_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5225{
5226 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5227 (__v4df) __W,
5228 (__mmask8) __U);
5229}
5230
5231static __inline__ __m256d __DEFAULT_FN_ATTRS256
5233{
5234 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5235 (__v4df)
5237 (__mmask8) __U);
5238}
5239
5240static __inline__ __m128 __DEFAULT_FN_ATTRS128
5241_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5242{
5243 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5244 (__v4sf) __W,
5245 (__mmask8) __U);
5246}
5247
5248static __inline__ __m128 __DEFAULT_FN_ATTRS128
5250{
5251 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5252 (__v4sf)
5253 _mm_setzero_ps (),
5254 (__mmask8) __U);
5255}
5256
5257static __inline__ __m256 __DEFAULT_FN_ATTRS256
5258_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5259{
5260 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5261 (__v8sf) __W,
5262 (__mmask8) __U);
5263}
5264
5265static __inline__ __m256 __DEFAULT_FN_ATTRS256
5267{
5268 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5269 (__v8sf)
5271 (__mmask8) __U);
5272}
5273
5274static __inline __m128i __DEFAULT_FN_ATTRS128
5276{
5277 struct __loadu_epi64 {
5278 __m128i_u __v;
5279 } __attribute__((__packed__, __may_alias__));
5280 return ((const struct __loadu_epi64*)__P)->__v;
5281}
5282
5283static __inline__ __m128i __DEFAULT_FN_ATTRS128
5284_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5285{
5286 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5287 (__v2di) __W,
5288 (__mmask8) __U);
5289}
5290
5291static __inline__ __m128i __DEFAULT_FN_ATTRS128
5293{
5294 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5295 (__v2di)
5297 (__mmask8) __U);
5298}
5299
5300static __inline __m256i __DEFAULT_FN_ATTRS256
5302{
5303 struct __loadu_epi64 {
5304 __m256i_u __v;
5305 } __attribute__((__packed__, __may_alias__));
5306 return ((const struct __loadu_epi64*)__P)->__v;
5307}
5308
5309static __inline__ __m256i __DEFAULT_FN_ATTRS256
5310_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5311{
5312 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5313 (__v4di) __W,
5314 (__mmask8) __U);
5315}
5316
5317static __inline__ __m256i __DEFAULT_FN_ATTRS256
5319{
5320 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5321 (__v4di)
5323 (__mmask8) __U);
5324}
5325
5326static __inline __m128i __DEFAULT_FN_ATTRS128
5328{
5329 struct __loadu_epi32 {
5330 __m128i_u __v;
5331 } __attribute__((__packed__, __may_alias__));
5332 return ((const struct __loadu_epi32*)__P)->__v;
5333}
5334
5335static __inline__ __m128i __DEFAULT_FN_ATTRS128
5336_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5337{
5338 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5339 (__v4si) __W,
5340 (__mmask8) __U);
5341}
5342
5343static __inline__ __m128i __DEFAULT_FN_ATTRS128
5345{
5346 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5347 (__v4si)
5349 (__mmask8) __U);
5350}
5351
5352static __inline __m256i __DEFAULT_FN_ATTRS256
5354{
5355 struct __loadu_epi32 {
5356 __m256i_u __v;
5357 } __attribute__((__packed__, __may_alias__));
5358 return ((const struct __loadu_epi32*)__P)->__v;
5359}
5360
5361static __inline__ __m256i __DEFAULT_FN_ATTRS256
5362_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5363{
5364 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5365 (__v8si) __W,
5366 (__mmask8) __U);
5367}
5368
5369static __inline__ __m256i __DEFAULT_FN_ATTRS256
5371{
5372 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5373 (__v8si)
5375 (__mmask8) __U);
5376}
5377
5378static __inline__ __m128d __DEFAULT_FN_ATTRS128
5379_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5380{
5381 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5382 (__v2df) __W,
5383 (__mmask8) __U);
5384}
5385
5386static __inline__ __m128d __DEFAULT_FN_ATTRS128
5388{
5389 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5390 (__v2df)
5391 _mm_setzero_pd (),
5392 (__mmask8) __U);
5393}
5394
5395static __inline__ __m256d __DEFAULT_FN_ATTRS256
5396_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5397{
5398 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5399 (__v4df) __W,
5400 (__mmask8) __U);
5401}
5402
5403static __inline__ __m256d __DEFAULT_FN_ATTRS256
5405{
5406 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5407 (__v4df)
5409 (__mmask8) __U);
5410}
5411
5412static __inline__ __m128 __DEFAULT_FN_ATTRS128
5413_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5414{
5415 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5416 (__v4sf) __W,
5417 (__mmask8) __U);
5418}
5419
5420static __inline__ __m128 __DEFAULT_FN_ATTRS128
5422{
5423 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5424 (__v4sf)
5425 _mm_setzero_ps (),
5426 (__mmask8) __U);
5427}
5428
5429static __inline__ __m256 __DEFAULT_FN_ATTRS256
5430_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5431{
5432 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5433 (__v8sf) __W,
5434 (__mmask8) __U);
5435}
5436
5437static __inline__ __m256 __DEFAULT_FN_ATTRS256
5439{
5440 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5441 (__v8sf)
5443 (__mmask8) __U);
5444}
5445
5446static __inline__ void __DEFAULT_FN_ATTRS128
5447_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5448{
5449 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5450 (__v2df) __A,
5451 (__mmask8) __U);
5452}
5453
5454static __inline__ void __DEFAULT_FN_ATTRS256
5455_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5456{
5457 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5458 (__v4df) __A,
5459 (__mmask8) __U);
5460}
5461
5462static __inline__ void __DEFAULT_FN_ATTRS128
5463_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5464{
5465 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5466 (__v4sf) __A,
5467 (__mmask8) __U);
5468}
5469
5470static __inline__ void __DEFAULT_FN_ATTRS256
5471_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5472{
5473 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5474 (__v8sf) __A,
5475 (__mmask8) __U);
5476}
5477
5478static __inline void __DEFAULT_FN_ATTRS128
5479_mm_storeu_epi64 (void *__P, __m128i __A)
5480{
5481 struct __storeu_epi64 {
5482 __m128i_u __v;
5483 } __attribute__((__packed__, __may_alias__));
5484 ((struct __storeu_epi64*)__P)->__v = __A;
5485}
5486
5487static __inline__ void __DEFAULT_FN_ATTRS128
5488_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5489{
5490 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5491 (__v2di) __A,
5492 (__mmask8) __U);
5493}
5494
5495static __inline void __DEFAULT_FN_ATTRS256
5496_mm256_storeu_epi64 (void *__P, __m256i __A)
5497{
5498 struct __storeu_epi64 {
5499 __m256i_u __v;
5500 } __attribute__((__packed__, __may_alias__));
5501 ((struct __storeu_epi64*)__P)->__v = __A;
5502}
5503
5504static __inline__ void __DEFAULT_FN_ATTRS256
5505_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5506{
5507 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5508 (__v4di) __A,
5509 (__mmask8) __U);
5510}
5511
5512static __inline void __DEFAULT_FN_ATTRS128
5513_mm_storeu_epi32 (void *__P, __m128i __A)
5514{
5515 struct __storeu_epi32 {
5516 __m128i_u __v;
5517 } __attribute__((__packed__, __may_alias__));
5518 ((struct __storeu_epi32*)__P)->__v = __A;
5519}
5520
5521static __inline__ void __DEFAULT_FN_ATTRS128
5522_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5523{
5524 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5525 (__v4si) __A,
5526 (__mmask8) __U);
5527}
5528
5529static __inline void __DEFAULT_FN_ATTRS256
5530_mm256_storeu_epi32 (void *__P, __m256i __A)
5531{
5532 struct __storeu_epi32 {
5533 __m256i_u __v;
5534 } __attribute__((__packed__, __may_alias__));
5535 ((struct __storeu_epi32*)__P)->__v = __A;
5536}
5537
5538static __inline__ void __DEFAULT_FN_ATTRS256
5539_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5540{
5541 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5542 (__v8si) __A,
5543 (__mmask8) __U);
5544}
5545
5546static __inline__ void __DEFAULT_FN_ATTRS128
5547_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5548{
5549 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5550 (__v2df) __A,
5551 (__mmask8) __U);
5552}
5553
5554static __inline__ void __DEFAULT_FN_ATTRS256
5555_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5556{
5557 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5558 (__v4df) __A,
5559 (__mmask8) __U);
5560}
5561
5562static __inline__ void __DEFAULT_FN_ATTRS128
5563_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5564{
5565 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5566 (__v4sf) __A,
5567 (__mmask8) __U);
5568}
5569
5570static __inline__ void __DEFAULT_FN_ATTRS256
5571_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5572{
5573 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5574 (__v8sf) __A,
5575 (__mmask8) __U);
5576}
5577
5578static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5579_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
5580 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5581 (__v2df)_mm_unpackhi_pd(__A, __B),
5582 (__v2df)__W);
5583}
5584
5585static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5586_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) {
5587 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5588 (__v2df)_mm_unpackhi_pd(__A, __B),
5589 (__v2df)_mm_setzero_pd());
5590}
5591
5592static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5593_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
5594 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5595 (__v4df)_mm256_unpackhi_pd(__A, __B),
5596 (__v4df)__W);
5597}
5598
5599static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5600_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) {
5601 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5602 (__v4df)_mm256_unpackhi_pd(__A, __B),
5603 (__v4df)_mm256_setzero_pd());
5604}
5605
5606static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5607_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
5608 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5609 (__v4sf)_mm_unpackhi_ps(__A, __B),
5610 (__v4sf)__W);
5611}
5612
5613static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5614_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) {
5615 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5616 (__v4sf)_mm_unpackhi_ps(__A, __B),
5617 (__v4sf)_mm_setzero_ps());
5618}
5619
5620static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5621_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
5622 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5623 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5624 (__v8sf)__W);
5625}
5626
5627static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5628_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) {
5629 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5630 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5631 (__v8sf)_mm256_setzero_ps());
5632}
5633
5634static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5635_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
5636 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5637 (__v2df)_mm_unpacklo_pd(__A, __B),
5638 (__v2df)__W);
5639}
5640
5641static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5642_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) {
5643 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5644 (__v2df)_mm_unpacklo_pd(__A, __B),
5645 (__v2df)_mm_setzero_pd());
5646}
5647
5648static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5649_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
5650 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5651 (__v4df)_mm256_unpacklo_pd(__A, __B),
5652 (__v4df)__W);
5653}
5654
5655static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5656_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
5657 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5658 (__v4df)_mm256_unpacklo_pd(__A, __B),
5659 (__v4df)_mm256_setzero_pd());
5660}
5661
5662static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5663_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
5664 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5665 (__v4sf)_mm_unpacklo_ps(__A, __B),
5666 (__v4sf)__W);
5667}
5668
5669static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5670_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) {
5671 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5672 (__v4sf)_mm_unpacklo_ps(__A, __B),
5673 (__v4sf)_mm_setzero_ps());
5674}
5675
5676static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5677_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
5678 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5679 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5680 (__v8sf)__W);
5681}
5682
5683static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5684_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) {
5685 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5686 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5687 (__v8sf)_mm256_setzero_ps());
5688}
5689
5690static __inline__ __m128d __DEFAULT_FN_ATTRS128
5691_mm_rcp14_pd (__m128d __A)
5692{
5693 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5694 (__v2df)
5695 _mm_setzero_pd (),
5696 (__mmask8) -1);
5697}
5698
5699static __inline__ __m128d __DEFAULT_FN_ATTRS128
5700_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5701{
5702 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5703 (__v2df) __W,
5704 (__mmask8) __U);
5705}
5706
5707static __inline__ __m128d __DEFAULT_FN_ATTRS128
5709{
5710 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5711 (__v2df)
5712 _mm_setzero_pd (),
5713 (__mmask8) __U);
5714}
5715
5716static __inline__ __m256d __DEFAULT_FN_ATTRS256
5717_mm256_rcp14_pd (__m256d __A)
5718{
5719 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5720 (__v4df)
5722 (__mmask8) -1);
5723}
5724
5725static __inline__ __m256d __DEFAULT_FN_ATTRS256
5726_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5727{
5728 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5729 (__v4df) __W,
5730 (__mmask8) __U);
5731}
5732
5733static __inline__ __m256d __DEFAULT_FN_ATTRS256
5735{
5736 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5737 (__v4df)
5739 (__mmask8) __U);
5740}
5741
5742static __inline__ __m128 __DEFAULT_FN_ATTRS128
5743_mm_rcp14_ps (__m128 __A)
5744{
5745 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5746 (__v4sf)
5747 _mm_setzero_ps (),
5748 (__mmask8) -1);
5749}
5750
5751static __inline__ __m128 __DEFAULT_FN_ATTRS128
5752_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
5753{
5754 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5755 (__v4sf) __W,
5756 (__mmask8) __U);
5757}
5758
5759static __inline__ __m128 __DEFAULT_FN_ATTRS128
5761{
5762 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5763 (__v4sf)
5764 _mm_setzero_ps (),
5765 (__mmask8) __U);
5766}
5767
5768static __inline__ __m256 __DEFAULT_FN_ATTRS256
5770{
5771 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5772 (__v8sf)
5774 (__mmask8) -1);
5775}
5776
5777static __inline__ __m256 __DEFAULT_FN_ATTRS256
5778_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
5779{
5780 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5781 (__v8sf) __W,
5782 (__mmask8) __U);
5783}
5784
5785static __inline__ __m256 __DEFAULT_FN_ATTRS256
5787{
5788 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5789 (__v8sf)
5791 (__mmask8) __U);
5792}
5793
/* Merge-masked _mm_permute_pd(X, C): elements whose bit in U is clear are
 * taken from W. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_permute_pd((X), (C)), (__v2df)(__m128d)(W)))
5798
/* Zero-masked _mm_permute_pd(X, C): elements whose bit in U is clear are
 * zero. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)_mm_setzero_pd()))
5803
/* Merge-masked _mm256_permute_pd(X, C): elements whose bit in U is clear are
 * taken from W. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)(__m256d)(W)))
5808
/* Zero-masked _mm256_permute_pd(X, C): elements whose bit in U is clear are
 * zero. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
5813
/* Merge-masked _mm_permute_ps(X, C): elements whose bit in U is clear are
 * taken from W. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_permute_ps((X), (C)), (__v4sf)(__m128)(W)))
5818
/* Zero-masked _mm_permute_ps(X, C): elements whose bit in U is clear are
 * zero. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)_mm_setzero_ps()))
5823
/* Merge-masked _mm256_permute_ps(X, C): elements whose bit in U is clear are
 * taken from W. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)(__m256)(W)))
5828
/* Zero-masked _mm256_permute_ps(X, C): elements whose bit in U is clear are
 * zero. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)_mm256_setzero_ps()))
5833
5834static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5835_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
5836 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5837 (__v2df)_mm_permutevar_pd(__A, __C),
5838 (__v2df)__W);
5839}
5840
5841static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5842_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
5843 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5844 (__v2df)_mm_permutevar_pd(__A, __C),
5845 (__v2df)_mm_setzero_pd());
5846}
5847
5848static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5849_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) {
5850 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5851 (__v4df)_mm256_permutevar_pd(__A, __C),
5852 (__v4df)__W);
5853}
5854
5855static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
5856_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
5857 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5858 (__v4df)_mm256_permutevar_pd(__A, __C),
5859 (__v4df)_mm256_setzero_pd());
5860}
5861
5862static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5863_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) {
5864 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5865 (__v4sf)_mm_permutevar_ps(__A, __C),
5866 (__v4sf)__W);
5867}
5868
5869static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5870_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
5871 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5872 (__v4sf)_mm_permutevar_ps(__A, __C),
5873 (__v4sf)_mm_setzero_ps());
5874}
5875
5876static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5877_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) {
5878 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5879 (__v8sf)_mm256_permutevar_ps(__A, __C),
5880 (__v8sf)__W);
5881}
5882
5883static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
5884_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
5885 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5886 (__v8sf)_mm256_permutevar_ps(__A, __C),
5887 (__v8sf)_mm256_setzero_ps());
5888}
5889
5890static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5891_mm_test_epi32_mask (__m128i __A, __m128i __B)
5892{
5894}
5895
5896static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5897_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5898{
5899 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
5901}
5902
5903static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5904_mm256_test_epi32_mask (__m256i __A, __m256i __B)
5905{
5906 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
5908}
5909
5910static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5911_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5912{
5913 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
5915}
5916
5917static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5918_mm_test_epi64_mask (__m128i __A, __m128i __B)
5919{
5921}
5922
5923static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5924_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5925{
5926 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
5928}
5929
5930static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5931_mm256_test_epi64_mask (__m256i __A, __m256i __B)
5932{
5933 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
5935}
5936
5937static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5938_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5939{
5940 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
5942}
5943
5944static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5945_mm_testn_epi32_mask (__m128i __A, __m128i __B)
5946{
5948}
5949
5950static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5951_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5952{
5953 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
5955}
5956
5957static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5958_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5959{
5960 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
5962}
5963
5964static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5965_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5966{
5967 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
5969}
5970
5971static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5972_mm_testn_epi64_mask (__m128i __A, __m128i __B)
5973{
5975}
5976
5977static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5978_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5979{
5980 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
5982}
5983
5984static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5985_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5986{
5987 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
5989}
5990
5991static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5992_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5993{
5994 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
5996}
5997
5998static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
5999_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6000 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6001 (__v4si)_mm_unpackhi_epi32(__A, __B),
6002 (__v4si)__W);
6003}
6004
6005static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6006_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
6007 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6008 (__v4si)_mm_unpackhi_epi32(__A, __B),
6009 (__v4si)_mm_setzero_si128());
6010}
6011
6012static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6013_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A,
6014 __m256i __B) {
6015 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6016 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6017 (__v8si)__W);
6018}
6019
6020static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6021_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
6022 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6023 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6024 (__v8si)_mm256_setzero_si256());
6025}
6026
6027static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6028_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6029 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6030 (__v2di)_mm_unpackhi_epi64(__A, __B),
6031 (__v2di)__W);
6032}
6033
6034static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6035_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
6036 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6037 (__v2di)_mm_unpackhi_epi64(__A, __B),
6038 (__v2di)_mm_setzero_si128());
6039}
6040
6041static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6042_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6043 __m256i __B) {
6044 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6045 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6046 (__v4di)__W);
6047}
6048
6049static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6050_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
6051 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6052 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6053 (__v4di)_mm256_setzero_si256());
6054}
6055
6056static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6057_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6058 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6059 (__v4si)_mm_unpacklo_epi32(__A, __B),
6060 (__v4si)__W);
6061}
6062
6063static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6064_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
6065 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6066 (__v4si)_mm_unpacklo_epi32(__A, __B),
6067 (__v4si)_mm_setzero_si128());
6068}
6069
6070static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6071_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A,
6072 __m256i __B) {
6073 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6074 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6075 (__v8si)__W);
6076}
6077
6078static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6079_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
6080 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6081 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6082 (__v8si)_mm256_setzero_si256());
6083}
6084
6085static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6086_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6087 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6088 (__v2di)_mm_unpacklo_epi64(__A, __B),
6089 (__v2di)__W);
6090}
6091
6092static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6093_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
6094 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6095 (__v2di)_mm_unpacklo_epi64(__A, __B),
6096 (__v2di)_mm_setzero_si128());
6097}
6098
6099static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6100_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6101 __m256i __B) {
6102 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6103 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6104 (__v4di)__W);
6105}
6106
6107static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6108_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
6109 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6110 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6111 (__v4di)_mm256_setzero_si256());
6112}
6113
6114static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6115_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6116 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6117 (__v4si)_mm_sra_epi32(__A, __B),
6118 (__v4si)__W);
6119}
6120
6121static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6122_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
6123 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6124 (__v4si)_mm_sra_epi32(__A, __B),
6125 (__v4si)_mm_setzero_si128());
6126}
6127
6128static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6129_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
6130 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6131 (__v8si)_mm256_sra_epi32(__A, __B),
6132 (__v8si)__W);
6133}
6134
6135static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6136_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
6137 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6138 (__v8si)_mm256_sra_epi32(__A, __B),
6139 (__v8si)_mm256_setzero_si256());
6140}
6141
6142static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6143_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) {
6144 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6145 (__v4si)_mm_srai_epi32(__A, (int)__B),
6146 (__v4si)__W);
6147}
6148
6149static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6150_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) {
6151 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6152 (__v4si)_mm_srai_epi32(__A, (int)__B),
6153 (__v4si)_mm_setzero_si128());
6154}
6155
6156static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6157_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A,
6158 unsigned int __B) {
6159 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6160 (__v8si)_mm256_srai_epi32(__A, (int)__B),
6161 (__v8si)__W);
6162}
6163
6164static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6165_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
6166 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6167 (__v8si)_mm256_srai_epi32(__A, (int)__B),
6168 (__v8si)_mm256_setzero_si256());
6169}
6170
6171static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6172_mm_sra_epi64(__m128i __A, __m128i __B) {
6173 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6174}
6175
6176static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6177_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6178 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6179 (__v2di)_mm_sra_epi64(__A, __B), \
6180 (__v2di)__W);
6181}
6182
6183static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6184_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
6185 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6186 (__v2di)_mm_sra_epi64(__A, __B), \
6187 (__v2di)_mm_setzero_si128());
6188}
6189
6190static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6191_mm256_sra_epi64(__m256i __A, __m128i __B) {
6192 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6193}
6194
6195static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6196_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
6197 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6198 (__v4di)_mm256_sra_epi64(__A, __B), \
6199 (__v4di)__W);
6200}
6201
6202static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6203_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
6204 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6205 (__v4di)_mm256_sra_epi64(__A, __B), \
6206 (__v4di)_mm256_setzero_si256());
6207}
6208
6209static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6210_mm_srai_epi64(__m128i __A, unsigned int __imm) {
6211 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm);
6212}
6213
6215 __m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) {
6216 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6217 (__v2di)_mm_srai_epi64(__A, __imm), \
6218 (__v2di)__W);
6219}
6220
6221static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6222_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) {
6223 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6224 (__v2di)_mm_srai_epi64(__A, __imm), \
6225 (__v2di)_mm_setzero_si128());
6226}
6227
6228static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6229_mm256_srai_epi64(__m256i __A, unsigned int __imm) {
6230 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm);
6231}
6232
6233static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6234_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6235 unsigned int __imm) {
6236 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6237 (__v4di)_mm256_srai_epi64(__A, __imm), \
6238 (__v4di)__W);
6239}
6240
6241static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6242_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) {
6243 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6244 (__v4di)_mm256_srai_epi64(__A, __imm), \
6245 (__v4di)_mm256_setzero_si256());
6246}
6247
/* Unmasked ternary logic on 32-bit elements of A, B and C; imm (cast to
 * unsigned char) is the truth table handed to the pternlogd128 builtin. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (unsigned char)(imm), (__mmask8)-1))
6252
/* Masked ternary logic on 32-bit elements: same builtin as the unmasked form
 * but with write mask U. Note the argument order (A, U, B, C, imm). */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
6257
/* Zero-masking ternary logic on 32-bit elements: uses the _maskz variant of
 * the pternlogd128 builtin with write mask U. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
6262
/* Unmasked ternary logic on 32-bit elements of A, B and C; imm (cast to
 * unsigned char) is the truth table handed to the pternlogd256 builtin. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (unsigned char)(imm), (__mmask8)-1))
6267
/* Masked ternary logic on 32-bit elements: same builtin as the unmasked form
 * but with write mask U. Note the argument order (A, U, B, C, imm). */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
6272
/* Zero-masking ternary logic on 32-bit elements: uses the _maskz variant of
 * the pternlogd256 builtin with write mask U. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
6277
/* Unmasked ternary logic on 64-bit elements of A, B and C; imm (cast to
 * unsigned char) is the truth table handed to the pternlogq128 builtin. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (unsigned char)(imm), (__mmask8)-1))
6282
/* Masked ternary logic on 64-bit elements: same builtin as the unmasked form
 * but with write mask U. Note the argument order (A, U, B, C, imm). */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
6287
/* Zero-masking ternary logic on 64-bit elements: uses the _maskz variant of
 * the pternlogq128 builtin with write mask U. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
6292
/* Unmasked ternary logic on 64-bit elements of A, B and C; imm (cast to
 * unsigned char) is the truth table handed to the pternlogq256 builtin. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (unsigned char)(imm), (__mmask8)-1))
6297
/* Masked ternary logic on 64-bit elements: same builtin as the unmasked form
 * but with write mask U. Note the argument order (A, U, B, C, imm). */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
6302
/* Zero-masking ternary logic on 64-bit elements: uses the _maskz variant of
 * the pternlogq256 builtin with write mask U. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
6307
/* Shuffle of A and B through the shuf_f32x4_256 builtin, controlled by the
 * immediate imm (cast to int). */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(imm)))
6311
/* Merge-masked _mm256_shuffle_f32x4(A, B, imm): float elements whose bit in
 * U is clear are taken from W. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
6316
/* Zero-masked _mm256_shuffle_f32x4(A, B, imm): float elements whose bit in U
 * is clear are zero. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
6321
/* Shuffle of A and B through the shuf_f64x2_256 builtin, controlled by the
 * immediate imm (cast to int). */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(imm)))
6325
/* Merge-masked _mm256_shuffle_f64x2(A, B, imm): double elements whose bit in
 * U is clear are taken from W. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)(__m256d)(W)))
6330
/* Zero-masked _mm256_shuffle_f64x2(A, B, imm): double elements whose bit in
 * U is clear are zero. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)_mm256_setzero_pd()))
6335
/* Shuffle of A and B through the shuf_i32x4_256 builtin, controlled by the
 * immediate imm (cast to int). */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm)))
6339
/* Merge-masked _mm256_shuffle_i32x4(A, B, imm): 32-bit elements whose bit in
 * U is clear are taken from W. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
6344
/* Zero-masked _mm256_shuffle_i32x4(A, B, imm): 32-bit elements whose bit in
 * U is clear are zero. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
6349
/* Shuffle of A and B through the shuf_i64x2_256 builtin, controlled by the
 * immediate imm (cast to int). */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(imm)))
6353
/* Merge-masked _mm256_shuffle_i64x2(A, B, imm): 64-bit elements whose bit in
 * U is clear are taken from W. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))
6358
6359
/* Zero-masked _mm256_shuffle_i64x2(A, B, imm): 64-bit elements whose bit in
 * U is clear are zero. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
6364
/* Merge-masked _mm_shuffle_pd(A, B, M): elements whose bit in U is clear are
 * taken from W. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)(__m128d)(W)))
6369
/* Zero-masked _mm_shuffle_pd(A, B, M): elements whose bit in U is clear are
 * zero. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)_mm_setzero_pd()))
6374
/* Merge-masked _mm256_shuffle_pd(A, B, M): elements whose bit in U is clear
 * are taken from W. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)(__m256d)(W)))
6379
/* Zero-masked _mm256_shuffle_pd(A, B, M): elements whose bit in U is clear
 * are zero. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)_mm256_setzero_pd()))
6384
/* Merge-masked _mm_shuffle_ps(A, B, M): elements whose bit in U is clear are
 * taken from W. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)(__m128)(W)))
6389
/* Zero-masked _mm_shuffle_ps(A, B, M): elements whose bit in U is clear are
 * zero. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)_mm_setzero_ps()))
6394
/* Merge-masked _mm256_shuffle_ps(A, B, M): elements whose bit in U is clear
 * are taken from W. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)(__m256)(W)))
6399
/* Zero-masked _mm256_shuffle_ps(A, B, M): elements whose bit in U is clear
 * are zero. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)_mm256_setzero_ps()))
6404
6405static __inline__ __m128d __DEFAULT_FN_ATTRS128
6406_mm_rsqrt14_pd (__m128d __A)
6407{
6408 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6409 (__v2df)
6410 _mm_setzero_pd (),
6411 (__mmask8) -1);
6412}
6413
6414static __inline__ __m128d __DEFAULT_FN_ATTRS128
6415_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6416{
6417 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6418 (__v2df) __W,
6419 (__mmask8) __U);
6420}
6421
6422static __inline__ __m128d __DEFAULT_FN_ATTRS128
6424{
6425 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6426 (__v2df)
6427 _mm_setzero_pd (),
6428 (__mmask8) __U);
6429}
6430
6431static __inline__ __m256d __DEFAULT_FN_ATTRS256
6433{
6434 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6435 (__v4df)
6437 (__mmask8) -1);
6438}
6439
6440static __inline__ __m256d __DEFAULT_FN_ATTRS256
6441_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6442{
6443 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6444 (__v4df) __W,
6445 (__mmask8) __U);
6446}
6447
6448static __inline__ __m256d __DEFAULT_FN_ATTRS256
6450{
6451 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6452 (__v4df)
6454 (__mmask8) __U);
6455}
6456
6457static __inline__ __m128 __DEFAULT_FN_ATTRS128
6458_mm_rsqrt14_ps (__m128 __A)
6459{
6460 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6461 (__v4sf)
6462 _mm_setzero_ps (),
6463 (__mmask8) -1);
6464}
6465
6466static __inline__ __m128 __DEFAULT_FN_ATTRS128
6467_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6468{
6469 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6470 (__v4sf) __W,
6471 (__mmask8) __U);
6472}
6473
6474static __inline__ __m128 __DEFAULT_FN_ATTRS128
6476{
6477 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6478 (__v4sf)
6479 _mm_setzero_ps (),
6480 (__mmask8) __U);
6481}
6482
6483static __inline__ __m256 __DEFAULT_FN_ATTRS256
6485{
6486 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6487 (__v8sf)
6489 (__mmask8) -1);
6490}
6491
6492static __inline__ __m256 __DEFAULT_FN_ATTRS256
6493_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6494{
6495 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6496 (__v8sf) __W,
6497 (__mmask8) __U);
6498}
6499
6500static __inline__ __m256 __DEFAULT_FN_ATTRS256
6502{
6503 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6504 (__v8sf)
6506 (__mmask8) __U);
6507}
6508
6509static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6511 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6512 0, 1, 2, 3, 0, 1, 2, 3);
6513}
6514
6515static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6516_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) {
6517 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6518 (__v8sf)_mm256_broadcast_f32x4(__A),
6519 (__v8sf)__O);
6520}
6521
6522static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6524 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6525 (__v8sf)_mm256_broadcast_f32x4(__A),
6526 (__v8sf)_mm256_setzero_ps());
6527}
6528
6529static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6531 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6532 0, 1, 2, 3, 0, 1, 2, 3);
6533}
6534
6535static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6536_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) {
6537 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6538 (__v8si)_mm256_broadcast_i32x4(__A),
6539 (__v8si)__O);
6540}
6541
6542static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6544 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6545 (__v8si)_mm256_broadcast_i32x4(__A),
6546 (__v8si)_mm256_setzero_si256());
6547}
6548
/* Merge-masked form of _mm256_broadcastsd_pd: the result of
 * _mm256_broadcastsd_pd(__A) and the pass-through vector __O (both viewed as
 * __v4df) are combined by __builtin_ia32_selectpd_256 under mask __M.
 * NOTE(review): presumably set mask bits pick the broadcast lane and clear
 * bits keep the lane from __O - confirm. */
6549static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
6550_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) {
6551 return (__m256d)__builtin_ia32_selectpd_256(__M,
6552 (__v4df) _mm256_broadcastsd_pd(__A),
6553 (__v4df) __O);
6554}
6555
6556static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
6558 return (__m256d)__builtin_ia32_selectpd_256(__M,
6559 (__v4df) _mm256_broadcastsd_pd(__A),
6560 (__v4df) _mm256_setzero_pd());
6561}
6562
/* Merge-masked form of _mm_broadcastss_ps: the result of
 * _mm_broadcastss_ps(__A) and the pass-through vector __O (both viewed as
 * __v4sf) are combined by __builtin_ia32_selectps_128 under mask __M.
 * NOTE(review): presumably set mask bits pick the broadcast lane and clear
 * bits keep the lane from __O - confirm. */
6563static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
6564_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) {
6565 return (__m128)__builtin_ia32_selectps_128(__M,
6566 (__v4sf) _mm_broadcastss_ps(__A),
6567 (__v4sf) __O);
6568}
6569
6570static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
6572 return (__m128)__builtin_ia32_selectps_128(__M,
6573 (__v4sf) _mm_broadcastss_ps(__A),
6574 (__v4sf) _mm_setzero_ps());
6575}
6576
/* Merge-masked form of _mm256_broadcastss_ps: the result of
 * _mm256_broadcastss_ps(__A) and the pass-through vector __O (both viewed as
 * __v8sf) are combined by __builtin_ia32_selectps_256 under mask __M.
 * NOTE(review): presumably set mask bits pick the broadcast lane and clear
 * bits keep the lane from __O - confirm. */
6577static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6578_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) {
6579 return (__m256)__builtin_ia32_selectps_256(__M,
6580 (__v8sf) _mm256_broadcastss_ps(__A),
6581 (__v8sf) __O);
6582}
6583
6584static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6586 return (__m256)__builtin_ia32_selectps_256(__M,
6587 (__v8sf) _mm256_broadcastss_ps(__A),
6588 (__v8sf) _mm256_setzero_ps());
6589}
6590
/* Merge-masked form of _mm_broadcastd_epi32: the result of
 * _mm_broadcastd_epi32(__A) and the pass-through vector __O (both viewed as
 * __v4si) are combined by __builtin_ia32_selectd_128 under mask __M.
 * NOTE(review): presumably set mask bits pick the broadcast lane and clear
 * bits keep the lane from __O - confirm. */
6591static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6592_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) {
6593 return (__m128i)__builtin_ia32_selectd_128(__M,
6594 (__v4si) _mm_broadcastd_epi32(__A),
6595 (__v4si) __O);
6596}
6597
6598static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6600 return (__m128i)__builtin_ia32_selectd_128(__M,
6601 (__v4si) _mm_broadcastd_epi32(__A),
6602 (__v4si) _mm_setzero_si128());
6603}
6604
/* Merge-masked form of _mm256_broadcastd_epi32: the result of
 * _mm256_broadcastd_epi32(__A) and the pass-through vector __O (both viewed
 * as __v8si) are combined by __builtin_ia32_selectd_256 under mask __M.
 * NOTE(review): presumably set mask bits pick the broadcast lane and clear
 * bits keep the lane from __O - confirm. */
6605static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6606_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) {
6607 return (__m256i)__builtin_ia32_selectd_256(__M,
6608 (__v8si) _mm256_broadcastd_epi32(__A),
6609 (__v8si) __O);
6610}
6611
6612static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6614 return (__m256i)__builtin_ia32_selectd_256(__M,
6615 (__v8si) _mm256_broadcastd_epi32(__A),
6616 (__v8si) _mm256_setzero_si256());
6617}
6618
/* Merge-masked form of _mm_broadcastq_epi64: the result of
 * _mm_broadcastq_epi64(__A) and the pass-through vector __O (both viewed as
 * __v2di) are combined by __builtin_ia32_selectq_128 under mask __M.
 * NOTE(review): presumably set mask bits pick the broadcast lane and clear
 * bits keep the lane from __O - confirm. */
6619static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6620_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) {
6621 return (__m128i)__builtin_ia32_selectq_128(__M,
6622 (__v2di) _mm_broadcastq_epi64(__A),
6623 (__v2di) __O);
6624}
6625
6626static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6628 return (__m128i)__builtin_ia32_selectq_128(__M,
6629 (__v2di) _mm_broadcastq_epi64(__A),
6630 (__v2di) _mm_setzero_si128());
6631}
6632
/* Merge-masked form of _mm256_broadcastq_epi64: the result of
 * _mm256_broadcastq_epi64(__A) and the pass-through vector __O (both viewed
 * as __v4di) are combined by __builtin_ia32_selectq_256 under mask __M.
 * NOTE(review): presumably set mask bits pick the broadcast lane and clear
 * bits keep the lane from __O - confirm. */
6633static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6634_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) {
6635 return (__m256i)__builtin_ia32_selectq_256(__M,
6636 (__v4di) _mm256_broadcastq_epi64(__A),
6637 (__v4di) __O);
6638}
6639
6640static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6642 return (__m256i)__builtin_ia32_selectq_256(__M,
6643 (__v4di) _mm256_broadcastq_epi64(__A),
6644 (__v4di) _mm256_setzero_si256());
6645}
6646
6647static __inline__ __m128i __DEFAULT_FN_ATTRS128
6649{
6650 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6651 (__v16qi)_mm_undefined_si128(),
6652 (__mmask8) -1);
6653}
6654
6655static __inline__ __m128i __DEFAULT_FN_ATTRS128
6656_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6657{
6658 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6659 (__v16qi) __O, __M);
6660}
6661
6662static __inline__ __m128i __DEFAULT_FN_ATTRS128
6664{
6665 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6666 (__v16qi) _mm_setzero_si128 (),
6667 __M);
6668}
6669
6670static __inline__ void __DEFAULT_FN_ATTRS128
6672{
6673 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6674}
6675
6676static __inline__ __m128i __DEFAULT_FN_ATTRS256
6678{
6679 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6680 (__v16qi)_mm_undefined_si128(),
6681 (__mmask8) -1);
6682}
6683
6684static __inline__ __m128i __DEFAULT_FN_ATTRS256
6685_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6686{
6687 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6688 (__v16qi) __O, __M);
6689}
6690
6691static __inline__ __m128i __DEFAULT_FN_ATTRS256
6693{
6694 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6695 (__v16qi) _mm_setzero_si128 (),
6696 __M);
6697}
6698
6699static __inline__ void __DEFAULT_FN_ATTRS256
6701{
6702 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
6703}
6704
6705static __inline__ __m128i __DEFAULT_FN_ATTRS128
6707{
6708 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6709 (__v8hi)_mm_setzero_si128 (),
6710 (__mmask8) -1);
6711}
6712
6713static __inline__ __m128i __DEFAULT_FN_ATTRS128
6714_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
6715{
6716 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6717 (__v8hi)__O,
6718 __M);
6719}
6720
6721static __inline__ __m128i __DEFAULT_FN_ATTRS128
6723{
6724 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6725 (__v8hi) _mm_setzero_si128 (),
6726 __M);
6727}
6728
6729static __inline__ void __DEFAULT_FN_ATTRS128
6731{
6732 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
6733}
6734
6735static __inline__ __m128i __DEFAULT_FN_ATTRS256
6737{
6738 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6739 (__v8hi)_mm_undefined_si128(),
6740 (__mmask8) -1);
6741}
6742
6743static __inline__ __m128i __DEFAULT_FN_ATTRS256
6744_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
6745{
6746 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6747 (__v8hi) __O, __M);
6748}
6749
6750static __inline__ __m128i __DEFAULT_FN_ATTRS256
6752{
6753 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6754 (__v8hi) _mm_setzero_si128 (),
6755 __M);
6756}
6757
6758static __inline__ void __DEFAULT_FN_ATTRS256
6760{
6761 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
6762}
6763
6764static __inline__ __m128i __DEFAULT_FN_ATTRS128
6766{
6767 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6768 (__v16qi)_mm_undefined_si128(),
6769 (__mmask8) -1);
6770}
6771
6772static __inline__ __m128i __DEFAULT_FN_ATTRS128
6773_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6774{
6775 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6776 (__v16qi) __O, __M);
6777}
6778
6779static __inline__ __m128i __DEFAULT_FN_ATTRS128
6781{
6782 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6783 (__v16qi) _mm_setzero_si128 (),
6784 __M);
6785}
6786
6787static __inline__ void __DEFAULT_FN_ATTRS128
6789{
6790 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
6791}
6792
6793static __inline__ __m128i __DEFAULT_FN_ATTRS256
6795{
6796 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6797 (__v16qi)_mm_undefined_si128(),
6798 (__mmask8) -1);
6799}
6800
6801static __inline__ __m128i __DEFAULT_FN_ATTRS256
6802_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6803{
6804 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6805 (__v16qi) __O, __M);
6806}
6807
6808static __inline__ __m128i __DEFAULT_FN_ATTRS256
6810{
6811 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6812 (__v16qi) _mm_setzero_si128 (),
6813 __M);
6814}
6815
6816static __inline__ void __DEFAULT_FN_ATTRS256
6818{
6819 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
6820}
6821
6822static __inline__ __m128i __DEFAULT_FN_ATTRS128
6824{
6825 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6826 (__v4si)_mm_undefined_si128(),
6827 (__mmask8) -1);
6828}
6829
6830static __inline__ __m128i __DEFAULT_FN_ATTRS128
6831_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6832{
6833 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6834 (__v4si) __O, __M);
6835}
6836
6837static __inline__ __m128i __DEFAULT_FN_ATTRS128
6839{
6840 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6841 (__v4si) _mm_setzero_si128 (),
6842 __M);
6843}
6844
6845static __inline__ void __DEFAULT_FN_ATTRS128
6847{
6848 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
6849}
6850
6851static __inline__ __m128i __DEFAULT_FN_ATTRS256
6853{
6854 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6855 (__v4si)_mm_undefined_si128(),
6856 (__mmask8) -1);
6857}
6858
6859static __inline__ __m128i __DEFAULT_FN_ATTRS256
6860_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
6861{
6862 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6863 (__v4si)__O,
6864 __M);
6865}
6866
6867static __inline__ __m128i __DEFAULT_FN_ATTRS256
6869{
6870 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6871 (__v4si) _mm_setzero_si128 (),
6872 __M);
6873}
6874
6875static __inline__ void __DEFAULT_FN_ATTRS256
6877{
6878 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
6879}
6880
6881static __inline__ __m128i __DEFAULT_FN_ATTRS128
6883{
6884 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
6885 (__v8hi)_mm_undefined_si128(),
6886 (__mmask8) -1);
6887}
6888
6889static __inline__ __m128i __DEFAULT_FN_ATTRS128
6890_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
6891{
6892 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
6893 (__v8hi) __O, __M);
6894}
6895
6896static __inline__ __m128i __DEFAULT_FN_ATTRS128
6898{
6899 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
6900 (__v8hi) _mm_setzero_si128 (),
6901 __M);
6902}
6903
6904static __inline__ void __DEFAULT_FN_ATTRS128
6906{
6907 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
6908}
6909
6910static __inline__ __m128i __DEFAULT_FN_ATTRS256
6912{
6913 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
6914 (__v8hi)_mm_undefined_si128(),
6915 (__mmask8) -1);
6916}
6917
6918static __inline__ __m128i __DEFAULT_FN_ATTRS256
6919_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
6920{
6921 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
6922 (__v8hi) __O, __M);
6923}
6924
6925static __inline__ __m128i __DEFAULT_FN_ATTRS256
6927{
6928 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
6929 (__v8hi) _mm_setzero_si128 (),
6930 __M);
6931}
6932
6933static __inline__ void __DEFAULT_FN_ATTRS256
6935{
6936 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
6937}
6938
6939static __inline__ __m128i __DEFAULT_FN_ATTRS128
6941{
6942 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
6943 (__v16qi)_mm_undefined_si128(),
6944 (__mmask8) -1);
6945}
6946
6947static __inline__ __m128i __DEFAULT_FN_ATTRS128
6948_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6949{
6950 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
6951 (__v16qi) __O,
6952 __M);
6953}
6954
6955static __inline__ __m128i __DEFAULT_FN_ATTRS128
6957{
6958 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
6959 (__v16qi) _mm_setzero_si128 (),
6960 __M);
6961}
6962
6963static __inline__ void __DEFAULT_FN_ATTRS128
6965{
6966 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6967}
6968
6969static __inline__ __m128i __DEFAULT_FN_ATTRS256
6971{
6972 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
6973 (__v16qi)_mm_undefined_si128(),
6974 (__mmask8) -1);
6975}
6976
6977static __inline__ __m128i __DEFAULT_FN_ATTRS256
6978_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6979{
6980 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
6981 (__v16qi) __O,
6982 __M);
6983}
6984
6985static __inline__ __m128i __DEFAULT_FN_ATTRS256
6987{
6988 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
6989 (__v16qi) _mm_setzero_si128 (),
6990 __M);
6991}
6992
6993static __inline__ void __DEFAULT_FN_ATTRS256
6995{
6996 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
6997}
6998
6999static __inline__ __m128i __DEFAULT_FN_ATTRS128
7001{
7002 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7003 (__v8hi)_mm_undefined_si128(),
7004 (__mmask8) -1);
7005}
7006
7007static __inline__ __m128i __DEFAULT_FN_ATTRS128
7008_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7009{
7010 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7011 (__v8hi) __O, __M);
7012}
7013
7014static __inline__ __m128i __DEFAULT_FN_ATTRS128
7016{
7017 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7018 (__v8hi) _mm_setzero_si128 (),
7019 __M);
7020}
7021
7022static __inline__ void __DEFAULT_FN_ATTRS128
7024{
7025 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7026}
7027
7028static __inline__ __m128i __DEFAULT_FN_ATTRS256
7030{
7031 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7032 (__v8hi) _mm_undefined_si128(),
7033 (__mmask8) -1);
7034}
7035
7036static __inline__ __m128i __DEFAULT_FN_ATTRS256
7037_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7038{
7039 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7040 (__v8hi) __O, __M);
7041}
7042
7043static __inline__ __m128i __DEFAULT_FN_ATTRS256
7045{
7046 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7047 (__v8hi) _mm_setzero_si128 (),
7048 __M);
7049}
7050
7051static __inline__ void __DEFAULT_FN_ATTRS256
7053{
7054 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7055}
7056
7057static __inline__ __m128i __DEFAULT_FN_ATTRS128
7059{
7060 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7061 (__v16qi)_mm_undefined_si128(),
7062 (__mmask8) -1);
7063}
7064
7065static __inline__ __m128i __DEFAULT_FN_ATTRS128
7066_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7067{
7068 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7069 (__v16qi) __O,
7070 __M);
7071}
7072
7073static __inline__ __m128i __DEFAULT_FN_ATTRS128
7075{
7076 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7077 (__v16qi) _mm_setzero_si128 (),
7078 __M);
7079}
7080
7081static __inline__ void __DEFAULT_FN_ATTRS128
7083{
7084 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7085}
7086
7087static __inline__ __m128i __DEFAULT_FN_ATTRS256
7089{
7090 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7091 (__v16qi)_mm_undefined_si128(),
7092 (__mmask8) -1);
7093}
7094
7095static __inline__ __m128i __DEFAULT_FN_ATTRS256
7096_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7097{
7098 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7099 (__v16qi) __O,
7100 __M);
7101}
7102
7103static __inline__ __m128i __DEFAULT_FN_ATTRS256
7105{
7106 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7107 (__v16qi) _mm_setzero_si128 (),
7108 __M);
7109}
7110
7111static __inline__ void __DEFAULT_FN_ATTRS256
7113{
7114 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7115}
7116
7117static __inline__ __m128i __DEFAULT_FN_ATTRS128
7119{
7120 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7121 (__v4si)_mm_undefined_si128(),
7122 (__mmask8) -1);
7123}
7124
7125static __inline__ __m128i __DEFAULT_FN_ATTRS128
7126_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7127{
7128 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7129 (__v4si) __O, __M);
7130}
7131
7132static __inline__ __m128i __DEFAULT_FN_ATTRS128
7134{
7135 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7136 (__v4si) _mm_setzero_si128 (),
7137 __M);
7138}
7139
7140static __inline__ void __DEFAULT_FN_ATTRS128
7142{
7143 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7144}
7145
7146static __inline__ __m128i __DEFAULT_FN_ATTRS256
7148{
7149 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7150 (__v4si)_mm_undefined_si128(),
7151 (__mmask8) -1);
7152}
7153
7154static __inline__ __m128i __DEFAULT_FN_ATTRS256
7155_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7156{
7157 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7158 (__v4si) __O, __M);
7159}
7160
7161static __inline__ __m128i __DEFAULT_FN_ATTRS256
7163{
7164 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7165 (__v4si) _mm_setzero_si128 (),
7166 __M);
7167}
7168
7169static __inline__ void __DEFAULT_FN_ATTRS256
7171{
7172 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7173}
7174
7175static __inline__ __m128i __DEFAULT_FN_ATTRS128
7177{
7178 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7179 (__v8hi)_mm_undefined_si128(),
7180 (__mmask8) -1);
7181}
7182
7183static __inline__ __m128i __DEFAULT_FN_ATTRS128
7184_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7185{
7186 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7187 (__v8hi) __O, __M);
7188}
7189
7190static __inline__ __m128i __DEFAULT_FN_ATTRS128
7192{
7193 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7194 (__v8hi) _mm_setzero_si128 (),
7195 __M);
7196}
7197
7198static __inline__ void __DEFAULT_FN_ATTRS128
7200{
7201 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7202}
7203
7204static __inline__ __m128i __DEFAULT_FN_ATTRS256
7206{
7207 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7208 (__v8hi)_mm_undefined_si128(),
7209 (__mmask8) -1);
7210}
7211
7212static __inline__ __m128i __DEFAULT_FN_ATTRS256
7213_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7214{
7215 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7216 (__v8hi) __O, __M);
7217}
7218
7219static __inline__ __m128i __DEFAULT_FN_ATTRS256
7221{
7222 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7223 (__v8hi) _mm_setzero_si128 (),
7224 __M);
7225}
7226
7227static __inline__ void __DEFAULT_FN_ATTRS256
7229{
7230 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7231}
7232
/* Converts __A (viewed as __v4si) element-wise to a __v4qi with
 * __builtin_convertvector, then builds the 16-byte result with
 * __builtin_shufflevector against an all-zero __v4qi: indices 0-3 select the
 * four converted bytes, and every remaining index (4-7) selects an element
 * of the zero vector, so result bytes 4..15 are zero.
 * Kept as a single return expression; the function is declared with
 * __DEFAULT_FN_ATTRS128_CONSTEXPR. */
7233static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
7234_mm_cvtepi32_epi8(__m128i __A) {
7235 return (__m128i)__builtin_shufflevector(
7236 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7237 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7238}
7239
7240static __inline__ __m128i __DEFAULT_FN_ATTRS128
7241_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7242{
7243 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7244 (__v16qi) __O, __M);
7245}
7246
7247static __inline__ __m128i __DEFAULT_FN_ATTRS128
7249{
7250 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7251 (__v16qi)
7253 __M);
7254}
7255
7256static __inline__ void __DEFAULT_FN_ATTRS128
7258{
7259 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7260}
7261
7262static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7264 return (__m128i)__builtin_shufflevector(
7265 __builtin_convertvector((__v8si)__A, __v8qi),
7266 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7267 12, 13, 14, 15);
7268}
7269
7270static __inline__ __m128i __DEFAULT_FN_ATTRS256
7271_mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) {
7272 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7273 (__v16qi) __O, __M);
7274}
7275
7276static __inline__ __m128i __DEFAULT_FN_ATTRS256
7278{
7279 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7280 (__v16qi) _mm_setzero_si128 (),
7281 __M);
7282}
7283
7284static __inline__ void __DEFAULT_FN_ATTRS256
7286{
7287 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7288}
7289
7290static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
7292 return (__m128i)__builtin_shufflevector(
7293 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7294 2, 3, 4, 5, 6, 7);
7295}
7296
7297static __inline__ __m128i __DEFAULT_FN_ATTRS128
7298_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7299{
7300 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7301 (__v8hi) __O, __M);
7302}
7303
7304static __inline__ __m128i __DEFAULT_FN_ATTRS128
7306{
7307 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7308 (__v8hi) _mm_setzero_si128 (),
7309 __M);
7310}
7311
7312static __inline__ void __DEFAULT_FN_ATTRS128
7314{
7315 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7316}
7317
7318static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7320 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7321}
7322
7323static __inline__ __m128i __DEFAULT_FN_ATTRS256
7324_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7325{
7326 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7327 (__v8hi) __O, __M);
7328}
7329
7330static __inline__ __m128i __DEFAULT_FN_ATTRS256
7332{
7333 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7334 (__v8hi) _mm_setzero_si128 (),
7335 __M);
7336}
7337
7338static __inline__ void __DEFAULT_FN_ATTRS256
7340{
7341 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7342}
7343
/* Converts __A (viewed as __v2di) element-wise to a __v2qi with
 * __builtin_convertvector, then builds the 16-byte result with
 * __builtin_shufflevector against an all-zero __v2qi: indices 0-1 select the
 * two converted bytes, and every remaining index (2 or 3) selects an element
 * of the zero vector, so result bytes 2..15 are zero.
 * Kept as a single return expression; the function is declared with
 * __DEFAULT_FN_ATTRS128_CONSTEXPR. */
7344static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
7345_mm_cvtepi64_epi8(__m128i __A) {
7346 return (__m128i)__builtin_shufflevector(
7347 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7348 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7349}
7350
7351static __inline__ __m128i __DEFAULT_FN_ATTRS128
7352_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7353{
7354 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7355 (__v16qi) __O, __M);
7356}
7357
7358static __inline__ __m128i __DEFAULT_FN_ATTRS128
7360{
7361 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7362 (__v16qi) _mm_setzero_si128 (),
7363 __M);
7364}
7365
7366static __inline__ void __DEFAULT_FN_ATTRS128
7368{
7369 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7370}
7371
7372static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7374 return (__m128i)__builtin_shufflevector(
7375 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7376 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7377}
7378
7379static __inline__ __m128i __DEFAULT_FN_ATTRS256
7380_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7381{
7382 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7383 (__v16qi) __O, __M);
7384}
7385
7386static __inline__ __m128i __DEFAULT_FN_ATTRS256
7388{
7389 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7390 (__v16qi) _mm_setzero_si128 (),
7391 __M);
7392}
7393
7394static __inline__ void __DEFAULT_FN_ATTRS256
7396{
7397 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7398}
7399
7400static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
7402 return (__m128i)__builtin_shufflevector(
7403 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7404}
7405
7406static __inline__ __m128i __DEFAULT_FN_ATTRS128
7407_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7408{
7409 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7410 (__v4si) __O, __M);
7411}
7412
7413static __inline__ __m128i __DEFAULT_FN_ATTRS128
7415{
7416 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7417 (__v4si) _mm_setzero_si128 (),
7418 __M);
7419}
7420
7421static __inline__ void __DEFAULT_FN_ATTRS128
7423{
7424 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7425}
7426
7427static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7429 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7430}
7431
/* Merge-masked form of _mm256_cvtepi64_epi32: evaluates
 * _mm256_cvtepi64_epi32(__A) and passes the result (as __v4si), __O (as
 * __v4si) and the mask __M to __builtin_ia32_selectd_128. Unlike the other
 * merge-masked narrowing wrappers in this group it goes through the select
 * builtin rather than a pmov*_mask builtin.
 * NOTE(review): presumably set mask bits pick the converted lane and clear
 * bits keep the lane from __O - confirm. */
7432static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7433_mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) {
7434 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7435 (__v4si)_mm256_cvtepi64_epi32(__A),
7436 (__v4si)__O);
7437}
7438
7439static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7441 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7442 (__v4si)_mm256_cvtepi64_epi32(__A),
7443 (__v4si)_mm_setzero_si128());
7444}
7445
7446static __inline__ void __DEFAULT_FN_ATTRS256
7448{
7449 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7450}
7451
7452static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
7454 return (__m128i)__builtin_shufflevector(
7455 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7456 3, 3, 3, 3);
7457}
7458
7459static __inline__ __m128i __DEFAULT_FN_ATTRS128
7460_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7461{
7462 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7463 (__v8hi)__O,
7464 __M);
7465}
7466
7467static __inline__ __m128i __DEFAULT_FN_ATTRS128
7469{
7470 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7471 (__v8hi) _mm_setzero_si128 (),
7472 __M);
7473}
7474
7475static __inline__ void __DEFAULT_FN_ATTRS128
7477{
7478 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7479}
7480
7481static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
7483 return (__m128i)__builtin_shufflevector(
7484 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7485 2, 3, 4, 5, 6, 7);
7486}
7487
7488static __inline__ __m128i __DEFAULT_FN_ATTRS256
7489_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7490{
7491 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7492 (__v8hi) __O, __M);
7493}
7494
7495static __inline__ __m128i __DEFAULT_FN_ATTRS256
7497{
7498 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7499 (__v8hi) _mm_setzero_si128 (),
7500 __M);
7501}
7502
7503static __inline__ void __DEFAULT_FN_ATTRS256
7505{
7506 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7507}
7508
/* Unmasked extract: expands to __builtin_ia32_extractf32x4_256_mask on A
 * (cast to __v8sf) with immediate imm, a zeroed pass-through vector and an
 * all-ones mask ((__mmask8)-1); yields an __m128.
 * NOTE(review): imm presumably selects the 128-bit half and must be a
 * compile-time constant - confirm. */
7509#define _mm256_extractf32x4_ps(A, imm) \
7510 ((__m128)__builtin_ia32_extractf32x4_256_mask( \
7511 (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
7512 (__mmask8) - 1))
7513
/* Merge-masked extract: same builtin as _mm256_extractf32x4_ps, but with W
 * (cast to __v4sf) as the pass-through vector and U as the write mask. */
7514#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
7515 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7516 (int)(imm), \
7517 (__v4sf)(__m128)(W), \
7518 (__mmask8)(U)))
7519
/* Zero-masked extract: calls __builtin_ia32_extractf32x4_256_mask with a
 * zeroed pass-through vector (_mm_setzero_ps()) and U as the write mask. */
7520#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
7521 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7522 (int)(imm), \
7523 (__v4sf)_mm_setzero_ps(), \
7524 (__mmask8)(U)))
7525
/* Unmasked extract: expands to __builtin_ia32_extracti32x4_256_mask on A
 * (cast to __v8si) with immediate imm, a zeroed pass-through vector and an
 * all-ones mask ((__mmask8)-1); yields an __m128i.
 * NOTE(review): imm presumably selects the 128-bit half and must be a
 * compile-time constant - confirm. */
7526#define _mm256_extracti32x4_epi32(A, imm) \
7527 ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
7528 (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
7529 (__mmask8) - 1))
7530
/* Merge-masked extract: same builtin as _mm256_extracti32x4_epi32, but with
 * W (cast to __v4si) as the pass-through vector and U as the write mask. */
7531#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
7532 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7533 (int)(imm), \
7534 (__v4si)(__m128i)(W), \
7535 (__mmask8)(U)))
7536
/* Zero-masked extract: calls __builtin_ia32_extracti32x4_256_mask with a
 * zeroed pass-through vector (_mm_setzero_si128()) and U as the write mask. */
7537#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
7538 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7539 (int)(imm), \
7540 (__v4si)_mm_setzero_si128(), \
7541 (__mmask8)(U)))
7542
/* Expands to __builtin_ia32_insertf32x4_256 with A (cast to __v8sf), B (cast
 * to __v4sf) and immediate imm; yields an __m256.
 * NOTE(review): imm presumably selects which 128-bit half of A is replaced
 * by B and must be a compile-time constant - confirm. */
7543#define _mm256_insertf32x4(A, B, imm) \
7544 ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
7545 (__v4sf)(__m128)(B), (int)(imm)))
7546
/* Merge-masked insert: evaluates _mm256_insertf32x4(A, B, imm) and combines
 * it with W (cast to __v8sf) through __builtin_ia32_selectps_256 under mask
 * U. */
7547#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
7548 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7549 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
7550 (__v8sf)(__m256)(W)))
7551
/* Zero-masked insert: evaluates _mm256_insertf32x4(A, B, imm) and combines
 * it with _mm256_setzero_ps() through __builtin_ia32_selectps_256 under mask
 * U. */
7552#define _mm256_maskz_insertf32x4(U, A, B, imm) \
7553 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7554 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
7555 (__v8sf)_mm256_setzero_ps()))
7556
/* Expands to __builtin_ia32_inserti32x4_256 with A (cast to __v8si), B (cast
 * to __v4si) and immediate imm; yields an __m256i.
 * NOTE(review): imm presumably selects which 128-bit half of A is replaced
 * by B and must be a compile-time constant - confirm. */
7557#define _mm256_inserti32x4(A, B, imm) \
7558 ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
7559 (__v4si)(__m128i)(B), (int)(imm)))
7560
/* Merge-masked insert: evaluates _mm256_inserti32x4(A, B, imm) and combines
 * it with W (cast to __v8si) through __builtin_ia32_selectd_256 under mask
 * U. */
7561#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
7562 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7563 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
7564 (__v8si)(__m256i)(W)))
7565
/* Zero-masked insert: evaluates _mm256_inserti32x4(A, B, imm) and combines
 * it with _mm256_setzero_si256() through __builtin_ia32_selectd_256 under
 * mask U. */
7566#define _mm256_maskz_inserti32x4(U, A, B, imm) \
7567 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7568 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
7569 (__v8si)_mm256_setzero_si256()))
7570
/* Unmasked getmant on A (cast to __v2df): B and C are packed into a single
 * int immediate as ((C) << 2) | (B) and passed to
 * __builtin_ia32_getmantpd128_mask with a zeroed pass-through vector and an
 * all-ones mask.
 * NOTE(review): B is presumably the normalization interval and C the sign
 * control - confirm against the VGETMANTPD imm8 encoding. */
7571#define _mm_getmant_pd(A, B, C) \
7572 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7573 (int)(((C)<<2) | (B)), \
7574 (__v2df)_mm_setzero_pd(), \
7575 (__mmask8)-1))
7576
/* Merge-masked getmant: immediate built as ((C) << 2) | (B); W (cast to
 * __v2df) is the pass-through vector and U the write mask handed to
 * __builtin_ia32_getmantpd128_mask. */
7577#define _mm_mask_getmant_pd(W, U, A, B, C) \
7578 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7579 (int)(((C)<<2) | (B)), \
7580 (__v2df)(__m128d)(W), \
7581 (__mmask8)(U)))
7582
/* Zero-masked getmant: immediate built as ((C) << 2) | (B); the pass-through
 * vector is _mm_setzero_pd() and U is the write mask handed to
 * __builtin_ia32_getmantpd128_mask. */
7583#define _mm_maskz_getmant_pd(U, A, B, C) \
7584 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7585 (int)(((C)<<2) | (B)), \
7586 (__v2df)_mm_setzero_pd(), \
7587 (__mmask8)(U)))
7588
/* Unmasked getmant on A (cast to __v4df): B and C are packed into a single
 * int immediate as ((C) << 2) | (B) and passed to
 * __builtin_ia32_getmantpd256_mask with a zeroed pass-through vector and an
 * all-ones mask.
 * NOTE(review): B is presumably the normalization interval and C the sign
 * control - confirm against the VGETMANTPD imm8 encoding. */
7589#define _mm256_getmant_pd(A, B, C) \
7590 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7591 (int)(((C)<<2) | (B)), \
7592 (__v4df)_mm256_setzero_pd(), \
7593 (__mmask8)-1))
7594
/* Merge-masked getmant: immediate built as ((C) << 2) | (B); W (cast to
 * __v4df) is the pass-through vector and U the write mask handed to
 * __builtin_ia32_getmantpd256_mask. */
7595#define _mm256_mask_getmant_pd(W, U, A, B, C) \
7596 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7597 (int)(((C)<<2) | (B)), \
7598 (__v4df)(__m256d)(W), \
7599 (__mmask8)(U)))
7600
/* Zero-masked getmant: immediate built as ((C) << 2) | (B); the pass-through
 * vector is _mm256_setzero_pd() and U is the write mask handed to
 * __builtin_ia32_getmantpd256_mask. */
7601#define _mm256_maskz_getmant_pd(U, A, B, C) \
7602 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7603 (int)(((C)<<2) | (B)), \
7604 (__v4df)_mm256_setzero_pd(), \
7605 (__mmask8)(U)))
7606
/* Unmasked getmant on A (cast to __v4sf): B and C are packed into a single
 * int immediate as ((C) << 2) | (B) and passed to
 * __builtin_ia32_getmantps128_mask with a zeroed pass-through vector and an
 * all-ones mask.
 * NOTE(review): B is presumably the normalization interval and C the sign
 * control - confirm against the VGETMANTPS imm8 encoding. */
7607#define _mm_getmant_ps(A, B, C) \
7608 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7609 (int)(((C)<<2) | (B)), \
7610 (__v4sf)_mm_setzero_ps(), \
7611 (__mmask8)-1))
7612
/* Merge-masked getmant: immediate built as ((C) << 2) | (B); W (cast to
 * __v4sf) is the pass-through vector and U the write mask handed to
 * __builtin_ia32_getmantps128_mask. */
7613#define _mm_mask_getmant_ps(W, U, A, B, C) \
7614 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7615 (int)(((C)<<2) | (B)), \
7616 (__v4sf)(__m128)(W), \
7617 (__mmask8)(U)))
7618
/* Zero-masked getmant: immediate built as ((C) << 2) | (B); the pass-through
 * vector is _mm_setzero_ps() and U is the write mask handed to
 * __builtin_ia32_getmantps128_mask. */
7619#define _mm_maskz_getmant_ps(U, A, B, C) \
7620 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7621 (int)(((C)<<2) | (B)), \
7622 (__v4sf)_mm_setzero_ps(), \
7623 (__mmask8)(U)))
7624
/* Unmasked getmant on A (cast to __v8sf): B and C are packed into a single
 * int immediate as ((C) << 2) | (B) and passed to
 * __builtin_ia32_getmantps256_mask with a zeroed pass-through vector and an
 * all-ones mask.
 * NOTE(review): B is presumably the normalization interval and C the sign
 * control - confirm against the VGETMANTPS imm8 encoding. */
7625#define _mm256_getmant_ps(A, B, C) \
7626 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7627 (int)(((C)<<2) | (B)), \
7628 (__v8sf)_mm256_setzero_ps(), \
7629 (__mmask8)-1))
7630
/* Merge-masked getmant: immediate built as ((C) << 2) | (B); W (cast to
 * __v8sf) is the pass-through vector and U the write mask handed to
 * __builtin_ia32_getmantps256_mask. */
7631#define _mm256_mask_getmant_ps(W, U, A, B, C) \
7632 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7633 (int)(((C)<<2) | (B)), \
7634 (__v8sf)(__m256)(W), \
7635 (__mmask8)(U)))
7636
/* Zero-masked getmant: immediate built as ((C) << 2) | (B); the pass-through
 * vector is _mm256_setzero_ps() and U is the write mask handed to
 * __builtin_ia32_getmantps256_mask. */
7637#define _mm256_maskz_getmant_ps(U, A, B, C) \
7638 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7639 (int)(((C)<<2) | (B)), \
7640 (__v8sf)_mm256_setzero_ps(), \
7641 (__mmask8)(U)))
7642
/* Masked gather via __builtin_ia32_gather3div2df: v1_old (cast to __v2df) is
 * the pass-through vector, addr is passed as void const *, index is cast to
 * __v2di, mask to __mmask8 and scale to int; yields an __m128d.
 * NOTE(review): scale is presumably an immediate byte multiplier applied to
 * each index - confirm the permitted values. */
7643#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
7644 ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
7645 (void const *)(addr), \
7646 (__v2di)(__m128i)(index), \
7647 (__mmask8)(mask), (int)(scale)))
7648
/* Masked gather via __builtin_ia32_gather3div2di: v1_old (cast to __v2di) is
 * the pass-through vector, addr is passed as void const *, index is cast to
 * __v2di, mask to __mmask8 and scale to int; yields an __m128i. */
7649#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7650 ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
7651 (void const *)(addr), \
7652 (__v2di)(__m128i)(index), \
7653 (__mmask8)(mask), (int)(scale)))
7654
/* Masked gather via __builtin_ia32_gather3div4df: v1_old (cast to __v4df) is
 * the pass-through vector, addr is passed as void const *, index is cast to
 * __v4di, mask to __mmask8 and scale to int; yields an __m256d. */
7655#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
7656 ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
7657 (void const *)(addr), \
7658 (__v4di)(__m256i)(index), \
7659 (__mmask8)(mask), (int)(scale)))
7660
/* Masked gather via __builtin_ia32_gather3div4di: v1_old (cast to __v4di) is
 * the pass-through vector, addr is passed as void const *, index is cast to
 * __v4di, mask to __mmask8 and scale to int; yields an __m256i. */
7661#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7662 ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
7663 (void const *)(addr), \
7664 (__v4di)(__m256i)(index), \
7665 (__mmask8)(mask), (int)(scale)))
7666
/* Masked gather via __builtin_ia32_gather3div4sf: v1_old (cast to __v4sf) is
 * the pass-through vector, addr is passed as void const *, index is cast to
 * __v2di, mask to __mmask8 and scale to int; yields an __m128.
 * NOTE(review): the index vector holds two 64-bit lanes while the result
 * type has four floats - presumably only the low two are gathered; confirm. */
7667#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
7668 ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
7669 (void const *)(addr), \
7670 (__v2di)(__m128i)(index), \
7671 (__mmask8)(mask), (int)(scale)))
7672
7673#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
7674 ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
7675 (void const *)(addr), \
7676 (__v2di)(__m128i)(index), \
7677 (__mmask8)(mask), (int)(scale)))
7678
7679#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
7680 ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
7681 (void const *)(addr), \
7682 (__v4di)(__m256i)(index), \
7683 (__mmask8)(mask), (int)(scale)))
7684
7685#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
7686 ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
7687 (void const *)(addr), \
7688 (__v4di)(__m256i)(index), \
7689 (__mmask8)(mask), (int)(scale)))
7690
/* Masked gathers of 64-bit elements addressed through 32-bit indices.
 * Arguments go to the __builtin_ia32_gather3siv* builtin in the order
 * (v1_old, addr, index, mask, scale).  The index operand is always a
 * 128-bit vector cast to [4 x i32], for the 256-bit forms as well. */

/* [2 x double] data, index vector cast to [4 x i32]. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))

/* [2 x i64] data, index vector cast to [4 x i32]. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))

/* [4 x double] data, [4 x i32] indices. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))

/* [4 x i64] data, [4 x i32] indices. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7714
/* Masked gathers of 32-bit elements addressed through 32-bit indices.
 * Arguments go to the __builtin_ia32_gather3siv* builtin in the order
 * (v1_old, addr, index, mask, scale); data and index vectors have the same
 * width in each form. */

/* [4 x float] data, [4 x i32] indices. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))

/* [4 x i32] data, [4 x i32] indices. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))

/* [8 x float] data, [8 x i32] indices. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf( \
      (__v8sf)(__m256)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))

/* [8 x i32] data, [8 x i32] indices. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si( \
      (__v8si)(__m256i)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7738
/* Immediate-controlled permute of a [4 x double] vector: X is passed to
 * __builtin_ia32_permdf256 together with the control C cast to int. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))

/* Merge-masked form: the permute result and W are fed to
 * __builtin_ia32_selectpd_256 under mask U. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)(__m256d)(W)))

/* Zero-masked form: same select, with an all-zero vector in place of W. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
7751
/* Immediate-controlled permute of a [4 x i64] vector: X is passed to
 * __builtin_ia32_permdi256 together with the control C cast to int. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))

/* Merge-masked form: the permute result and W are fed to
 * __builtin_ia32_selectq_256 under mask U. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)(__m256i)(W)))

/* Zero-masked form: same select, with an all-zero vector in place of W. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)_mm256_setzero_si256()))
7764
7765static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
7766_mm256_permutexvar_pd(__m256i __X, __m256d __Y) {
7767 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
7768}
7769
7770static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
7771_mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X,
7772 __m256d __Y) {
7773 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
7774 (__v4df)_mm256_permutexvar_pd(__X, __Y),
7775 (__v4df)__W);
7776}
7777
7778static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
7779_mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y) {
7780 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
7781 (__v4df)_mm256_permutexvar_pd(__X, __Y),
7782 (__v4df)_mm256_setzero_pd());
7783}
7784
7785static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7786_mm256_permutexvar_epi64(__m256i __X, __m256i __Y) {
7787 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
7788}
7789
7790static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7791_mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y) {
7792 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
7793 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
7794 (__v4di)_mm256_setzero_si256());
7795}
7796
7797static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7798_mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X,
7799 __m256i __Y) {
7800 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
7801 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
7802 (__v4di)__W);
7803}
7804
7805#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
7806
7807static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7808_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) {
7809 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7810 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
7811 (__v8sf)__W);
7812}
7813
7814static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7815_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) {
7816 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7817 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
7818 (__v8sf)_mm256_setzero_ps());
7819}
7820
7821#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
7822
7823static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7824_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
7825 __m256i __Y) {
7826 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7827 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
7828 (__v8si)__W);
7829}
7830
7831static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7832_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) {
7833 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7834 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
7835 (__v8si)_mm256_setzero_si256());
7836}
7837
/* 128-bit alignr on 32-bit elements: A and B (as [4 x i32]) and the
 * immediate imm (cast to int) are passed to __builtin_ia32_alignd128. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128( \
      (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (int)(imm)))

/* Merge-masked form: selects between the alignr result and W under U. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)(__m128i)(W)))

/* Zero-masked form: selects between the alignr result and zero under U. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)_mm_setzero_si128()))
7851
/* 256-bit alignr on 32-bit elements: A and B (as [8 x i32]) and the
 * immediate imm (cast to int) are passed to __builtin_ia32_alignd256. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm)))

/* Merge-masked form: selects between the alignr result and W under U. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))

/* Zero-masked form: selects between the alignr result and zero under U. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
7865
/* 128-bit alignr on 64-bit elements: A and B (as [2 x i64]) and the
 * immediate imm (cast to int) are passed to __builtin_ia32_alignq128. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128( \
      (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (int)(imm)))

/* Merge-masked form: selects between the alignr result and W under U. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)(__m128i)(W)))

/* Zero-masked form: selects between the alignr result and zero under U. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)_mm_setzero_si128()))
7879
/* 256-bit alignr on 64-bit elements: A and B (as [4 x i64]) and the
 * immediate imm (cast to int) are passed to __builtin_ia32_alignq256. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(imm)))

/* Merge-masked form: selects between the alignr result and W under U. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))

/* Zero-masked form: selects between the alignr result and zero under U. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
7893
7894static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
7895_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
7896 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
7897 (__v4sf)_mm_movehdup_ps(__A),
7898 (__v4sf)__W);
7899}
7900
7901static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
7903 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
7904 (__v4sf)_mm_movehdup_ps(__A),
7905 (__v4sf)_mm_setzero_ps());
7906}
7907
7908static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7909_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
7910 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7911 (__v8sf)_mm256_movehdup_ps(__A),
7912 (__v8sf)__W);
7913}
7914
7915static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7917 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7918 (__v8sf)_mm256_movehdup_ps(__A),
7919 (__v8sf)_mm256_setzero_ps());
7920}
7921
7922static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
7923_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
7924 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
7925 (__v4sf)_mm_moveldup_ps(__A),
7926 (__v4sf)__W);
7927}
7928
7929static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
7931 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
7932 (__v4sf)_mm_moveldup_ps(__A),
7933 (__v4sf)_mm_setzero_ps());
7934}
7935
7936static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7937_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
7938 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7939 (__v8sf)_mm256_moveldup_ps(__A),
7940 (__v8sf)__W);
7941}
7942
7943static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
7945 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7946 (__v8sf)_mm256_moveldup_ps(__A),
7947 (__v8sf)_mm256_setzero_ps());
7948}
7949
/* Masked wrappers around the unmasked shuffle_epi32 intrinsics: the shuffle
 * of A by immediate I is combined with a second vector through the
 * 32-bit-element select builtin under mask U. */

/* 256-bit, merge-masked: the second vector is W. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)(__m256i)(W)))

/* 256-bit, zero-masked: the second vector is all zeros. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)_mm256_setzero_si256()))

/* 128-bit, merge-masked: the second vector is W. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)(__m128i)(W)))

/* 128-bit, zero-masked: the second vector is all zeros. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)_mm_setzero_si128()))
7969
7970static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
7971_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) {
7972 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A,
7973 (__v2df)__W);
7974}
7975
7976static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
7977_mm_maskz_mov_pd(__mmask8 __U, __m128d __A) {
7978 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A,
7979 (__v2df)_mm_setzero_pd());
7980}
7981
7982static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
7983_mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A) {
7984 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A,
7985 (__v4df)__W);
7986}
7987
7988static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
7990 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A,
7991 (__v4df)_mm256_setzero_pd());
7992}
7993
7994static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
7995_mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A) {
7996 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A,
7997 (__v4sf)__W);
7998}
7999
8000static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8001_mm_maskz_mov_ps(__mmask8 __U, __m128 __A) {
8002 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A,
8003 (__v4sf)_mm_setzero_ps());
8004}
8005
8006static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
8007_mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A) {
8008 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A,
8009 (__v8sf)__W);
8010}
8011
8012static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
8014 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A,
8015 (__v8sf)_mm256_setzero_ps());
8016}
8017
8018static __inline__ __m128 __DEFAULT_FN_ATTRS128
8019_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8020{
8021 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8022 (__v4sf) __W,
8023 (__mmask8) __U);
8024}
8025
8026static __inline__ __m128 __DEFAULT_FN_ATTRS128
8028{
8029 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8030 (__v4sf)
8031 _mm_setzero_ps (),
8032 (__mmask8) __U);
8033}
8034
8035static __inline__ __m256 __DEFAULT_FN_ATTRS256
8036_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8037{
8038 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8039 (__v8sf) __W,
8040 (__mmask8) __U);
8041}
8042
8043static __inline__ __m256 __DEFAULT_FN_ATTRS256
8045{
8046 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8047 (__v8sf)
8049 (__mmask8) __U);
8050}
8051
/* Merge-masked single-to-half conversion of a [4 x float] vector: A and the
 * immediate I (cast to int) go to __builtin_ia32_vcvtps2ph_mask, with W
 * (as [8 x i16]) as the pass-through vector and U as the write mask. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask( \
      (__v4sf)(__m128)(A), (int)(I), \
      (__v8hi)(__m128i)(W), (__mmask8)(U)))

/* Zero-masked form: same builtin with an all-zero pass-through vector. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask( \
      (__v4sf)(__m128)(A), (int)(I), \
      (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))

/* The cvtps_ph spellings are plain aliases of the cvt_roundps_ph macros. */
#define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
8064
/* Merge-masked single-to-half conversion of an [8 x float] vector: A and
 * the immediate I (cast to int) go to __builtin_ia32_vcvtps2ph256_mask,
 * with W (as [8 x i16]) as the pass-through vector and U as the write mask.
 * The result is a 128-bit integer vector. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask( \
      (__v8sf)(__m256)(A), (int)(I), \
      (__v8hi)(__m128i)(W), (__mmask8)(U)))

/* Zero-masked form: same builtin with an all-zero pass-through vector. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask( \
      (__v8sf)(__m256)(A), (int)(I), \
      (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))

/* The cvtps_ph spellings are plain aliases of the cvt_roundps_ph macros. */
#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
8077
/* Remove the attribute helper macros defined at the top of this header so
 * they are no longer defined once the header ends. */
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
8082
8083#endif /* __AVX512VLINTRIN_H */
static __inline__ vector float vector float __b
Definition altivec.h:578
return __v
Definition arm_acle.h:88
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X left by the number of bits given...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi32_epi64(__m128i __V)
Sign-extends 32-bit elements from the 128-bit vector of [4 x i32] in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epi32(__m256i __a, __m256i __b)
Compares the corresponding signed 32-bit integers in the two 256-bit vectors of [8 x i32] in __a and ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mul_epu32(__m256i __a, __m256i __b)
Multiplies unsigned 32-bit integers from even-numbered elements of two 256-bit vectors of [8 x i32] an...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors of [8 x i32] in __a and __b...
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sra_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by the number of bits give...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastq_epi64(__m128i __X)
Broadcasts the low element from the 128-bit vector of [2 x i64] in __X to both elements of the result...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_slli_epi64(__m256i __a, int __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a left by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sll_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a left by the number of bits given...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi32(__m256i __a)
Computes the absolute value of each signed 32-bit element in the 256-bit vector of [8 x i32] in __a a...
Definition avx2intrin.h:139
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sll_epi64(__m256i __a, __m128i __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a left by the number of bits given...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu8_epi64(__m128i __V)
Zero-extends the first four bytes from the 128-bit integer vector in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu32_epi64(__m128i __V)
Zero-extends 32-bit elements from the 128-bit vector of [4 x i32] in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu8_epi32(__m128i __V)
Zero-extends bytes from the lower half of the 128-bit integer vector in __V and returns the 32-bit va...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X right by the number of bits give...
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastss_ps(__m128 __X)
Broadcasts the 32-bit floating-point value from the low element of the 128-bit vector of [4 x float] ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srli_epi64(__m256i __a, int __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors of [4 x i64] in __a and __b...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srl_epi64(__m256i __a, __m128i __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi8_epi32(__m128i __V)
Sign-extends bytes from the lower half of the 128-bit integer vector in __V and returns the 32-bit va...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastq_epi64(__m128i __X)
Broadcasts the low element from the 128-bit vector of [2 x i64] in __X to all elements of the result'...
#define __DEFAULT_FN_ATTRS256_CONSTEXPR
Definition avx2intrin.h:29
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mul_epi32(__m256i __a, __m256i __b)
Multiplies signed 32-bit integers from even-numbered elements of two 256-bit vectors of [8 x i32] and...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mullo_epi32(__m256i __a, __m256i __b)
Multiplies signed 32-bit integer elements of two 256-bit vectors of [8 x i32], and returns the lower ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epi32(__m256i __a, __m256i __b)
Compares the corresponding signed 32-bit integers in the two 256-bit vectors of [8 x i32] in __a and ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi32(__m256i __a, __m256i __b)
Subtracts 32-bit integers from corresponding elements of two 256-bit vectors of [8 x i32].
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors of [4 x i64] in __a and __b...
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastss_ps(__m128 __X)
Broadcasts the 32-bit floating-point value from the low element of the 128-bit vector of [4 x float] ...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi64(__m128i __X, __m128i __Y)
Shifts each 64-bit element of the 128-bit vector of [2 x i64] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu16_epi64(__m128i __V)
Zero-extends 16-bit elements from the lower half of the 128-bit vector of [8 x i16] in __V and return...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors of [8 x i32] in __a and __b...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi8_epi64(__m128i __V)
Sign-extends the first four bytes from the 128-bit integer vector in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srli_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi64(__m256i __a, __m256i __b)
Adds 64-bit integers from corresponding elements of two 256-bit vectors of [4 x i64] and returns the ...
Definition avx2intrin.h:333
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi16_epi64(__m128i __V)
Sign-extends 16-bit elements from the lower half of the 128-bit vector of [8 x i16] in __V and return...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_si256(__m256i __a, __m256i __b)
Computes the bitwise AND of the 256-bit integer vectors in __a and __b.
Definition avx2intrin.h:448
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_slli_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a left by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srai_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi32(__m256i __a, __m256i __b)
Adds 32-bit integers from corresponding elements of two 256-bit vectors of [8 x i32] and returns the ...
Definition avx2intrin.h:315
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srl_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by the number of bits give...
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastsd_pd(__m128d __X)
Broadcasts the 64-bit floating-point value from the low element of the 128-bit vector of [2 x double]...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu16_epi32(__m128i __V)
Zero-extends 16-bit elements from the 128-bit vector of [8 x i16] in __V and returns the 32-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epu32(__m256i __a, __m256i __b)
Compares the corresponding unsigned 32-bit integers in the two 256-bit vectors of [8 x i32] in __a an...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi64(__m128i __X, __m128i __Y)
Shifts each 64-bit element of the 128-bit vector of [2 x i64] in __X left by the number of bits given...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X left by the number of bits given...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epu32(__m256i __a, __m256i __b)
Compares the corresponding unsigned 32-bit integers in the two 256-bit vectors of [8 x i32] in __a an...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi16_epi32(__m128i __V)
Sign-extends 16-bit elements from the 128-bit vector of [8 x i16] in __V and returns the 32-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi64(__m256i __a, __m256i __b)
Subtracts 64-bit integers from corresponding elements of two 256-bit vectors of [4 x i64].
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastd_epi32(__m128i __X)
Broadcasts the low element from the 128-bit vector of [4 x i32] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastd_epi32(__m128i __X)
Broadcasts the low element from the 128-bit vector of [4 x i32] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi64(__m256i __X, __m256i __Y)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi64(__m256i __X, __m256i __Y)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __X left by the number of bits given...
unsigned char __mmask8
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu32(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
#define _mm_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi32_epi16(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask(__m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi32(void *__P, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi32(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epi64(__m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi64(void *__P, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi64(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epi64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi64(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepu32_pd(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi64_epi16(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B)
#define _mm256_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rorv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepu32_ps(__m128i __A)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sra_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi64(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_compress_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B)
#define _mm_cmpeq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_pd(__m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi32(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi64(void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu32(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_getexp_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu32(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi32(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
#define _mm256_permutexvar_epi32(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srai_epi64(__m128i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi32(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_pd(__mmask8 __U, __m256d __A)
#define _mm256_cmpeq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_scalef_pd(__m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expand_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi64(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcast_i32x4(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_min_epu64(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi64(void *__P, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
#define _mm_cmpneq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sra_epi64(__m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
#define _mm256_cmpeq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi64_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
#define _mm_cmpeq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ps(__m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_min_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rolv_epi32(__m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu32_ps(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu32(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A)
typedef short __v2hi __attribute__((__vector_size__(4)))
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi32(__m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rolv_epi32(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_scalef_ps(__m256 __A, __m256 __B)
#define _mm256_permutexvar_ps(A, B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi16(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_abs_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rorv_epi32(__m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi64(__mmask8 __U, __m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps(__m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi64(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps(__m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A)
#define _mm_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi64(__m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi32(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi32_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rorv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epu64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_rolv_epi64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi32(__m256i __a, __m256i __b)
#define _mm_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_max_epu64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expand_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi64(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
#define _mm_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi64_epi32(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi32(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi64(void *__P, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi32(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ps(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutexvar_pd(__m256i __X, __m256d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_getexp_ps(__m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu32_pd(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srai_epi64(__m256i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_max_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu32(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_compress_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epu64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi32(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi32_epi8(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rorv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi32(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi64(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi64(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_cmpneq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi32(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_rolv_epi64(__m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
#define _mm_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutexvar_epi64(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16(__m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_permutevar_pd(__m256d __a, __m256i __c)
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector oper...
Definition avxintrin.h:825
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32].
Definition avxintrin.h:2264
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a)
Calculates the square roots of the values in a 256-bit vector of [8 x float].
Definition avxintrin.h:351
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_pd(__m256d __a, __m256d __b)
Subtracts two 256-bit vectors of [4 x double].
Definition avxintrin.h:116
static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtpd_ps(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].
Definition avxintrin.h:2190
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_pd(__m256d __a, __m256d __b)
Multiplies two 256-bit vectors of [4 x double].
Definition avxintrin.h:304
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_permutevar_ps(__m256 __a, __m256i __c)
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vecto...
Definition avxintrin.h:969
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_pd(__m256d __a, __m256d __b)
Divides two 256-bit vectors of [4 x double].
Definition avxintrin.h:186
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movehdup_ps(__m256 __a)
Moves and duplicates odd-indexed values from a 256-bit vector of [8 x float] to float values in a 256...
Definition avxintrin.h:2356
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, __m256 __b)
Subtracts two 256-bit vectors of [8 x float].
Definition avxintrin.h:132
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtepi32_ps(__m256i __a)
Converts a vector of [8 x i32] into a vector of [8 x float].
Definition avxintrin.h:2175
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpacklo_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] ...
Definition avxintrin.h:2499
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into four signed truncated (rounded toward zero) 32-bit int...
Definition avxintrin.h:2244
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtps_pd(__m128 __a)
Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double].
Definition avxintrin.h:2224
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32(__m256 __a)
Converts a vector of [8 x float] into a vector of [8 x i32].
Definition avxintrin.h:2208
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpackhi_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] ...
Definition avxintrin.h:2473
static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutevar_pd(__m128d __a, __m128i __c)
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector oper...
Definition avxintrin.h:787
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.
Definition avxintrin.h:286
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtepi32_pd(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x double].
Definition avxintrin.h:2161
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a)
Converts a vector of [8 x float] into eight signed truncated (rounded toward zero) 32-bit integers re...
Definition avxintrin.h:2284
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.
Definition avxintrin.h:244
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4291
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements se...
Definition avxintrin.h:4208
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_pd(__m256d __a, __m256d __b)
Adds two 256-bit vectors of [4 x double].
Definition avxintrin.h:82
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a)
Calculates the square roots of the values in a 256-bit vector of [4 x double].
Definition avxintrin.h:336
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi64x(long long __q)
Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements se...
Definition avxintrin.h:4265
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_ps(__m256 __a, __m256 __b)
Adds two 256-bit vectors of [8 x float].
Definition avxintrin.h:98
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_moveldup_ps(__m256 __a)
Moves and duplicates even-indexed values from a 256-bit vector of [8 x float] to float values in a 25...
Definition avxintrin.h:2381
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movedup_pd(__m256d __a)
Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to ...
Definition avxintrin.h:2403
static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutevar_ps(__m128 __a, __m128i __c)
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vecto...
Definition avxintrin.h:879
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4279
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a, __m256 __b)
Multiplies two 256-bit vectors of [8 x float].
Definition avxintrin.h:320
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
Definition avxintrin.h:265
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4303
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_ps(__m256 __a, __m256 __b)
Divides two 256-bit vectors of [8 x float].
Definition avxintrin.h:202
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpacklo_pd(__m256d __a, __m256d __b)
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves the...
Definition avxintrin.h:2447
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpackhi_pd(__m256d __a, __m256d __b)
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them...
Definition avxintrin.h:2426
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
Definition avxintrin.h:223
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2935
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
Definition emmintrin.h:218
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3050
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3068
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2859
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
Definition emmintrin.h:303
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
Definition emmintrin.h:2536
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2841
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
Definition emmintrin.h:1323
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2823
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
Definition emmintrin.h:2571
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double].
Definition emmintrin.h:258
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
Definition emmintrin.h:177
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value.
Definition emmintrin.h:3674
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
Definition emmintrin.h:4576
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3014
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
Definition emmintrin.h:4597
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
Definition emmintrin.h:2464
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
Definition emmintrin.h:2105
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
Definition emmintrin.h:1301
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into four signed truncated (rounded toward zero) 32-bit integers,...
Definition emmintrin.h:3362
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
Definition emmintrin.h:4469
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
Definition emmintrin.h:98
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
Definition emmintrin.h:4666
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
Definition emmintrin.h:2674
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
Definition emmintrin.h:349
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
Definition emmintrin.h:4686
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
Definition emmintrin.h:4490
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2805
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3032
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
Definition emmintrin.h:2143
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
Definition emmintrin.h:3325
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition emmintrin.h:3709
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
Definition emmintrin.h:138
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2916
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
Definition emmintrin.h:3343
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
Computes a negated multiply-add of 128-bit vectors of [4 x float].
Definition fmaintrin.h:248
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
Computes a multiply-add of 128-bit vectors of [4 x float].
Definition fmaintrin.h:48
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
Computes a negated multiply-add of 128-bit vectors of [2 x double].
Definition fmaintrin.h:269
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
Computes a multiply-subtract of 256-bit vectors of [4 x double].
Definition fmaintrin.h:615
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
Computes a multiply-subtract of 256-bit vectors of [8 x float].
Definition fmaintrin.h:594
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
Computes a multiply-subtract of 128-bit vectors of [4 x float].
Definition fmaintrin.h:148
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
Computes a negated multiply-add of 256-bit vectors of [4 x double].
Definition fmaintrin.h:657
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
Computes a multiply-add of 256-bit vectors of [8 x float].
Definition fmaintrin.h:552
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
Computes a negated multiply-subtract of 256-bit vectors of [4 x double].
Definition fmaintrin.h:699
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
Computes a negated multiply-subtract of 256-bit vectors of [8 x float].
Definition fmaintrin.h:678
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
Computes a multiply-add of 128-bit vectors of [2 x double].
Definition fmaintrin.h:69
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
Computes a multiply-add of 256-bit vectors of [4 x double].
Definition fmaintrin.h:573
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
Computes a negated multiply-subtract of 128-bit vectors of [4 x float].
Definition fmaintrin.h:348
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
Computes a negated multiply-add of 256-bit vectors of [8 x float].
Definition fmaintrin.h:636
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
Computes a multiply-subtract of 128-bit vectors of [2 x double].
Definition fmaintrin.h:169
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
Computes a negated multiply-subtract of 128-bit vectors of [2 x double].
Definition fmaintrin.h:369
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double]...
Definition pmmintrin.h:248
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-b...
Definition pmmintrin.h:150
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored i...
Definition pmmintrin.h:129
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition bmi2intrin.h:19
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding even-indexed elements of two 128-bit vectors of [4 x i32] and returns a 128-...
Definition smmintrin.h:562
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
Definition smmintrin.h:760
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu16_epi64(__m128i __V)
Zero-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
Definition smmintrin.h:1426
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
Definition smmintrin.h:796
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu32_epi64(__m128i __V)
Zero-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
Definition smmintrin.h:1445
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi8_epi32(__m128i __V)
Sign-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
Definition smmintrin.h:1248
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi16_epi64(__m128i __V)
Sign-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
Definition smmintrin.h:1309
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu8_epi32(__m128i __V)
Zero-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
Definition smmintrin.h:1369
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi8_epi64(__m128i __V)
Sign-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
Definition smmintrin.h:1269
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
Definition smmintrin.h:742
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mullo_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding elements of two 128-bit vectors of [4 x i32] and returns the lower 32 bits of...
Definition smmintrin.h:543
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi16_epi32(__m128i __V)
Sign-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
Definition smmintrin.h:1290
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
Definition smmintrin.h:778
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_epi64(__m128i __V)
Sign-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
Definition smmintrin.h:1328
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu16_epi32(__m128i __V)
Zero-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
Definition smmintrin.h:1407
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu8_epi64(__m128i __V)
Zero-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
Definition smmintrin.h:1388
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition tmmintrin.h:131
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...
Definition xmmintrin.h:2780
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition.
Definition xmmintrin.h:98
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
Definition xmmintrin.h:2759
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
Definition xmmintrin.h:218
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values.
Definition xmmintrin.h:412
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values.
Definition xmmintrin.h:366
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
Definition xmmintrin.h:250
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2018
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
Definition xmmintrin.h:139
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
Definition xmmintrin.h:179