clang 22.0.0git
avx512vlintrin.h
Go to the documentation of this file.
1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLINTRIN_H
15#define __AVX512VLINTRIN_H
16
/* Attribute sets applied to every intrinsic in this header: always inlined,
 * no debug info, compiled for the "avx512vl,no-evex512" target. The numeric
 * suffix is the __min_vector_width__ value (in bits) attached to the function.
 */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,no-evex512"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,no-evex512"), \
                 __min_vector_width__(256)))

/* The *_CONSTEXPR variants append `constexpr` only when compiled as C++11 or
 * later; in every other mode they are plain aliases of the sets above. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif
33
/* Narrow integer vector types; __vector_size__ is given in bytes. */
typedef short __v2hi __attribute__((__vector_size__(4))); /* 4 bytes of short */
typedef char __v4qi __attribute__((__vector_size__(4)));  /* 4 bytes of char */
typedef char __v2qi __attribute__((__vector_size__(2)));  /* 2 bytes of char */
37
38/* Integer compare */
39
/* 128-bit epi32 comparisons: each wrapper forwards to _mm_cmp_epi32_mask with
 * a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates with an input mask k, forwarded to
 * _mm_mask_cmp_epi32_mask. */
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
64
/* 256-bit epi32 comparisons: each wrapper forwards to _mm256_cmp_epi32_mask
 * with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates with an input mask k, forwarded to
 * _mm256_mask_cmp_epi32_mask. */
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
89
/* 128-bit epu32 comparisons: each wrapper forwards to _mm_cmp_epu32_mask with
 * a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates with an input mask k, forwarded to
 * _mm_mask_cmp_epu32_mask. */
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
114
/* 256-bit epu32 comparisons: each wrapper forwards to _mm256_cmp_epu32_mask
 * with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates with an input mask k, forwarded to
 * _mm256_mask_cmp_epu32_mask. */
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
139
/* 128-bit epi64 comparisons: each wrapper forwards to _mm_cmp_epi64_mask with
 * a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates with an input mask k, forwarded to
 * _mm_mask_cmp_epi64_mask. */
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
164
/* 256-bit epi64 comparisons: each wrapper forwards to _mm256_cmp_epi64_mask
 * with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates with an input mask k, forwarded to
 * _mm256_mask_cmp_epi64_mask. */
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
189
/* 128-bit epu64 comparisons: each wrapper forwards to _mm_cmp_epu64_mask with
 * a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates with an input mask k, forwarded to
 * _mm_mask_cmp_epu64_mask. */
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
214
/* 256-bit epu64 comparisons: each wrapper forwards to _mm256_cmp_epu64_mask
 * with a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)

/* Same predicates with an input mask k, forwarded to
 * _mm256_mask_cmp_epu64_mask. */
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
239
240static __inline__ __m256i __DEFAULT_FN_ATTRS256
241_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
242{
243 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
244 (__v8si)_mm256_add_epi32(__A, __B),
245 (__v8si)__W);
246}
247
248static __inline__ __m256i __DEFAULT_FN_ATTRS256
249_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
250{
251 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
252 (__v8si)_mm256_add_epi32(__A, __B),
253 (__v8si)_mm256_setzero_si256());
254}
255
256static __inline__ __m256i __DEFAULT_FN_ATTRS256
257_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
258{
259 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
260 (__v4di)_mm256_add_epi64(__A, __B),
261 (__v4di)__W);
262}
263
264static __inline__ __m256i __DEFAULT_FN_ATTRS256
265_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
266{
267 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
268 (__v4di)_mm256_add_epi64(__A, __B),
269 (__v4di)_mm256_setzero_si256());
270}
271
272static __inline__ __m256i __DEFAULT_FN_ATTRS256
273_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
274{
275 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
276 (__v8si)_mm256_sub_epi32(__A, __B),
277 (__v8si)__W);
278}
279
280static __inline__ __m256i __DEFAULT_FN_ATTRS256
281_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
282{
283 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
284 (__v8si)_mm256_sub_epi32(__A, __B),
285 (__v8si)_mm256_setzero_si256());
286}
287
288static __inline__ __m256i __DEFAULT_FN_ATTRS256
289_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
290{
291 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
292 (__v4di)_mm256_sub_epi64(__A, __B),
293 (__v4di)__W);
294}
295
296static __inline__ __m256i __DEFAULT_FN_ATTRS256
297_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
298{
299 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
300 (__v4di)_mm256_sub_epi64(__A, __B),
301 (__v4di)_mm256_setzero_si256());
302}
303
304static __inline__ __m128i __DEFAULT_FN_ATTRS128
305_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
306{
307 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
308 (__v4si)_mm_add_epi32(__A, __B),
309 (__v4si)__W);
310}
311
312static __inline__ __m128i __DEFAULT_FN_ATTRS128
313_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
314{
315 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
316 (__v4si)_mm_add_epi32(__A, __B),
317 (__v4si)_mm_setzero_si128());
318}
319
320static __inline__ __m128i __DEFAULT_FN_ATTRS128
321_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
322{
323 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
324 (__v2di)_mm_add_epi64(__A, __B),
325 (__v2di)__W);
326}
327
328static __inline__ __m128i __DEFAULT_FN_ATTRS128
329_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
330{
331 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
332 (__v2di)_mm_add_epi64(__A, __B),
333 (__v2di)_mm_setzero_si128());
334}
335
336static __inline__ __m128i __DEFAULT_FN_ATTRS128
337_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
338{
339 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
340 (__v4si)_mm_sub_epi32(__A, __B),
341 (__v4si)__W);
342}
343
344static __inline__ __m128i __DEFAULT_FN_ATTRS128
345_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
346{
347 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
348 (__v4si)_mm_sub_epi32(__A, __B),
349 (__v4si)_mm_setzero_si128());
350}
351
352static __inline__ __m128i __DEFAULT_FN_ATTRS128
353_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
354{
355 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
356 (__v2di)_mm_sub_epi64(__A, __B),
357 (__v2di)__W);
358}
359
360static __inline__ __m128i __DEFAULT_FN_ATTRS128
361_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
362{
363 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
364 (__v2di)_mm_sub_epi64(__A, __B),
365 (__v2di)_mm_setzero_si128());
366}
367
368static __inline__ __m256i __DEFAULT_FN_ATTRS256
369_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
370{
371 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
372 (__v4di)_mm256_mul_epi32(__X, __Y),
373 (__v4di)__W);
374}
375
376static __inline__ __m256i __DEFAULT_FN_ATTRS256
377_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
378{
379 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
380 (__v4di)_mm256_mul_epi32(__X, __Y),
381 (__v4di)_mm256_setzero_si256());
382}
383
384static __inline__ __m128i __DEFAULT_FN_ATTRS128
385_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
386{
387 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
388 (__v2di)_mm_mul_epi32(__X, __Y),
389 (__v2di)__W);
390}
391
392static __inline__ __m128i __DEFAULT_FN_ATTRS128
393_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
394{
395 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
396 (__v2di)_mm_mul_epi32(__X, __Y),
397 (__v2di)_mm_setzero_si128());
398}
399
400static __inline__ __m256i __DEFAULT_FN_ATTRS256
401_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
402{
403 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
404 (__v4di)_mm256_mul_epu32(__X, __Y),
405 (__v4di)__W);
406}
407
408static __inline__ __m256i __DEFAULT_FN_ATTRS256
409_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
410{
411 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
412 (__v4di)_mm256_mul_epu32(__X, __Y),
413 (__v4di)_mm256_setzero_si256());
414}
415
416static __inline__ __m128i __DEFAULT_FN_ATTRS128
417_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
418{
419 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
420 (__v2di)_mm_mul_epu32(__X, __Y),
421 (__v2di)__W);
422}
423
424static __inline__ __m128i __DEFAULT_FN_ATTRS128
425_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
426{
427 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
428 (__v2di)_mm_mul_epu32(__X, __Y),
429 (__v2di)_mm_setzero_si128());
430}
431
432static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
433_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
434 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
435 (__v8si)_mm256_mullo_epi32(__A, __B),
436 (__v8si)_mm256_setzero_si256());
437}
438
439static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
440_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
441 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
442 (__v8si)_mm256_mullo_epi32(__A, __B),
443 (__v8si)__W);
444}
445
446static __inline__ __m128i __DEFAULT_FN_ATTRS128
447_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
448{
449 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
450 (__v4si)_mm_mullo_epi32(__A, __B),
451 (__v4si)_mm_setzero_si128());
452}
453
454static __inline__ __m128i __DEFAULT_FN_ATTRS128
455_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
456{
457 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
458 (__v4si)_mm_mullo_epi32(__A, __B),
459 (__v4si)__W);
460}
461
462static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
463_mm256_and_epi32(__m256i __a, __m256i __b) {
464 return (__m256i)((__v8su)__a & (__v8su)__b);
465}
466
467static __inline__ __m256i __DEFAULT_FN_ATTRS256
468_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
469{
470 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
471 (__v8si)_mm256_and_epi32(__A, __B),
472 (__v8si)__W);
473}
474
475static __inline__ __m256i __DEFAULT_FN_ATTRS256
476_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
477{
478 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
479}
480
481static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
482_mm_and_epi32(__m128i __a, __m128i __b) {
483 return (__m128i)((__v4su)__a & (__v4su)__b);
484}
485
486static __inline__ __m128i __DEFAULT_FN_ATTRS128
487_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
488{
489 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
490 (__v4si)_mm_and_epi32(__A, __B),
491 (__v4si)__W);
492}
493
494static __inline__ __m128i __DEFAULT_FN_ATTRS128
495_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
496{
497 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
498}
499
500static __inline__ __m256i __DEFAULT_FN_ATTRS256
501_mm256_andnot_epi32(__m256i __A, __m256i __B)
502{
503 return (__m256i)(~(__v8su)__A & (__v8su)__B);
504}
505
506static __inline__ __m256i __DEFAULT_FN_ATTRS256
507_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
508{
509 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
510 (__v8si)_mm256_andnot_epi32(__A, __B),
511 (__v8si)__W);
512}
513
514static __inline__ __m256i __DEFAULT_FN_ATTRS256
515_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
516{
518 __U, __A, __B);
519}
520
521static __inline__ __m128i __DEFAULT_FN_ATTRS128
522_mm_andnot_epi32(__m128i __A, __m128i __B)
523{
524 return (__m128i)(~(__v4su)__A & (__v4su)__B);
525}
526
527static __inline__ __m128i __DEFAULT_FN_ATTRS128
528_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
529{
530 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
531 (__v4si)_mm_andnot_epi32(__A, __B),
532 (__v4si)__W);
533}
534
535static __inline__ __m128i __DEFAULT_FN_ATTRS128
536_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
537{
538 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
539}
540
541static __inline__ __m256i __DEFAULT_FN_ATTRS256
542_mm256_or_epi32(__m256i __a, __m256i __b)
543{
544 return (__m256i)((__v8su)__a | (__v8su)__b);
545}
546
547static __inline__ __m256i __DEFAULT_FN_ATTRS256
548_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
549{
550 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
551 (__v8si)_mm256_or_epi32(__A, __B),
552 (__v8si)__W);
553}
554
555static __inline__ __m256i __DEFAULT_FN_ATTRS256
556_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
557{
558 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
559}
560
561static __inline__ __m128i __DEFAULT_FN_ATTRS128
562_mm_or_epi32(__m128i __a, __m128i __b)
563{
564 return (__m128i)((__v4su)__a | (__v4su)__b);
565}
566
567static __inline__ __m128i __DEFAULT_FN_ATTRS128
568_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
569{
570 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
571 (__v4si)_mm_or_epi32(__A, __B),
572 (__v4si)__W);
573}
574
575static __inline__ __m128i __DEFAULT_FN_ATTRS128
576_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
577{
578 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
579}
580
581static __inline__ __m256i __DEFAULT_FN_ATTRS256
582_mm256_xor_epi32(__m256i __a, __m256i __b)
583{
584 return (__m256i)((__v8su)__a ^ (__v8su)__b);
585}
586
587static __inline__ __m256i __DEFAULT_FN_ATTRS256
588_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
589{
590 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
591 (__v8si)_mm256_xor_epi32(__A, __B),
592 (__v8si)__W);
593}
594
595static __inline__ __m256i __DEFAULT_FN_ATTRS256
596_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
597{
598 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
599}
600
601static __inline__ __m128i __DEFAULT_FN_ATTRS128
602_mm_xor_epi32(__m128i __a, __m128i __b)
603{
604 return (__m128i)((__v4su)__a ^ (__v4su)__b);
605}
606
607static __inline__ __m128i __DEFAULT_FN_ATTRS128
608_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
609{
610 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
611 (__v4si)_mm_xor_epi32(__A, __B),
612 (__v4si)__W);
613}
614
615static __inline__ __m128i __DEFAULT_FN_ATTRS128
616_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
617{
618 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
619}
620
621static __inline__ __m256i __DEFAULT_FN_ATTRS256
622_mm256_and_epi64(__m256i __a, __m256i __b)
623{
624 return (__m256i)((__v4du)__a & (__v4du)__b);
625}
626
627static __inline__ __m256i __DEFAULT_FN_ATTRS256
628_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
629{
630 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
631 (__v4di)_mm256_and_epi64(__A, __B),
632 (__v4di)__W);
633}
634
635static __inline__ __m256i __DEFAULT_FN_ATTRS256
636_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
637{
638 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
639}
640
641static __inline__ __m128i __DEFAULT_FN_ATTRS128
642_mm_and_epi64(__m128i __a, __m128i __b)
643{
644 return (__m128i)((__v2du)__a & (__v2du)__b);
645}
646
647static __inline__ __m128i __DEFAULT_FN_ATTRS128
648_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
649{
650 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
651 (__v2di)_mm_and_epi64(__A, __B),
652 (__v2di)__W);
653}
654
655static __inline__ __m128i __DEFAULT_FN_ATTRS128
656_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
657{
658 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
659}
660
661static __inline__ __m256i __DEFAULT_FN_ATTRS256
662_mm256_andnot_epi64(__m256i __A, __m256i __B)
663{
664 return (__m256i)(~(__v4du)__A & (__v4du)__B);
665}
666
667static __inline__ __m256i __DEFAULT_FN_ATTRS256
668_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
669{
670 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
671 (__v4di)_mm256_andnot_epi64(__A, __B),
672 (__v4di)__W);
673}
674
675static __inline__ __m256i __DEFAULT_FN_ATTRS256
676_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
677{
679 __U, __A, __B);
680}
681
682static __inline__ __m128i __DEFAULT_FN_ATTRS128
683_mm_andnot_epi64(__m128i __A, __m128i __B)
684{
685 return (__m128i)(~(__v2du)__A & (__v2du)__B);
686}
687
688static __inline__ __m128i __DEFAULT_FN_ATTRS128
689_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
690{
691 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
692 (__v2di)_mm_andnot_epi64(__A, __B),
693 (__v2di)__W);
694}
695
696static __inline__ __m128i __DEFAULT_FN_ATTRS128
697_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
698{
699 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
700}
701
702static __inline__ __m256i __DEFAULT_FN_ATTRS256
703_mm256_or_epi64(__m256i __a, __m256i __b)
704{
705 return (__m256i)((__v4du)__a | (__v4du)__b);
706}
707
708static __inline__ __m256i __DEFAULT_FN_ATTRS256
709_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
710{
711 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
712 (__v4di)_mm256_or_epi64(__A, __B),
713 (__v4di)__W);
714}
715
716static __inline__ __m256i __DEFAULT_FN_ATTRS256
717_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
718{
719 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
720}
721
722static __inline__ __m128i __DEFAULT_FN_ATTRS128
723_mm_or_epi64(__m128i __a, __m128i __b)
724{
725 return (__m128i)((__v2du)__a | (__v2du)__b);
726}
727
728static __inline__ __m128i __DEFAULT_FN_ATTRS128
729_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
730{
731 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
732 (__v2di)_mm_or_epi64(__A, __B),
733 (__v2di)__W);
734}
735
736static __inline__ __m128i __DEFAULT_FN_ATTRS128
737_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
738{
739 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
740}
741
742static __inline__ __m256i __DEFAULT_FN_ATTRS256
743_mm256_xor_epi64(__m256i __a, __m256i __b)
744{
745 return (__m256i)((__v4du)__a ^ (__v4du)__b);
746}
747
748static __inline__ __m256i __DEFAULT_FN_ATTRS256
749_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
750{
751 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
752 (__v4di)_mm256_xor_epi64(__A, __B),
753 (__v4di)__W);
754}
755
756static __inline__ __m256i __DEFAULT_FN_ATTRS256
757_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
758{
759 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
760}
761
762static __inline__ __m128i __DEFAULT_FN_ATTRS128
763_mm_xor_epi64(__m128i __a, __m128i __b)
764{
765 return (__m128i)((__v2du)__a ^ (__v2du)__b);
766}
767
768static __inline__ __m128i __DEFAULT_FN_ATTRS128
769_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
770 __m128i __B)
771{
772 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
773 (__v2di)_mm_xor_epi64(__A, __B),
774 (__v2di)__W);
775}
776
777static __inline__ __m128i __DEFAULT_FN_ATTRS128
778_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
779{
780 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
781}
782
/* 128-bit 32-bit-lane integer compares with a caller-supplied predicate p.
 * The unmasked forms hand the builtin an all-ones mask, (__mmask8)-1; the
 * masked forms hand it m. The epu32 forms call the ucmpd builtin, which is
 * still given __v4si operands. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)(m)))

#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask( \
      (__v4si)(__m128i)(a), (__v4si)(__m128i)(b), (int)(p), (__mmask8)(m)))
802
/* 256-bit 32-bit-lane integer compares with a caller-supplied predicate p.
 * The unmasked forms hand the builtin an all-ones mask, (__mmask8)-1; the
 * masked forms hand it m. The epu32 forms call the ucmpd builtin, which is
 * still given __v8si operands. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)(m)))

#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask( \
      (__v8si)(__m256i)(a), (__v8si)(__m256i)(b), (int)(p), (__mmask8)(m)))
822
/* 128-bit 64-bit-lane integer compares with a caller-supplied predicate p.
 * The unmasked forms hand the builtin an all-ones mask, (__mmask8)-1; the
 * masked forms hand it m. The epu64 forms call the ucmpq builtin, which is
 * still given __v2di operands. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)(m)))

#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)-1))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask( \
      (__v2di)(__m128i)(a), (__v2di)(__m128i)(b), (int)(p), (__mmask8)(m)))
842
/* 256-bit 64-bit-lane integer compares with a caller-supplied predicate p.
 * The unmasked forms hand the builtin an all-ones mask, (__mmask8)-1; the
 * masked forms hand it m. The epu64 forms call the ucmpq builtin, which is
 * still given __v4di operands. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)(m)))

#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask( \
      (__v4di)(__m256i)(a), (__v4di)(__m256i)(b), (int)(p), (__mmask8)(m)))
862
/* 256-bit floating-point compares with a caller-supplied predicate p (cast
 * to int). ps operates on the __v8sf view, pd on the __v4df view. The
 * unmasked forms hand the builtin an all-ones mask, (__mmask8)-1; the masked
 * forms hand it m. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), (int)(p), (__mmask8)(m)))

#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), (int)(p), (__mmask8)-1))

#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), (int)(p), (__mmask8)(m)))
882
/* Expands to __builtin_ia32_cmpps128_mask on a and b viewed as __v4sf, with
   predicate p and an all-ones mask operand. */
#define _mm_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask( \
      (__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (int)(p), (__mmask8)-1))
887
/* Expands to __builtin_ia32_cmpps128_mask on a and b viewed as __v4sf, with
   predicate p and m as the builtin's mask operand. */
#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask( \
      (__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (int)(p), (__mmask8)(m)))
892
/* Expands to __builtin_ia32_cmppd128_mask on a and b viewed as __v2df, with
   predicate p and an all-ones mask operand. */
#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), (int)(p), (__mmask8)-1))
897
/* Expands to __builtin_ia32_cmppd128_mask on a and b viewed as __v2df, with
   predicate p and m as the builtin's mask operand. */
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), (int)(p), (__mmask8)(m)))
902
903static __inline__ __m128d __DEFAULT_FN_ATTRS128
904_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
905{
906 return (__m128d)__builtin_ia32_selectpd_128(
907 (__mmask8)__U,
908 __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, (__v2df)__C),
909 (__v2df)__A);
910}
911
912static __inline__ __m128d __DEFAULT_FN_ATTRS128
913_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
914{
915 return (__m128d)__builtin_ia32_selectpd_128(
916 (__mmask8)__U,
917 __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, (__v2df)__C),
918 (__v2df)__C);
919}
920
921static __inline__ __m128d __DEFAULT_FN_ATTRS128
922_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
923{
924 return (__m128d)__builtin_ia32_selectpd_128(
925 (__mmask8)__U,
926 __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, (__v2df)__C),
927 (__v2df)_mm_setzero_pd());
928}
929
930static __inline__ __m128d __DEFAULT_FN_ATTRS128
931_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
932{
933 return (__m128d)__builtin_ia32_selectpd_128(
934 (__mmask8)__U,
935 __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, -(__v2df)__C),
936 (__v2df)__A);
937}
938
939static __inline__ __m128d __DEFAULT_FN_ATTRS128
940_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
941{
942 return (__m128d)__builtin_ia32_selectpd_128(
943 (__mmask8)__U,
944 __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, -(__v2df)__C),
945 (__v2df)_mm_setzero_pd());
946}
947
948static __inline__ __m128d __DEFAULT_FN_ATTRS128
949_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
950{
951 return (__m128d)__builtin_ia32_selectpd_128(
952 (__mmask8)__U,
953 __builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, (__v2df)__C),
954 (__v2df)__C);
955}
956
957static __inline__ __m128d __DEFAULT_FN_ATTRS128
958_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
959{
960 return (__m128d)__builtin_ia32_selectpd_128(
961 (__mmask8)__U,
962 __builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, (__v2df)__C),
963 (__v2df)_mm_setzero_pd());
964}
965
966static __inline__ __m128d __DEFAULT_FN_ATTRS128
967_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
968{
969 return (__m128d)__builtin_ia32_selectpd_128(
970 (__mmask8)__U,
971 __builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B, -(__v2df)__C),
972 (__v2df)_mm_setzero_pd());
973}
974
975static __inline__ __m256d __DEFAULT_FN_ATTRS256
976_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
977{
978 return (__m256d)__builtin_ia32_selectpd_256(
979 (__mmask8)__U,
980 __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, (__v4df)__C),
981 (__v4df)__A);
982}
983
984static __inline__ __m256d __DEFAULT_FN_ATTRS256
985_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
986{
987 return (__m256d)__builtin_ia32_selectpd_256(
988 (__mmask8)__U,
989 __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, (__v4df)__C),
990 (__v4df)__C);
991}
992
993static __inline__ __m256d __DEFAULT_FN_ATTRS256
994_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
995{
996 return (__m256d)__builtin_ia32_selectpd_256(
997 (__mmask8)__U,
998 __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, (__v4df)__C),
999 (__v4df)_mm256_setzero_pd());
1000}
1001
1002static __inline__ __m256d __DEFAULT_FN_ATTRS256
1003_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1004{
1005 return (__m256d)__builtin_ia32_selectpd_256(
1006 (__mmask8)__U,
1007 __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, -(__v4df)__C),
1008 (__v4df)__A);
1009}
1010
1011static __inline__ __m256d __DEFAULT_FN_ATTRS256
1012_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1013{
1014 return (__m256d)__builtin_ia32_selectpd_256(
1015 (__mmask8)__U,
1016 __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, -(__v4df)__C),
1017 (__v4df)_mm256_setzero_pd());
1018}
1019
1020static __inline__ __m256d __DEFAULT_FN_ATTRS256
1021_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1022{
1023 return (__m256d)__builtin_ia32_selectpd_256(
1024 (__mmask8)__U,
1025 __builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, (__v4df)__C),
1026 (__v4df)__C);
1027}
1028
1029static __inline__ __m256d __DEFAULT_FN_ATTRS256
1030_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1031{
1032 return (__m256d)__builtin_ia32_selectpd_256(
1033 (__mmask8)__U,
1034 __builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, (__v4df)__C),
1035 (__v4df)_mm256_setzero_pd());
1036}
1037
1038static __inline__ __m256d __DEFAULT_FN_ATTRS256
1039_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1040{
1041 return (__m256d)__builtin_ia32_selectpd_256(
1042 (__mmask8)__U,
1043 __builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B, -(__v4df)__C),
1044 (__v4df)_mm256_setzero_pd());
1045}
1046
1047static __inline__ __m128 __DEFAULT_FN_ATTRS128
1048_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1049{
1050 return (__m128)__builtin_ia32_selectps_128(
1051 (__mmask8)__U,
1052 __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, (__v4sf)__C),
1053 (__v4sf)__A);
1054}
1055
1056static __inline__ __m128 __DEFAULT_FN_ATTRS128
1057_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1058{
1059 return (__m128)__builtin_ia32_selectps_128(
1060 (__mmask8)__U,
1061 __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, (__v4sf)__C),
1062 (__v4sf)__C);
1063}
1064
1065static __inline__ __m128 __DEFAULT_FN_ATTRS128
1066_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1067{
1068 return (__m128)__builtin_ia32_selectps_128(
1069 (__mmask8)__U,
1070 __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, (__v4sf)__C),
1071 (__v4sf)_mm_setzero_ps());
1072}
1073
1074static __inline__ __m128 __DEFAULT_FN_ATTRS128
1075_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1076{
1077 return (__m128)__builtin_ia32_selectps_128(
1078 (__mmask8)__U,
1079 __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C),
1080 (__v4sf)__A);
1081}
1082
1083static __inline__ __m128 __DEFAULT_FN_ATTRS128
1084_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1085{
1086 return (__m128)__builtin_ia32_selectps_128(
1087 (__mmask8)__U,
1088 __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C),
1089 (__v4sf)_mm_setzero_ps());
1090}
1091
1092static __inline__ __m128 __DEFAULT_FN_ATTRS128
1093_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1094{
1095 return (__m128)__builtin_ia32_selectps_128(
1096 (__mmask8)__U,
1097 __builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C),
1098 (__v4sf)__C);
1099}
1100
1101static __inline__ __m128 __DEFAULT_FN_ATTRS128
1102_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1103{
1104 return (__m128)__builtin_ia32_selectps_128(
1105 (__mmask8)__U,
1106 __builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C),
1107 (__v4sf)_mm_setzero_ps());
1108}
1109
1110static __inline__ __m128 __DEFAULT_FN_ATTRS128
1111_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1112{
1113 return (__m128)__builtin_ia32_selectps_128(
1114 (__mmask8)__U,
1115 __builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C),
1116 (__v4sf)_mm_setzero_ps());
1117}
1118
1119static __inline__ __m256 __DEFAULT_FN_ATTRS256
1120_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1121{
1122 return (__m256)__builtin_ia32_selectps_256(
1123 (__mmask8)__U,
1124 __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, (__v8sf)__C),
1125 (__v8sf)__A);
1126}
1127
1128static __inline__ __m256 __DEFAULT_FN_ATTRS256
1129_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1130{
1131 return (__m256)__builtin_ia32_selectps_256(
1132 (__mmask8)__U,
1133 __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, (__v8sf)__C),
1134 (__v8sf)__C);
1135}
1136
1137static __inline__ __m256 __DEFAULT_FN_ATTRS256
1138_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1139{
1140 return (__m256)__builtin_ia32_selectps_256(
1141 (__mmask8)__U,
1142 __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, (__v8sf)__C),
1143 (__v8sf)_mm256_setzero_ps());
1144}
1145
1146static __inline__ __m256 __DEFAULT_FN_ATTRS256
1147_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1148{
1149 return (__m256)__builtin_ia32_selectps_256(
1150 (__mmask8)__U,
1151 __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C),
1152 (__v8sf)__A);
1153}
1154
1155static __inline__ __m256 __DEFAULT_FN_ATTRS256
1156_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1157{
1158 return (__m256)__builtin_ia32_selectps_256(
1159 (__mmask8)__U,
1160 __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C),
1161 (__v8sf)_mm256_setzero_ps());
1162}
1163
1164static __inline__ __m256 __DEFAULT_FN_ATTRS256
1165_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1166{
1167 return (__m256)__builtin_ia32_selectps_256(
1168 (__mmask8)__U,
1169 __builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C),
1170 (__v8sf)__C);
1171}
1172
1173static __inline__ __m256 __DEFAULT_FN_ATTRS256
1174_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1175{
1176 return (__m256)__builtin_ia32_selectps_256(
1177 (__mmask8)__U,
1178 __builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C),
1179 (__v8sf)_mm256_setzero_ps());
1180}
1181
1182static __inline__ __m256 __DEFAULT_FN_ATTRS256
1183_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1184{
1185 return (__m256)__builtin_ia32_selectps_256(
1186 (__mmask8)__U,
1187 __builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C),
1188 (__v8sf)_mm256_setzero_ps());
1189}
1190
1191static __inline__ __m128d __DEFAULT_FN_ATTRS128
1192_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1193{
1194 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1195 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1196 (__v2df) __B,
1197 (__v2df) __C),
1198 (__v2df) __A);
1199}
1200
1201static __inline__ __m128d __DEFAULT_FN_ATTRS128
1202_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1203{
1204 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1205 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1206 (__v2df) __B,
1207 (__v2df) __C),
1208 (__v2df) __C);
1209}
1210
1211static __inline__ __m128d __DEFAULT_FN_ATTRS128
1212_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1213{
1214 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1215 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1216 (__v2df) __B,
1217 (__v2df) __C),
1218 (__v2df)_mm_setzero_pd());
1219}
1220
1221static __inline__ __m128d __DEFAULT_FN_ATTRS128
1222_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1223{
1224 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1225 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1226 (__v2df) __B,
1227 -(__v2df) __C),
1228 (__v2df) __A);
1229}
1230
1231static __inline__ __m128d __DEFAULT_FN_ATTRS128
1232_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1233{
1234 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1235 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1236 (__v2df) __B,
1237 -(__v2df) __C),
1238 (__v2df)_mm_setzero_pd());
1239}
1240
1241static __inline__ __m256d __DEFAULT_FN_ATTRS256
1242_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1243{
1244 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1245 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1246 (__v4df) __B,
1247 (__v4df) __C),
1248 (__v4df) __A);
1249}
1250
1251static __inline__ __m256d __DEFAULT_FN_ATTRS256
1252_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1253{
1254 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1255 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1256 (__v4df) __B,
1257 (__v4df) __C),
1258 (__v4df) __C);
1259}
1260
1261static __inline__ __m256d __DEFAULT_FN_ATTRS256
1262_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1263{
1264 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1265 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1266 (__v4df) __B,
1267 (__v4df) __C),
1268 (__v4df)_mm256_setzero_pd());
1269}
1270
1271static __inline__ __m256d __DEFAULT_FN_ATTRS256
1272_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1273{
1274 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1275 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1276 (__v4df) __B,
1277 -(__v4df) __C),
1278 (__v4df) __A);
1279}
1280
1281static __inline__ __m256d __DEFAULT_FN_ATTRS256
1282_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1283{
1284 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1285 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1286 (__v4df) __B,
1287 -(__v4df) __C),
1288 (__v4df)_mm256_setzero_pd());
1289}
1290
1291static __inline__ __m128 __DEFAULT_FN_ATTRS128
1292_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1293{
1294 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1295 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1296 (__v4sf) __B,
1297 (__v4sf) __C),
1298 (__v4sf) __A);
1299}
1300
1301static __inline__ __m128 __DEFAULT_FN_ATTRS128
1302_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1303{
1304 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1305 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1306 (__v4sf) __B,
1307 (__v4sf) __C),
1308 (__v4sf) __C);
1309}
1310
1311static __inline__ __m128 __DEFAULT_FN_ATTRS128
1312_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1313{
1314 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1315 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1316 (__v4sf) __B,
1317 (__v4sf) __C),
1318 (__v4sf)_mm_setzero_ps());
1319}
1320
1321static __inline__ __m128 __DEFAULT_FN_ATTRS128
1322_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1323{
1324 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1325 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1326 (__v4sf) __B,
1327 -(__v4sf) __C),
1328 (__v4sf) __A);
1329}
1330
1331static __inline__ __m128 __DEFAULT_FN_ATTRS128
1332_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1333{
1334 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1335 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1336 (__v4sf) __B,
1337 -(__v4sf) __C),
1338 (__v4sf)_mm_setzero_ps());
1339}
1340
1341static __inline__ __m256 __DEFAULT_FN_ATTRS256
1342_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1343 __m256 __C)
1344{
1345 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1346 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1347 (__v8sf) __B,
1348 (__v8sf) __C),
1349 (__v8sf) __A);
1350}
1351
1352static __inline__ __m256 __DEFAULT_FN_ATTRS256
1353_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1354{
1355 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1356 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1357 (__v8sf) __B,
1358 (__v8sf) __C),
1359 (__v8sf) __C);
1360}
1361
1362static __inline__ __m256 __DEFAULT_FN_ATTRS256
1363_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1364{
1365 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1366 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1367 (__v8sf) __B,
1368 (__v8sf) __C),
1369 (__v8sf)_mm256_setzero_ps());
1370}
1371
1372static __inline__ __m256 __DEFAULT_FN_ATTRS256
1373_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1374{
1375 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1376 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1377 (__v8sf) __B,
1378 -(__v8sf) __C),
1379 (__v8sf) __A);
1380}
1381
1382static __inline__ __m256 __DEFAULT_FN_ATTRS256
1383_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1384{
1385 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1386 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1387 (__v8sf) __B,
1388 -(__v8sf) __C),
1389 (__v8sf)_mm256_setzero_ps());
1390}
1391
1392static __inline__ __m128d __DEFAULT_FN_ATTRS128
1393_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1394{
1395 return (__m128d)__builtin_ia32_selectpd_128(
1396 (__mmask8)__U,
1397 __builtin_elementwise_fma((__v2df)__A, (__v2df)__B, -(__v2df)__C),
1398 (__v2df)__C);
1399}
1400
1401static __inline__ __m256d __DEFAULT_FN_ATTRS256
1402_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1403{
1404 return (__m256d)__builtin_ia32_selectpd_256(
1405 (__mmask8)__U,
1406 __builtin_elementwise_fma((__v4df)__A, (__v4df)__B, -(__v4df)__C),
1407 (__v4df)__C);
1408}
1409
1410static __inline__ __m128 __DEFAULT_FN_ATTRS128
1411_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1412{
1413 return (__m128)__builtin_ia32_selectps_128(
1414 (__mmask8)__U,
1415 __builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C),
1416 (__v4sf)__C);
1417}
1418
1419static __inline__ __m256 __DEFAULT_FN_ATTRS256
1420_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1421{
1422 return (__m256)__builtin_ia32_selectps_256(
1423 (__mmask8)__U,
1424 __builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C),
1425 (__v8sf)__C);
1426}
1427
1428static __inline__ __m128d __DEFAULT_FN_ATTRS128
1429_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1430{
1431 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1432 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1433 (__v2df) __B,
1434 -(__v2df) __C),
1435 (__v2df) __C);
1436}
1437
1438static __inline__ __m256d __DEFAULT_FN_ATTRS256
1439_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1440{
1441 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1442 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1443 (__v4df) __B,
1444 -(__v4df) __C),
1445 (__v4df) __C);
1446}
1447
1448static __inline__ __m128 __DEFAULT_FN_ATTRS128
1449_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1450{
1451 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1452 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1453 (__v4sf) __B,
1454 -(__v4sf) __C),
1455 (__v4sf) __C);
1456}
1457
1458static __inline__ __m256 __DEFAULT_FN_ATTRS256
1459_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1460{
1461 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1462 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1463 (__v8sf) __B,
1464 -(__v8sf) __C),
1465 (__v8sf) __C);
1466}
1467
1468static __inline__ __m128d __DEFAULT_FN_ATTRS128
1469_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1470{
1471 return (__m128d)__builtin_ia32_selectpd_128(
1472 (__mmask8)__U,
1473 __builtin_elementwise_fma((__v2df)__A, -(__v2df)__B, (__v2df)__C),
1474 (__v2df)__A);
1475}
1476
1477static __inline__ __m256d __DEFAULT_FN_ATTRS256
1478_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1479{
1480 return (__m256d)__builtin_ia32_selectpd_256(
1481 (__mmask8)__U,
1482 __builtin_elementwise_fma((__v4df)__A, -(__v4df)__B, (__v4df)__C),
1483 (__v4df)__A);
1484}
1485
1486static __inline__ __m128 __DEFAULT_FN_ATTRS128
1487_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1488{
1489 return (__m128)__builtin_ia32_selectps_128(
1490 (__mmask8)__U,
1491 __builtin_elementwise_fma((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C),
1492 (__v4sf)__A);
1493}
1494
1495static __inline__ __m256 __DEFAULT_FN_ATTRS256
1496_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1497{
1498 return (__m256)__builtin_ia32_selectps_256(
1499 (__mmask8)__U,
1500 __builtin_elementwise_fma((__v8sf)__A, -(__v8sf)__B, (__v8sf)__C),
1501 (__v8sf)__A);
1502}
1503
1504static __inline__ __m128d __DEFAULT_FN_ATTRS128
1505_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1506{
1507 return (__m128d)__builtin_ia32_selectpd_128(
1508 (__mmask8)__U,
1509 __builtin_elementwise_fma((__v2df)__A, -(__v2df)__B, -(__v2df)__C),
1510 (__v2df)__A);
1511}
1512
1513static __inline__ __m128d __DEFAULT_FN_ATTRS128
1514_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1515{
1516 return (__m128d)__builtin_ia32_selectpd_128(
1517 (__mmask8)__U,
1518 __builtin_elementwise_fma((__v2df)__A, -(__v2df)__B, -(__v2df)__C),
1519 (__v2df)__C);
1520}
1521
1522static __inline__ __m256d __DEFAULT_FN_ATTRS256
1523_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1524{
1525 return (__m256d)__builtin_ia32_selectpd_256(
1526 (__mmask8)__U,
1527 __builtin_elementwise_fma((__v4df)__A, -(__v4df)__B, -(__v4df)__C),
1528 (__v4df)__A);
1529}
1530
1531static __inline__ __m256d __DEFAULT_FN_ATTRS256
1532_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1533{
1534 return (__m256d)__builtin_ia32_selectpd_256(
1535 (__mmask8)__U,
1536 __builtin_elementwise_fma((__v4df)__A, -(__v4df)__B, -(__v4df)__C),
1537 (__v4df)__C);
1538}
1539
1540static __inline__ __m128 __DEFAULT_FN_ATTRS128
1541_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1542{
1543 return (__m128)__builtin_ia32_selectps_128(
1544 (__mmask8)__U,
1545 __builtin_elementwise_fma((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C),
1546 (__v4sf)__A);
1547}
1548
1549static __inline__ __m128 __DEFAULT_FN_ATTRS128
1550_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1551{
1552 return (__m128)__builtin_ia32_selectps_128(
1553 (__mmask8)__U,
1554 __builtin_elementwise_fma((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C),
1555 (__v4sf)__C);
1556}
1557
1558static __inline__ __m256 __DEFAULT_FN_ATTRS256
1559_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1560{
1561 return (__m256)__builtin_ia32_selectps_256(
1562 (__mmask8)__U,
1563 __builtin_elementwise_fma((__v8sf)__A, -(__v8sf)__B, -(__v8sf)__C),
1564 (__v8sf)__A);
1565}
1566
1567static __inline__ __m256 __DEFAULT_FN_ATTRS256
1568_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1569{
1570 return (__m256)__builtin_ia32_selectps_256(
1571 (__mmask8)__U,
1572 __builtin_elementwise_fma((__v8sf)__A, -(__v8sf)__B, -(__v8sf)__C),
1573 (__v8sf)__C);
1574}
1575
1576static __inline__ __m128d __DEFAULT_FN_ATTRS128
1577_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1578 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1579 (__v2df)_mm_add_pd(__A, __B),
1580 (__v2df)__W);
1581}
1582
1583static __inline__ __m128d __DEFAULT_FN_ATTRS128
1584_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1585 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1586 (__v2df)_mm_add_pd(__A, __B),
1587 (__v2df)_mm_setzero_pd());
1588}
1589
1590static __inline__ __m256d __DEFAULT_FN_ATTRS256
1591_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1592 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1593 (__v4df)_mm256_add_pd(__A, __B),
1594 (__v4df)__W);
1595}
1596
1597static __inline__ __m256d __DEFAULT_FN_ATTRS256
1598_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1599 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1600 (__v4df)_mm256_add_pd(__A, __B),
1601 (__v4df)_mm256_setzero_pd());
1602}
1603
1604static __inline__ __m128 __DEFAULT_FN_ATTRS128
1605_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1606 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1607 (__v4sf)_mm_add_ps(__A, __B),
1608 (__v4sf)__W);
1609}
1610
1611static __inline__ __m128 __DEFAULT_FN_ATTRS128
1612_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1613 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1614 (__v4sf)_mm_add_ps(__A, __B),
1615 (__v4sf)_mm_setzero_ps());
1616}
1617
1618static __inline__ __m256 __DEFAULT_FN_ATTRS256
1619_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1620 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1621 (__v8sf)_mm256_add_ps(__A, __B),
1622 (__v8sf)__W);
1623}
1624
1625static __inline__ __m256 __DEFAULT_FN_ATTRS256
1626_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1627 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1628 (__v8sf)_mm256_add_ps(__A, __B),
1629 (__v8sf)_mm256_setzero_ps());
1630}
1631
1632static __inline__ __m128i __DEFAULT_FN_ATTRS128
1633_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1634 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1635 (__v4si) __W,
1636 (__v4si) __A);
1637}
1638
1639static __inline__ __m256i __DEFAULT_FN_ATTRS256
1640_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1641 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1642 (__v8si) __W,
1643 (__v8si) __A);
1644}
1645
1646static __inline__ __m128d __DEFAULT_FN_ATTRS128
1647_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1648 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1649 (__v2df) __W,
1650 (__v2df) __A);
1651}
1652
1653static __inline__ __m256d __DEFAULT_FN_ATTRS256
1654_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1655 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1656 (__v4df) __W,
1657 (__v4df) __A);
1658}
1659
1660static __inline__ __m128 __DEFAULT_FN_ATTRS128
1661_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1662 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1663 (__v4sf) __W,
1664 (__v4sf) __A);
1665}
1666
1667static __inline__ __m256 __DEFAULT_FN_ATTRS256
1668_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1669 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1670 (__v8sf) __W,
1671 (__v8sf) __A);
1672}
1673
1674static __inline__ __m128i __DEFAULT_FN_ATTRS128
1675_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1676 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1677 (__v2di) __W,
1678 (__v2di) __A);
1679}
1680
1681static __inline__ __m256i __DEFAULT_FN_ATTRS256
1682_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1683 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1684 (__v4di) __W,
1685 (__v4di) __A);
1686}
1687
1688static __inline__ __m128d __DEFAULT_FN_ATTRS128
1689_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1690 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1691 (__v2df) __W,
1692 (__mmask8) __U);
1693}
1694
1695static __inline__ __m128d __DEFAULT_FN_ATTRS128
1697 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1698 (__v2df)
1699 _mm_setzero_pd (),
1700 (__mmask8) __U);
1701}
1702
1703static __inline__ __m256d __DEFAULT_FN_ATTRS256
1704_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1705 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1706 (__v4df) __W,
1707 (__mmask8) __U);
1708}
1709
1710static __inline__ __m256d __DEFAULT_FN_ATTRS256
1712 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1713 (__v4df)
1715 (__mmask8) __U);
1716}
1717
1718static __inline__ __m128i __DEFAULT_FN_ATTRS128
1719_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1720 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1721 (__v2di) __W,
1722 (__mmask8) __U);
1723}
1724
1725static __inline__ __m128i __DEFAULT_FN_ATTRS128
1727 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1728 (__v2di)
1730 (__mmask8) __U);
1731}
1732
1733static __inline__ __m256i __DEFAULT_FN_ATTRS256
1734_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1735 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1736 (__v4di) __W,
1737 (__mmask8) __U);
1738}
1739
1740static __inline__ __m256i __DEFAULT_FN_ATTRS256
1742 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1743 (__v4di)
1745 (__mmask8) __U);
1746}
1747
1748static __inline__ __m128 __DEFAULT_FN_ATTRS128
1749_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1750 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1751 (__v4sf) __W,
1752 (__mmask8) __U);
1753}
1754
1755static __inline__ __m128 __DEFAULT_FN_ATTRS128
1757 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1758 (__v4sf)
1759 _mm_setzero_ps (),
1760 (__mmask8) __U);
1761}
1762
1763static __inline__ __m256 __DEFAULT_FN_ATTRS256
1764_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1765 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1766 (__v8sf) __W,
1767 (__mmask8) __U);
1768}
1769
1770static __inline__ __m256 __DEFAULT_FN_ATTRS256
1772 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1773 (__v8sf)
1775 (__mmask8) __U);
1776}
1777
1778static __inline__ __m128i __DEFAULT_FN_ATTRS128
1779_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1780 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1781 (__v4si) __W,
1782 (__mmask8) __U);
1783}
1784
1785static __inline__ __m128i __DEFAULT_FN_ATTRS128
1787 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1788 (__v4si)
1790 (__mmask8) __U);
1791}
1792
1793static __inline__ __m256i __DEFAULT_FN_ATTRS256
1794_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1795 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1796 (__v8si) __W,
1797 (__mmask8) __U);
1798}
1799
1800static __inline__ __m256i __DEFAULT_FN_ATTRS256
1802 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1803 (__v8si)
1805 (__mmask8) __U);
1806}
1807
1808static __inline__ void __DEFAULT_FN_ATTRS128
1809_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1810 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1811 (__v2df) __A,
1812 (__mmask8) __U);
1813}
1814
1815static __inline__ void __DEFAULT_FN_ATTRS256
1816_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1817 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1818 (__v4df) __A,
1819 (__mmask8) __U);
1820}
1821
1822static __inline__ void __DEFAULT_FN_ATTRS128
1823_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1824 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1825 (__v2di) __A,
1826 (__mmask8) __U);
1827}
1828
1829static __inline__ void __DEFAULT_FN_ATTRS256
1831 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1832 (__v4di) __A,
1833 (__mmask8) __U);
1834}
1835
1836static __inline__ void __DEFAULT_FN_ATTRS128
1837_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1838 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1839 (__v4sf) __A,
1840 (__mmask8) __U);
1841}
1842
1843static __inline__ void __DEFAULT_FN_ATTRS256
1845 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1846 (__v8sf) __A,
1847 (__mmask8) __U);
1848}
1849
1850static __inline__ void __DEFAULT_FN_ATTRS128
1851_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1852 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1853 (__v4si) __A,
1854 (__mmask8) __U);
1855}
1856
1857static __inline__ void __DEFAULT_FN_ATTRS256
1859 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1860 (__v8si) __A,
1861 (__mmask8) __U);
1862}
1863
1864static __inline__ __m128d __DEFAULT_FN_ATTRS128
1865_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1866 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1867 (__v2df)_mm_cvtepi32_pd(__A),
1868 (__v2df)__W);
1869}
1870
1871static __inline__ __m128d __DEFAULT_FN_ATTRS128
1873 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1874 (__v2df)_mm_cvtepi32_pd(__A),
1875 (__v2df)_mm_setzero_pd());
1876}
1877
1878static __inline__ __m256d __DEFAULT_FN_ATTRS256
1879_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1880 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1881 (__v4df)_mm256_cvtepi32_pd(__A),
1882 (__v4df)__W);
1883}
1884
1885static __inline__ __m256d __DEFAULT_FN_ATTRS256
1887 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1888 (__v4df)_mm256_cvtepi32_pd(__A),
1889 (__v4df)_mm256_setzero_pd());
1890}
1891
1892static __inline__ __m128 __DEFAULT_FN_ATTRS128
1893_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1894 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1895 (__v4sf)_mm_cvtepi32_ps(__A),
1896 (__v4sf)__W);
1897}
1898
1899static __inline__ __m128 __DEFAULT_FN_ATTRS128
1901 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1902 (__v4sf)_mm_cvtepi32_ps(__A),
1903 (__v4sf)_mm_setzero_ps());
1904}
1905
1906static __inline__ __m256 __DEFAULT_FN_ATTRS256
1907_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1908 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1909 (__v8sf)_mm256_cvtepi32_ps(__A),
1910 (__v8sf)__W);
1911}
1912
1913static __inline__ __m256 __DEFAULT_FN_ATTRS256
1915 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1916 (__v8sf)_mm256_cvtepi32_ps(__A),
1917 (__v8sf)_mm256_setzero_ps());
1918}
1919
1920static __inline__ __m128i __DEFAULT_FN_ATTRS128
1921_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1922 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1923 (__v4si) __W,
1924 (__mmask8) __U);
1925}
1926
1927static __inline__ __m128i __DEFAULT_FN_ATTRS128
1929 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1930 (__v4si)
1932 (__mmask8) __U);
1933}
1934
1935static __inline__ __m128i __DEFAULT_FN_ATTRS256
1936_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1937 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1938 (__v4si)_mm256_cvtpd_epi32(__A),
1939 (__v4si)__W);
1940}
1941
1942static __inline__ __m128i __DEFAULT_FN_ATTRS256
1944 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1945 (__v4si)_mm256_cvtpd_epi32(__A),
1946 (__v4si)_mm_setzero_si128());
1947}
1948
1949static __inline__ __m128 __DEFAULT_FN_ATTRS128
1950_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1951 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1952 (__v4sf) __W,
1953 (__mmask8) __U);
1954}
1955
1956static __inline__ __m128 __DEFAULT_FN_ATTRS128
1957_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
1958 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1959 (__v4sf)
1960 _mm_setzero_ps (),
1961 (__mmask8) __U);
1962}
1963
1964static __inline__ __m128 __DEFAULT_FN_ATTRS256
1965_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
1966 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1967 (__v4sf)_mm256_cvtpd_ps(__A),
1968 (__v4sf)__W);
1969}
1970
1971static __inline__ __m128 __DEFAULT_FN_ATTRS256
1973 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1974 (__v4sf)_mm256_cvtpd_ps(__A),
1975 (__v4sf)_mm_setzero_ps());
1976}
1977
1978static __inline__ __m128i __DEFAULT_FN_ATTRS128
1979_mm_cvtpd_epu32 (__m128d __A) {
1980 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1981 (__v4si)
1983 (__mmask8) -1);
1984}
1985
1986static __inline__ __m128i __DEFAULT_FN_ATTRS128
1987_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
1988 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1989 (__v4si) __W,
1990 (__mmask8) __U);
1991}
1992
1993static __inline__ __m128i __DEFAULT_FN_ATTRS128
1995 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1996 (__v4si)
1998 (__mmask8) __U);
1999}
2000
2001static __inline__ __m128i __DEFAULT_FN_ATTRS256
2002_mm256_cvtpd_epu32 (__m256d __A) {
2003 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2004 (__v4si)
2006 (__mmask8) -1);
2007}
2008
2009static __inline__ __m128i __DEFAULT_FN_ATTRS256
2010_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2011 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2012 (__v4si) __W,
2013 (__mmask8) __U);
2014}
2015
2016static __inline__ __m128i __DEFAULT_FN_ATTRS256
2018 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2019 (__v4si)
2021 (__mmask8) __U);
2022}
2023
2024static __inline__ __m128i __DEFAULT_FN_ATTRS128
2025_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2026 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2027 (__v4si)_mm_cvtps_epi32(__A),
2028 (__v4si)__W);
2029}
2030
2031static __inline__ __m128i __DEFAULT_FN_ATTRS128
2033 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2034 (__v4si)_mm_cvtps_epi32(__A),
2035 (__v4si)_mm_setzero_si128());
2036}
2037
2038static __inline__ __m256i __DEFAULT_FN_ATTRS256
2039_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2040 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2041 (__v8si)_mm256_cvtps_epi32(__A),
2042 (__v8si)__W);
2043}
2044
2045static __inline__ __m256i __DEFAULT_FN_ATTRS256
2047 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2048 (__v8si)_mm256_cvtps_epi32(__A),
2049 (__v8si)_mm256_setzero_si256());
2050}
2051
2052static __inline__ __m128d __DEFAULT_FN_ATTRS128
2053_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2054 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2055 (__v2df)_mm_cvtps_pd(__A),
2056 (__v2df)__W);
2057}
2058
2059static __inline__ __m128d __DEFAULT_FN_ATTRS128
2060_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2061 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2062 (__v2df)_mm_cvtps_pd(__A),
2063 (__v2df)_mm_setzero_pd());
2064}
2065
2066static __inline__ __m256d __DEFAULT_FN_ATTRS256
2067_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2068 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2069 (__v4df)_mm256_cvtps_pd(__A),
2070 (__v4df)__W);
2071}
2072
2073static __inline__ __m256d __DEFAULT_FN_ATTRS256
2075 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2076 (__v4df)_mm256_cvtps_pd(__A),
2077 (__v4df)_mm256_setzero_pd());
2078}
2079
2080static __inline__ __m128i __DEFAULT_FN_ATTRS128
2081_mm_cvtps_epu32 (__m128 __A) {
2082 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2083 (__v4si)
2085 (__mmask8) -1);
2086}
2087
2088static __inline__ __m128i __DEFAULT_FN_ATTRS128
2089_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2090 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2091 (__v4si) __W,
2092 (__mmask8) __U);
2093}
2094
2095static __inline__ __m128i __DEFAULT_FN_ATTRS128
2097 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2098 (__v4si)
2100 (__mmask8) __U);
2101}
2102
2103static __inline__ __m256i __DEFAULT_FN_ATTRS256
2105 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2106 (__v8si)
2108 (__mmask8) -1);
2109}
2110
2111static __inline__ __m256i __DEFAULT_FN_ATTRS256
2112_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2113 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2114 (__v8si) __W,
2115 (__mmask8) __U);
2116}
2117
2118static __inline__ __m256i __DEFAULT_FN_ATTRS256
2120 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2121 (__v8si)
2123 (__mmask8) __U);
2124}
2125
2126static __inline__ __m128i __DEFAULT_FN_ATTRS128
2127_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2128 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2129 (__v4si) __W,
2130 (__mmask8) __U);
2131}
2132
2133static __inline__ __m128i __DEFAULT_FN_ATTRS128
2135 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2136 (__v4si)
2138 (__mmask8) __U);
2139}
2140
2141static __inline__ __m128i __DEFAULT_FN_ATTRS256
2142_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2143 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2144 (__v4si)_mm256_cvttpd_epi32(__A),
2145 (__v4si)__W);
2146}
2147
2148static __inline__ __m128i __DEFAULT_FN_ATTRS256
2150 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2151 (__v4si)_mm256_cvttpd_epi32(__A),
2152 (__v4si)_mm_setzero_si128());
2153}
2154
2155static __inline__ __m128i __DEFAULT_FN_ATTRS128
2156_mm_cvttpd_epu32 (__m128d __A) {
2157 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2158 (__v4si)
2160 (__mmask8) -1);
2161}
2162
2163static __inline__ __m128i __DEFAULT_FN_ATTRS128
2164_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2165 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2166 (__v4si) __W,
2167 (__mmask8) __U);
2168}
2169
2170static __inline__ __m128i __DEFAULT_FN_ATTRS128
2172 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2173 (__v4si)
2175 (__mmask8) __U);
2176}
2177
2178static __inline__ __m128i __DEFAULT_FN_ATTRS256
2179_mm256_cvttpd_epu32 (__m256d __A) {
2180 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2181 (__v4si)
2183 (__mmask8) -1);
2184}
2185
2186static __inline__ __m128i __DEFAULT_FN_ATTRS256
2187_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2188 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2189 (__v4si) __W,
2190 (__mmask8) __U);
2191}
2192
2193static __inline__ __m128i __DEFAULT_FN_ATTRS256
2195 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2196 (__v4si)
2198 (__mmask8) __U);
2199}
2200
2201static __inline__ __m128i __DEFAULT_FN_ATTRS128
2202_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2203 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2204 (__v4si)_mm_cvttps_epi32(__A),
2205 (__v4si)__W);
2206}
2207
2208static __inline__ __m128i __DEFAULT_FN_ATTRS128
2210 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2211 (__v4si)_mm_cvttps_epi32(__A),
2212 (__v4si)_mm_setzero_si128());
2213}
2214
2215static __inline__ __m256i __DEFAULT_FN_ATTRS256
2216_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2217 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2218 (__v8si)_mm256_cvttps_epi32(__A),
2219 (__v8si)__W);
2220}
2221
2222static __inline__ __m256i __DEFAULT_FN_ATTRS256
2224 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2225 (__v8si)_mm256_cvttps_epi32(__A),
2226 (__v8si)_mm256_setzero_si256());
2227}
2228
2229static __inline__ __m128i __DEFAULT_FN_ATTRS128
2230_mm_cvttps_epu32 (__m128 __A) {
2231 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2232 (__v4si)
2234 (__mmask8) -1);
2235}
2236
2237static __inline__ __m128i __DEFAULT_FN_ATTRS128
2238_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2239 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2240 (__v4si) __W,
2241 (__mmask8) __U);
2242}
2243
2244static __inline__ __m128i __DEFAULT_FN_ATTRS128
2246 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2247 (__v4si)
2249 (__mmask8) __U);
2250}
2251
2252static __inline__ __m256i __DEFAULT_FN_ATTRS256
2254 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2255 (__v8si)
2257 (__mmask8) -1);
2258}
2259
2260static __inline__ __m256i __DEFAULT_FN_ATTRS256
2261_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2262 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2263 (__v8si) __W,
2264 (__mmask8) __U);
2265}
2266
2267static __inline__ __m256i __DEFAULT_FN_ATTRS256
2269 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2270 (__v8si)
2272 (__mmask8) __U);
2273}
2274
2275static __inline__ __m128d __DEFAULT_FN_ATTRS128
2276_mm_cvtepu32_pd (__m128i __A) {
2277 return (__m128d) __builtin_convertvector(
2278 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2279}
2280
2281static __inline__ __m128d __DEFAULT_FN_ATTRS128
2282_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2283 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2284 (__v2df)_mm_cvtepu32_pd(__A),
2285 (__v2df)__W);
2286}
2287
2288static __inline__ __m128d __DEFAULT_FN_ATTRS128
2290 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2291 (__v2df)_mm_cvtepu32_pd(__A),
2292 (__v2df)_mm_setzero_pd());
2293}
2294
2295static __inline__ __m256d __DEFAULT_FN_ATTRS256
2296_mm256_cvtepu32_pd (__m128i __A) {
2297 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2298}
2299
2300static __inline__ __m256d __DEFAULT_FN_ATTRS256
2301_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2302 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2303 (__v4df)_mm256_cvtepu32_pd(__A),
2304 (__v4df)__W);
2305}
2306
2307static __inline__ __m256d __DEFAULT_FN_ATTRS256
2309 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2310 (__v4df)_mm256_cvtepu32_pd(__A),
2311 (__v4df)_mm256_setzero_pd());
2312}
2313
2314static __inline__ __m128 __DEFAULT_FN_ATTRS128
2315_mm_cvtepu32_ps (__m128i __A) {
2316 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2317}
2318
2319static __inline__ __m128 __DEFAULT_FN_ATTRS128
2320_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2321 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2322 (__v4sf)_mm_cvtepu32_ps(__A),
2323 (__v4sf)__W);
2324}
2325
2326static __inline__ __m128 __DEFAULT_FN_ATTRS128
2328 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2329 (__v4sf)_mm_cvtepu32_ps(__A),
2330 (__v4sf)_mm_setzero_ps());
2331}
2332
2333static __inline__ __m256 __DEFAULT_FN_ATTRS256
2334_mm256_cvtepu32_ps (__m256i __A) {
2335 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2336}
2337
2338static __inline__ __m256 __DEFAULT_FN_ATTRS256
2339_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2340 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2341 (__v8sf)_mm256_cvtepu32_ps(__A),
2342 (__v8sf)__W);
2343}
2344
2345static __inline__ __m256 __DEFAULT_FN_ATTRS256
2347 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2348 (__v8sf)_mm256_cvtepu32_ps(__A),
2349 (__v8sf)_mm256_setzero_ps());
2350}
2351
2352static __inline__ __m128d __DEFAULT_FN_ATTRS128
2353_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2354 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2355 (__v2df)_mm_div_pd(__A, __B),
2356 (__v2df)__W);
2357}
2358
2359static __inline__ __m128d __DEFAULT_FN_ATTRS128
2360_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2361 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2362 (__v2df)_mm_div_pd(__A, __B),
2363 (__v2df)_mm_setzero_pd());
2364}
2365
2366static __inline__ __m256d __DEFAULT_FN_ATTRS256
2367_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2368 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2369 (__v4df)_mm256_div_pd(__A, __B),
2370 (__v4df)__W);
2371}
2372
2373static __inline__ __m256d __DEFAULT_FN_ATTRS256
2374_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2375 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2376 (__v4df)_mm256_div_pd(__A, __B),
2377 (__v4df)_mm256_setzero_pd());
2378}
2379
2380static __inline__ __m128 __DEFAULT_FN_ATTRS128
2381_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2382 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2383 (__v4sf)_mm_div_ps(__A, __B),
2384 (__v4sf)__W);
2385}
2386
2387static __inline__ __m128 __DEFAULT_FN_ATTRS128
2388_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2389 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2390 (__v4sf)_mm_div_ps(__A, __B),
2391 (__v4sf)_mm_setzero_ps());
2392}
2393
2394static __inline__ __m256 __DEFAULT_FN_ATTRS256
2395_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2396 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2397 (__v8sf)_mm256_div_ps(__A, __B),
2398 (__v8sf)__W);
2399}
2400
2401static __inline__ __m256 __DEFAULT_FN_ATTRS256
2402_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2403 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2404 (__v8sf)_mm256_div_ps(__A, __B),
2405 (__v8sf)_mm256_setzero_ps());
2406}
2407
2408static __inline__ __m128d __DEFAULT_FN_ATTRS128
2409_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2410 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2411 (__v2df) __W,
2412 (__mmask8) __U);
2413}
2414
2415static __inline__ __m128d __DEFAULT_FN_ATTRS128
2416_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2417 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2418 (__v2df)
2419 _mm_setzero_pd (),
2420 (__mmask8) __U);
2421}
2422
2423static __inline__ __m256d __DEFAULT_FN_ATTRS256
2424_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2425 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2426 (__v4df) __W,
2427 (__mmask8) __U);
2428}
2429
2430static __inline__ __m256d __DEFAULT_FN_ATTRS256
2432 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2433 (__v4df)
2435 (__mmask8) __U);
2436}
2437
2438static __inline__ __m128i __DEFAULT_FN_ATTRS128
2439_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2440 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2441 (__v2di) __W,
2442 (__mmask8) __U);
2443}
2444
2445static __inline__ __m128i __DEFAULT_FN_ATTRS128
2447 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2448 (__v2di)
2450 (__mmask8) __U);
2451}
2452
2453static __inline__ __m256i __DEFAULT_FN_ATTRS256
2454_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2455 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2456 (__v4di) __W,
2457 (__mmask8) __U);
2458}
2459
2460static __inline__ __m256i __DEFAULT_FN_ATTRS256
2462 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2463 (__v4di)
2465 (__mmask8) __U);
2466}
2467
2468static __inline__ __m128d __DEFAULT_FN_ATTRS128
2469_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2470 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2471 (__v2df) __W,
2472 (__mmask8)
2473 __U);
2474}
2475
2476static __inline__ __m128d __DEFAULT_FN_ATTRS128
2478 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2479 (__v2df)
2480 _mm_setzero_pd (),
2481 (__mmask8)
2482 __U);
2483}
2484
2485static __inline__ __m256d __DEFAULT_FN_ATTRS256
2486_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2487 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2488 (__v4df) __W,
2489 (__mmask8)
2490 __U);
2491}
2492
2493static __inline__ __m256d __DEFAULT_FN_ATTRS256
2495 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2496 (__v4df)
2498 (__mmask8)
2499 __U);
2500}
2501
2502static __inline__ __m128i __DEFAULT_FN_ATTRS128
2503_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2504 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2505 (__v2di) __W,
2506 (__mmask8)
2507 __U);
2508}
2509
2510static __inline__ __m128i __DEFAULT_FN_ATTRS128
2512 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2513 (__v2di)
2515 (__mmask8)
2516 __U);
2517}
2518
2519static __inline__ __m256i __DEFAULT_FN_ATTRS256
2521 void const *__P) {
2522 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2523 (__v4di) __W,
2524 (__mmask8)
2525 __U);
2526}
2527
2528static __inline__ __m256i __DEFAULT_FN_ATTRS256
2530 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2531 (__v4di)
2533 (__mmask8)
2534 __U);
2535}
2536
2537static __inline__ __m128 __DEFAULT_FN_ATTRS128
2538_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2539 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2540 (__v4sf) __W,
2541 (__mmask8) __U);
2542}
2543
2544static __inline__ __m128 __DEFAULT_FN_ATTRS128
2546 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2547 (__v4sf)
2548 _mm_setzero_ps (),
2549 (__mmask8)
2550 __U);
2551}
2552
2553static __inline__ __m256 __DEFAULT_FN_ATTRS256
2554_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2555 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2556 (__v8sf) __W,
2557 (__mmask8) __U);
2558}
2559
2560static __inline__ __m256 __DEFAULT_FN_ATTRS256
2562 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2563 (__v8sf)
2565 (__mmask8)
2566 __U);
2567}
2568
2569static __inline__ __m128i __DEFAULT_FN_ATTRS128
2570_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2571 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2572 (__v4si) __W,
2573 (__mmask8)
2574 __U);
2575}
2576
2577static __inline__ __m128i __DEFAULT_FN_ATTRS128
2579 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2580 (__v4si)
2582 (__mmask8) __U);
2583}
2584
2585static __inline__ __m256i __DEFAULT_FN_ATTRS256
2587 void const *__P) {
2588 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2589 (__v8si) __W,
2590 (__mmask8)
2591 __U);
2592}
2593
2594static __inline__ __m256i __DEFAULT_FN_ATTRS256
2596 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2597 (__v8si)
2599 (__mmask8)
2600 __U);
2601}
2602
2603static __inline__ __m128 __DEFAULT_FN_ATTRS128
2604_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2605 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2606 (__v4sf) __W,
2607 (__mmask8) __U);
2608}
2609
2610static __inline__ __m128 __DEFAULT_FN_ATTRS128
2612 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2613 (__v4sf)
2614 _mm_setzero_ps (),
2615 (__mmask8) __U);
2616}
2617
2618static __inline__ __m256 __DEFAULT_FN_ATTRS256
2619_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2620 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2621 (__v8sf) __W,
2622 (__mmask8) __U);
2623}
2624
2625static __inline__ __m256 __DEFAULT_FN_ATTRS256
2627 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2628 (__v8sf)
2630 (__mmask8) __U);
2631}
2632
2633static __inline__ __m128i __DEFAULT_FN_ATTRS128
2634_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2635 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2636 (__v4si) __W,
2637 (__mmask8) __U);
2638}
2639
2640static __inline__ __m128i __DEFAULT_FN_ATTRS128
2642 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2643 (__v4si)
2645 (__mmask8) __U);
2646}
2647
2648static __inline__ __m256i __DEFAULT_FN_ATTRS256
2649_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2650 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2651 (__v8si) __W,
2652 (__mmask8) __U);
2653}
2654
2655static __inline__ __m256i __DEFAULT_FN_ATTRS256
2657 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2658 (__v8si)
2660 (__mmask8) __U);
2661}
2662
2663static __inline__ __m128d __DEFAULT_FN_ATTRS128
2664_mm_getexp_pd (__m128d __A) {
2665 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2666 (__v2df)
2667 _mm_setzero_pd (),
2668 (__mmask8) -1);
2669}
2670
2671static __inline__ __m128d __DEFAULT_FN_ATTRS128
2672_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2673 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2674 (__v2df) __W,
2675 (__mmask8) __U);
2676}
2677
2678static __inline__ __m128d __DEFAULT_FN_ATTRS128
2679_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2680 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2681 (__v2df)
2682 _mm_setzero_pd (),
2683 (__mmask8) __U);
2684}
2685
2686static __inline__ __m256d __DEFAULT_FN_ATTRS256
2687_mm256_getexp_pd (__m256d __A) {
2688 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2689 (__v4df)
2691 (__mmask8) -1);
2692}
2693
2694static __inline__ __m256d __DEFAULT_FN_ATTRS256
2695_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2696 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2697 (__v4df) __W,
2698 (__mmask8) __U);
2699}
2700
2701static __inline__ __m256d __DEFAULT_FN_ATTRS256
2703 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2704 (__v4df)
2706 (__mmask8) __U);
2707}
2708
2709static __inline__ __m128 __DEFAULT_FN_ATTRS128
2710_mm_getexp_ps (__m128 __A) {
2711 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2712 (__v4sf)
2713 _mm_setzero_ps (),
2714 (__mmask8) -1);
2715}
2716
2717static __inline__ __m128 __DEFAULT_FN_ATTRS128
2718_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2719 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2720 (__v4sf) __W,
2721 (__mmask8) __U);
2722}
2723
2724static __inline__ __m128 __DEFAULT_FN_ATTRS128
2726 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2727 (__v4sf)
2728 _mm_setzero_ps (),
2729 (__mmask8) __U);
2730}
2731
2732static __inline__ __m256 __DEFAULT_FN_ATTRS256
2733_mm256_getexp_ps (__m256 __A) {
2734 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2735 (__v8sf)
2737 (__mmask8) -1);
2738}
2739
2740static __inline__ __m256 __DEFAULT_FN_ATTRS256
2741_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2742 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2743 (__v8sf) __W,
2744 (__mmask8) __U);
2745}
2746
2747static __inline__ __m256 __DEFAULT_FN_ATTRS256
2749 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2750 (__v8sf)
2752 (__mmask8) __U);
2753}
2754
2755static __inline__ __m128d __DEFAULT_FN_ATTRS128
2756_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2757 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2758 (__v2df)_mm_max_pd(__A, __B),
2759 (__v2df)__W);
2760}
2761
2762static __inline__ __m128d __DEFAULT_FN_ATTRS128
2763_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2764 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2765 (__v2df)_mm_max_pd(__A, __B),
2766 (__v2df)_mm_setzero_pd());
2767}
2768
2769static __inline__ __m256d __DEFAULT_FN_ATTRS256
2770_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2771 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2772 (__v4df)_mm256_max_pd(__A, __B),
2773 (__v4df)__W);
2774}
2775
2776static __inline__ __m256d __DEFAULT_FN_ATTRS256
2777_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2778 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2779 (__v4df)_mm256_max_pd(__A, __B),
2780 (__v4df)_mm256_setzero_pd());
2781}
2782
2783static __inline__ __m128 __DEFAULT_FN_ATTRS128
2784_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2785 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2786 (__v4sf)_mm_max_ps(__A, __B),
2787 (__v4sf)__W);
2788}
2789
2790static __inline__ __m128 __DEFAULT_FN_ATTRS128
2791_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2792 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2793 (__v4sf)_mm_max_ps(__A, __B),
2794 (__v4sf)_mm_setzero_ps());
2795}
2796
2797static __inline__ __m256 __DEFAULT_FN_ATTRS256
2798_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2799 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2800 (__v8sf)_mm256_max_ps(__A, __B),
2801 (__v8sf)__W);
2802}
2803
2804static __inline__ __m256 __DEFAULT_FN_ATTRS256
2805_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2806 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2807 (__v8sf)_mm256_max_ps(__A, __B),
2808 (__v8sf)_mm256_setzero_ps());
2809}
2810
2811static __inline__ __m128d __DEFAULT_FN_ATTRS128
2812_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2813 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2814 (__v2df)_mm_min_pd(__A, __B),
2815 (__v2df)__W);
2816}
2817
2818static __inline__ __m128d __DEFAULT_FN_ATTRS128
2819_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2820 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2821 (__v2df)_mm_min_pd(__A, __B),
2822 (__v2df)_mm_setzero_pd());
2823}
2824
2825static __inline__ __m256d __DEFAULT_FN_ATTRS256
2826_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2827 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2828 (__v4df)_mm256_min_pd(__A, __B),
2829 (__v4df)__W);
2830}
2831
2832static __inline__ __m256d __DEFAULT_FN_ATTRS256
2833_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2834 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2835 (__v4df)_mm256_min_pd(__A, __B),
2836 (__v4df)_mm256_setzero_pd());
2837}
2838
2839static __inline__ __m128 __DEFAULT_FN_ATTRS128
2840_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2841 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2842 (__v4sf)_mm_min_ps(__A, __B),
2843 (__v4sf)__W);
2844}
2845
2846static __inline__ __m128 __DEFAULT_FN_ATTRS128
2847_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2848 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2849 (__v4sf)_mm_min_ps(__A, __B),
2850 (__v4sf)_mm_setzero_ps());
2851}
2852
2853static __inline__ __m256 __DEFAULT_FN_ATTRS256
2854_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2855 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2856 (__v8sf)_mm256_min_ps(__A, __B),
2857 (__v8sf)__W);
2858}
2859
2860static __inline__ __m256 __DEFAULT_FN_ATTRS256
2861_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2862 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2863 (__v8sf)_mm256_min_ps(__A, __B),
2864 (__v8sf)_mm256_setzero_ps());
2865}
2866
2867static __inline__ __m128d __DEFAULT_FN_ATTRS128
2868_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2869 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2870 (__v2df)_mm_mul_pd(__A, __B),
2871 (__v2df)__W);
2872}
2873
2874static __inline__ __m128d __DEFAULT_FN_ATTRS128
2875_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2876 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2877 (__v2df)_mm_mul_pd(__A, __B),
2878 (__v2df)_mm_setzero_pd());
2879}
2880
2881static __inline__ __m256d __DEFAULT_FN_ATTRS256
2882_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2883 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2884 (__v4df)_mm256_mul_pd(__A, __B),
2885 (__v4df)__W);
2886}
2887
2888static __inline__ __m256d __DEFAULT_FN_ATTRS256
2889_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2890 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2891 (__v4df)_mm256_mul_pd(__A, __B),
2892 (__v4df)_mm256_setzero_pd());
2893}
2894
2895static __inline__ __m128 __DEFAULT_FN_ATTRS128
2896_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2897 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2898 (__v4sf)_mm_mul_ps(__A, __B),
2899 (__v4sf)__W);
2900}
2901
2902static __inline__ __m128 __DEFAULT_FN_ATTRS128
2903_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2904 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2905 (__v4sf)_mm_mul_ps(__A, __B),
2906 (__v4sf)_mm_setzero_ps());
2907}
2908
2909static __inline__ __m256 __DEFAULT_FN_ATTRS256
2910_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2911 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2912 (__v8sf)_mm256_mul_ps(__A, __B),
2913 (__v8sf)__W);
2914}
2915
2916static __inline__ __m256 __DEFAULT_FN_ATTRS256
2917_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2918 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2919 (__v8sf)_mm256_mul_ps(__A, __B),
2920 (__v8sf)_mm256_setzero_ps());
2921}
2922
2923static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2924_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2925 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2926 (__v4si)_mm_abs_epi32(__A),
2927 (__v4si)__W);
2928}
2929
2930static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2932 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2933 (__v4si)_mm_abs_epi32(__A),
2934 (__v4si)_mm_setzero_si128());
2935}
2936
2937static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2938_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2939 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2940 (__v8si)_mm256_abs_epi32(__A),
2941 (__v8si)__W);
2942}
2943
2944static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2946 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2947 (__v8si)_mm256_abs_epi32(__A),
2948 (__v8si)_mm256_setzero_si256());
2949}
2950
2951static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2952_mm_abs_epi64(__m128i __A) {
2953 return (__m128i)__builtin_elementwise_abs((__v2di)__A);
2954}
2955
2956static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2957_mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
2958 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2959 (__v2di)_mm_abs_epi64(__A),
2960 (__v2di)__W);
2961}
2962
2963static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
2965 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2966 (__v2di)_mm_abs_epi64(__A),
2967 (__v2di)_mm_setzero_si128());
2968}
2969
2970static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2971_mm256_abs_epi64(__m256i __A) {
2972 return (__m256i)__builtin_elementwise_abs((__v4di)__A);
2973}
2974
2975static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2976_mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
2977 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2978 (__v4di)_mm256_abs_epi64(__A),
2979 (__v4di)__W);
2980}
2981
2982static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2984 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2985 (__v4di)_mm256_abs_epi64(__A),
2986 (__v4di)_mm256_setzero_si256());
2987}
2988
2989static __inline__ __m128i __DEFAULT_FN_ATTRS128
2990_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
2991 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2992 (__v4si)_mm_max_epi32(__A, __B),
2993 (__v4si)_mm_setzero_si128());
2994}
2995
2996static __inline__ __m128i __DEFAULT_FN_ATTRS128
2997_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2998 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2999 (__v4si)_mm_max_epi32(__A, __B),
3000 (__v4si)__W);
3001}
3002
3003static __inline__ __m256i __DEFAULT_FN_ATTRS256
3004_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3005 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3006 (__v8si)_mm256_max_epi32(__A, __B),
3007 (__v8si)_mm256_setzero_si256());
3008}
3009
3010static __inline__ __m256i __DEFAULT_FN_ATTRS256
3011_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3012 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3013 (__v8si)_mm256_max_epi32(__A, __B),
3014 (__v8si)__W);
3015}
3016
3017static __inline__ __m128i __DEFAULT_FN_ATTRS128
3018_mm_max_epi64 (__m128i __A, __m128i __B) {
3019 return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
3020}
3021
3022static __inline__ __m128i __DEFAULT_FN_ATTRS128
3023_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3024 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3025 (__v2di)_mm_max_epi64(__A, __B),
3026 (__v2di)_mm_setzero_si128());
3027}
3028
3029static __inline__ __m128i __DEFAULT_FN_ATTRS128
3030_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3031 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3032 (__v2di)_mm_max_epi64(__A, __B),
3033 (__v2di)__W);
3034}
3035
3036static __inline__ __m256i __DEFAULT_FN_ATTRS256
3037_mm256_max_epi64 (__m256i __A, __m256i __B) {
3038 return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
3039}
3040
3041static __inline__ __m256i __DEFAULT_FN_ATTRS256
3042_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3043 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3044 (__v4di)_mm256_max_epi64(__A, __B),
3045 (__v4di)_mm256_setzero_si256());
3046}
3047
3048static __inline__ __m256i __DEFAULT_FN_ATTRS256
3049_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3050 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3051 (__v4di)_mm256_max_epi64(__A, __B),
3052 (__v4di)__W);
3053}
3054
3055static __inline__ __m128i __DEFAULT_FN_ATTRS128
3056_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3057 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3058 (__v4si)_mm_max_epu32(__A, __B),
3059 (__v4si)_mm_setzero_si128());
3060}
3061
3062static __inline__ __m128i __DEFAULT_FN_ATTRS128
3063_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3064 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3065 (__v4si)_mm_max_epu32(__A, __B),
3066 (__v4si)__W);
3067}
3068
3069static __inline__ __m256i __DEFAULT_FN_ATTRS256
3070_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3071 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3072 (__v8si)_mm256_max_epu32(__A, __B),
3073 (__v8si)_mm256_setzero_si256());
3074}
3075
3076static __inline__ __m256i __DEFAULT_FN_ATTRS256
3077_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3078 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3079 (__v8si)_mm256_max_epu32(__A, __B),
3080 (__v8si)__W);
3081}
3082
3083static __inline__ __m128i __DEFAULT_FN_ATTRS128
3084_mm_max_epu64 (__m128i __A, __m128i __B) {
3085 return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
3086}
3087
3088static __inline__ __m128i __DEFAULT_FN_ATTRS128
3089_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3090 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3091 (__v2di)_mm_max_epu64(__A, __B),
3092 (__v2di)_mm_setzero_si128());
3093}
3094
3095static __inline__ __m128i __DEFAULT_FN_ATTRS128
3096_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3097 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3098 (__v2di)_mm_max_epu64(__A, __B),
3099 (__v2di)__W);
3100}
3101
3102static __inline__ __m256i __DEFAULT_FN_ATTRS256
3103_mm256_max_epu64 (__m256i __A, __m256i __B) {
3104 return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
3105}
3106
3107static __inline__ __m256i __DEFAULT_FN_ATTRS256
3108_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3109 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3110 (__v4di)_mm256_max_epu64(__A, __B),
3111 (__v4di)_mm256_setzero_si256());
3112}
3113
3114static __inline__ __m256i __DEFAULT_FN_ATTRS256
3115_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3116 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3117 (__v4di)_mm256_max_epu64(__A, __B),
3118 (__v4di)__W);
3119}
3120
3121static __inline__ __m128i __DEFAULT_FN_ATTRS128
3122_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3123 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3124 (__v4si)_mm_min_epi32(__A, __B),
3125 (__v4si)_mm_setzero_si128());
3126}
3127
3128static __inline__ __m128i __DEFAULT_FN_ATTRS128
3129_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3130 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3131 (__v4si)_mm_min_epi32(__A, __B),
3132 (__v4si)__W);
3133}
3134
3135static __inline__ __m256i __DEFAULT_FN_ATTRS256
3136_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3137 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3138 (__v8si)_mm256_min_epi32(__A, __B),
3139 (__v8si)_mm256_setzero_si256());
3140}
3141
3142static __inline__ __m256i __DEFAULT_FN_ATTRS256
3143_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3144 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3145 (__v8si)_mm256_min_epi32(__A, __B),
3146 (__v8si)__W);
3147}
3148
3149static __inline__ __m128i __DEFAULT_FN_ATTRS128
3150_mm_min_epi64 (__m128i __A, __m128i __B) {
3151 return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
3152}
3153
3154static __inline__ __m128i __DEFAULT_FN_ATTRS128
3155_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3156 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3157 (__v2di)_mm_min_epi64(__A, __B),
3158 (__v2di)__W);
3159}
3160
3161static __inline__ __m128i __DEFAULT_FN_ATTRS128
3162_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3163 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3164 (__v2di)_mm_min_epi64(__A, __B),
3165 (__v2di)_mm_setzero_si128());
3166}
3167
3168static __inline__ __m256i __DEFAULT_FN_ATTRS256
3169_mm256_min_epi64 (__m256i __A, __m256i __B) {
3170 return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
3171}
3172
3173static __inline__ __m256i __DEFAULT_FN_ATTRS256
3174_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3175 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3176 (__v4di)_mm256_min_epi64(__A, __B),
3177 (__v4di)__W);
3178}
3179
3180static __inline__ __m256i __DEFAULT_FN_ATTRS256
3181_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3182 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3183 (__v4di)_mm256_min_epi64(__A, __B),
3184 (__v4di)_mm256_setzero_si256());
3185}
3186
3187static __inline__ __m128i __DEFAULT_FN_ATTRS128
3188_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3189 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3190 (__v4si)_mm_min_epu32(__A, __B),
3191 (__v4si)_mm_setzero_si128());
3192}
3193
3194static __inline__ __m128i __DEFAULT_FN_ATTRS128
3195_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3196 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3197 (__v4si)_mm_min_epu32(__A, __B),
3198 (__v4si)__W);
3199}
3200
3201static __inline__ __m256i __DEFAULT_FN_ATTRS256
3202_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3203 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3204 (__v8si)_mm256_min_epu32(__A, __B),
3205 (__v8si)_mm256_setzero_si256());
3206}
3207
3208static __inline__ __m256i __DEFAULT_FN_ATTRS256
3209_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3210 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3211 (__v8si)_mm256_min_epu32(__A, __B),
3212 (__v8si)__W);
3213}
3214
3215static __inline__ __m128i __DEFAULT_FN_ATTRS128
3216_mm_min_epu64 (__m128i __A, __m128i __B) {
3217 return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
3218}
3219
3220static __inline__ __m128i __DEFAULT_FN_ATTRS128
3221_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3222 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3223 (__v2di)_mm_min_epu64(__A, __B),
3224 (__v2di)__W);
3225}
3226
3227static __inline__ __m128i __DEFAULT_FN_ATTRS128
3228_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3229 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3230 (__v2di)_mm_min_epu64(__A, __B),
3231 (__v2di)_mm_setzero_si128());
3232}
3233
3234static __inline__ __m256i __DEFAULT_FN_ATTRS256
3235_mm256_min_epu64 (__m256i __A, __m256i __B) {
3236 return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
3237}
3238
3239static __inline__ __m256i __DEFAULT_FN_ATTRS256
3240_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3241 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3242 (__v4di)_mm256_min_epu64(__A, __B),
3243 (__v4di)__W);
3244}
3245
3246static __inline__ __m256i __DEFAULT_FN_ATTRS256
3247_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3248 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3249 (__v4di)_mm256_min_epu64(__A, __B),
3250 (__v4di)_mm256_setzero_si256());
3251}
3252
/* Unmasked form: calls __builtin_ia32_rndscalepd_128_mask on A with control
 * imm, a zero pass-through vector and an all-ones mask. */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1))
3258
3259
/* Merge-masked form: calls __builtin_ia32_rndscalepd_128_mask on A with
 * control imm, W as the pass-through vector and U as the mask. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)(__m128d)(W), \
      (__mmask8)(U)))
3265
3266
/* Zero-masked form: calls __builtin_ia32_rndscalepd_128_mask on A with
 * control imm, a zero pass-through vector and U as the mask. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)))
3272
3273
/* Unmasked form: calls __builtin_ia32_rndscalepd_256_mask on A with control
 * imm, a zero pass-through vector and an all-ones mask. */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)-1))
3279
3280
/* Merge-masked form: calls __builtin_ia32_rndscalepd_256_mask on A with
 * control imm, W as the pass-through vector and U as the mask. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)(__m256d)(W), \
      (__mmask8)(U)))
3286
3287
/* Zero-masked form: calls __builtin_ia32_rndscalepd_256_mask on A with
 * control imm, a zero pass-through vector and U as the mask. */
#define _mm256_maskz_roundscale_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)))
3293
/* Unmasked form: calls __builtin_ia32_rndscaleps_128_mask on A with control
 * imm, a zero pass-through vector and an all-ones mask. */
#define _mm_roundscale_ps(A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1))
3298
3299
/* Merge-masked form: calls __builtin_ia32_rndscaleps_128_mask on A with
 * control imm, W as the pass-through vector and U as the mask. */
#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))
3304
3305
/* Zero-masked form: calls __builtin_ia32_rndscaleps_128_mask on A with
 * control imm, a zero pass-through vector and U as the mask. */
#define _mm_maskz_roundscale_ps(U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
3310
/* Unmasked form: calls __builtin_ia32_rndscaleps_256_mask on A with control
 * imm, a zero pass-through vector and an all-ones mask. */
#define _mm256_roundscale_ps(A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1))
3315
/* Merge-masked form: calls __builtin_ia32_rndscaleps_256_mask on A with
 * control imm, W as the pass-through vector and U as the mask. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)(__m256)(W), \
      (__mmask8)(U)))
3320
3321
/* Zero-masked form: calls __builtin_ia32_rndscaleps_256_mask on A with
 * control imm, a zero pass-through vector and U as the mask. */
#define _mm256_maskz_roundscale_ps(U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)))
3326
3327static __inline__ __m128d __DEFAULT_FN_ATTRS128
3328_mm_scalef_pd (__m128d __A, __m128d __B) {
3329 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3330 (__v2df) __B,
3331 (__v2df)
3332 _mm_setzero_pd (),
3333 (__mmask8) -1);
3334}
3335
3336static __inline__ __m128d __DEFAULT_FN_ATTRS128
3337_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3338 __m128d __B) {
3339 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3340 (__v2df) __B,
3341 (__v2df) __W,
3342 (__mmask8) __U);
3343}
3344
3345static __inline__ __m128d __DEFAULT_FN_ATTRS128
3346_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3347 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3348 (__v2df) __B,
3349 (__v2df)
3350 _mm_setzero_pd (),
3351 (__mmask8) __U);
3352}
3353
3354static __inline__ __m256d __DEFAULT_FN_ATTRS256
3355_mm256_scalef_pd (__m256d __A, __m256d __B) {
3356 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3357 (__v4df) __B,
3358 (__v4df)
3360 (__mmask8) -1);
3361}
3362
3363static __inline__ __m256d __DEFAULT_FN_ATTRS256
3364_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3365 __m256d __B) {
3366 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3367 (__v4df) __B,
3368 (__v4df) __W,
3369 (__mmask8) __U);
3370}
3371
3372static __inline__ __m256d __DEFAULT_FN_ATTRS256
3373_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3374 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3375 (__v4df) __B,
3376 (__v4df)
3378 (__mmask8) __U);
3379}
3380
3381static __inline__ __m128 __DEFAULT_FN_ATTRS128
3382_mm_scalef_ps (__m128 __A, __m128 __B) {
3383 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3384 (__v4sf) __B,
3385 (__v4sf)
3386 _mm_setzero_ps (),
3387 (__mmask8) -1);
3388}
3389
3390static __inline__ __m128 __DEFAULT_FN_ATTRS128
3391_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3392 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3393 (__v4sf) __B,
3394 (__v4sf) __W,
3395 (__mmask8) __U);
3396}
3397
3398static __inline__ __m128 __DEFAULT_FN_ATTRS128
3399_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3400 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3401 (__v4sf) __B,
3402 (__v4sf)
3403 _mm_setzero_ps (),
3404 (__mmask8) __U);
3405}
3406
3407static __inline__ __m256 __DEFAULT_FN_ATTRS256
3408_mm256_scalef_ps (__m256 __A, __m256 __B) {
3409 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3410 (__v8sf) __B,
3411 (__v8sf)
3413 (__mmask8) -1);
3414}
3415
3416static __inline__ __m256 __DEFAULT_FN_ATTRS256
3417_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3418 __m256 __B) {
3419 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3420 (__v8sf) __B,
3421 (__v8sf) __W,
3422 (__mmask8) __U);
3423}
3424
3425static __inline__ __m256 __DEFAULT_FN_ATTRS256
3426_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3427 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3428 (__v8sf) __B,
3429 (__v8sf)
3431 (__mmask8) __U);
3432}
3433
/* Unmasked scatter: forwards to __builtin_ia32_scatterdiv2df with an all-ones
 * mask; index is passed as __v2di and v1 as __v2df. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3438
/* Masked scatter: forwards to __builtin_ia32_scatterdiv2df with the given
 * mask; index is passed as __v2di and v1 as __v2df. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3443
/* Unmasked scatter: forwards to __builtin_ia32_scatterdiv2di with an all-ones
 * mask; index and v1 are both passed as __v2di. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3448
/* Masked scatter: forwards to __builtin_ia32_scatterdiv2di with the given
 * mask; index and v1 are both passed as __v2di. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3453
/* Unmasked scatter: forwards to __builtin_ia32_scatterdiv4df with an all-ones
 * mask; index is passed as __v4di and v1 as __v4df. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3458
/* Masked scatter: forwards to __builtin_ia32_scatterdiv4df with the given
 * mask; index is passed as __v4di and v1 as __v4df. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3463
/* Unmasked scatter: forwards to __builtin_ia32_scatterdiv4di with an all-ones
 * mask; index and v1 are both passed as __v4di. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3468
/* Masked scatter: forwards to __builtin_ia32_scatterdiv4di with the given
 * mask; index and v1 are both passed as __v4di. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3473
/* Unmasked scatter: forwards to __builtin_ia32_scatterdiv4sf with an all-ones
 * mask; index is passed as __v2di and v1 as __v4sf. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3478
/* Masked scatter: forwards to __builtin_ia32_scatterdiv4sf with the given
 * mask; index is passed as __v2di and v1 as __v4sf. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3483
/* Unmasked scatter: forwards to __builtin_ia32_scatterdiv4si with an all-ones
 * mask; index is passed as __v2di and v1 as __v4si. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), (__mmask8)-1, (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3488
/* Masked scatter: forwards to __builtin_ia32_scatterdiv4si with the given
 * mask; index is passed as __v2di and v1 as __v4si. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si( \
      (void *)(addr), (__mmask8)(mask), (__v2di)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3493
/* Unmasked scatter: forwards to __builtin_ia32_scatterdiv8sf with an all-ones
 * mask; index is passed as __v4di (256-bit) and v1 as __v4sf (128-bit). */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3498
/* Masked scatter: forwards to __builtin_ia32_scatterdiv8sf with the given
 * mask; index is passed as __v4di (256-bit) and v1 as __v4sf (128-bit). */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3503
/* Unmasked scatter: forwards to __builtin_ia32_scatterdiv8si with an all-ones
 * mask; index is passed as __v4di (256-bit) and v1 as __v4si (128-bit). */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), (__mmask8)-1, (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3508
/* Masked scatter: forwards to __builtin_ia32_scatterdiv8si with the given
 * mask; index is passed as __v4di (256-bit) and v1 as __v4si (128-bit). */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si( \
      (void *)(addr), (__mmask8)(mask), (__v4di)(__m256i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3513
/* Unmasked scatter: forwards to __builtin_ia32_scattersiv2df with an all-ones
 * mask; index is passed as __v4si and v1 as __v2df. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3518
/* Masked scatter: forwards to __builtin_ia32_scattersiv2df with the given
 * mask; index is passed as __v4si and v1 as __v2df. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v2df)(__m128d)(v1), (int)(scale))
3523
/* Unmasked scatter: forwards to __builtin_ia32_scattersiv2di with an all-ones
 * mask; index is passed as __v4si and v1 as __v2di. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3528
/* Masked scatter: forwards to __builtin_ia32_scattersiv2di with the given
 * mask; index is passed as __v4si and v1 as __v2di. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v2di)(__m128i)(v1), (int)(scale))
3533
/* Unmasked scatter: forwards to __builtin_ia32_scattersiv4df with an all-ones
 * mask; index is passed as __v4si (128-bit) and v1 as __v4df (256-bit). */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3538
/* Masked scatter: forwards to __builtin_ia32_scattersiv4df with the given
 * mask; index is passed as __v4si (128-bit) and v1 as __v4df (256-bit). */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4df)(__m256d)(v1), (int)(scale))
3543
/* Unmasked scatter: forwards to __builtin_ia32_scattersiv4di with an all-ones
 * mask; index is passed as __v4si (128-bit) and v1 as __v4di (256-bit). */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3548
/* Masked scatter: forwards to __builtin_ia32_scattersiv4di with the given
 * mask; index is passed as __v4si (128-bit) and v1 as __v4di (256-bit). */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4di)(__m256i)(v1), (int)(scale))
3553
/* Unmasked scatter: forwards to __builtin_ia32_scattersiv4sf with an all-ones
 * mask; index is passed as __v4si and v1 as __v4sf. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3558
/* Masked scatter: forwards to __builtin_ia32_scattersiv4sf with the given
 * mask; index is passed as __v4si and v1 as __v4sf. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4sf)(__m128)(v1), (int)(scale))
3563
/* Unmasked scatter: forwards to __builtin_ia32_scattersiv4si with an all-ones
 * mask; index and v1 are both passed as __v4si. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), (__mmask8)-1, (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3568
/* Masked scatter: forwards to __builtin_ia32_scattersiv4si with the given
 * mask; index and v1 are both passed as __v4si. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si( \
      (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
      (__v4si)(__m128i)(v1), (int)(scale))
3573
/* Unmasked scatter: forwards to __builtin_ia32_scattersiv8sf with an all-ones
 * mask; index is passed as __v8si and v1 as __v8sf. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), (__mmask8)-1, (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
3578
/* Masked scatter: forwards to __builtin_ia32_scattersiv8sf with the given
 * mask; index is passed as __v8si and v1 as __v8sf. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
3583
/* Unmasked scatter: forwards to __builtin_ia32_scattersiv8si with an all-ones
 * mask; index and v1 are both passed as __v8si. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), (__mmask8)-1, (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
3588
/* Masked scatter: forwards to __builtin_ia32_scattersiv8si with the given
 * mask; index and v1 are both passed as __v8si. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
3593
3594 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3595 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3596 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3597 (__v2df)_mm_sqrt_pd(__A),
3598 (__v2df)__W);
3599 }
3600
3601 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3602 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3603 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3604 (__v2df)_mm_sqrt_pd(__A),
3605 (__v2df)_mm_setzero_pd());
3606 }
3607
3608 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3609 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3610 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3611 (__v4df)_mm256_sqrt_pd(__A),
3612 (__v4df)__W);
3613 }
3614
3615 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3616 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3617 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3618 (__v4df)_mm256_sqrt_pd(__A),
3619 (__v4df)_mm256_setzero_pd());
3620 }
3621
3622 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3623 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3624 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3625 (__v4sf)_mm_sqrt_ps(__A),
3626 (__v4sf)__W);
3627 }
3628
3629 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3630 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3631 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3632 (__v4sf)_mm_sqrt_ps(__A),
3633 (__v4sf)_mm_setzero_ps());
3634 }
3635
3636 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3637 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3638 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3639 (__v8sf)_mm256_sqrt_ps(__A),
3640 (__v8sf)__W);
3641 }
3642
3643 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3645 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3646 (__v8sf)_mm256_sqrt_ps(__A),
3647 (__v8sf)_mm256_setzero_ps());
3648 }
3649
3650 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3651 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3652 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3653 (__v2df)_mm_sub_pd(__A, __B),
3654 (__v2df)__W);
3655 }
3656
3657 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3658 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3659 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3660 (__v2df)_mm_sub_pd(__A, __B),
3661 (__v2df)_mm_setzero_pd());
3662 }
3663
3664 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3665 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3666 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3667 (__v4df)_mm256_sub_pd(__A, __B),
3668 (__v4df)__W);
3669 }
3670
3671 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3672 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3673 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3674 (__v4df)_mm256_sub_pd(__A, __B),
3675 (__v4df)_mm256_setzero_pd());
3676 }
3677
3678 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3679 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3680 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3681 (__v4sf)_mm_sub_ps(__A, __B),
3682 (__v4sf)__W);
3683 }
3684
3685 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3686 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3687 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3688 (__v4sf)_mm_sub_ps(__A, __B),
3689 (__v4sf)_mm_setzero_ps());
3690 }
3691
3692 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3693 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3694 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3695 (__v8sf)_mm256_sub_ps(__A, __B),
3696 (__v8sf)__W);
3697 }
3698
3699 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3700 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3701 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3702 (__v8sf)_mm256_sub_ps(__A, __B),
3703 (__v8sf)_mm256_setzero_ps());
3704 }
3705
3706 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3707 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3708 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3709 (__v4si)__B);
3710 }
3711
3712 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3713 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3714 __m128i __B) {
3715 return (__m128i)__builtin_ia32_selectd_128(__U,
3716 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3717 (__v4si)__A);
3718 }
3719
3720 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3721 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3722 __m128i __B) {
3723 return (__m128i)__builtin_ia32_selectd_128(__U,
3724 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3725 (__v4si)__I);
3726 }
3727
3728 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3729 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3730 __m128i __B) {
3731 return (__m128i)__builtin_ia32_selectd_128(__U,
3732 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3733 (__v4si)_mm_setzero_si128());
3734 }
3735
3736 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3737 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3738 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3739 (__v8si) __B);
3740 }
3741
3742 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3743 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3744 __m256i __B) {
3745 return (__m256i)__builtin_ia32_selectd_256(__U,
3746 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3747 (__v8si)__A);
3748 }
3749
3750 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3751 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3752 __m256i __B) {
3753 return (__m256i)__builtin_ia32_selectd_256(__U,
3754 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3755 (__v8si)__I);
3756 }
3757
3758 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3759 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3760 __m256i __B) {
3761 return (__m256i)__builtin_ia32_selectd_256(__U,
3762 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3763 (__v8si)_mm256_setzero_si256());
3764 }
3765
3766 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3767 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3768 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3769 (__v2df)__B);
3770 }
3771
3772 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3773 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3774 return (__m128d)__builtin_ia32_selectpd_128(__U,
3775 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3776 (__v2df)__A);
3777 }
3778
3779 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3780 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3781 return (__m128d)__builtin_ia32_selectpd_128(__U,
3782 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3783 (__v2df)(__m128d)__I);
3784 }
3785
3786 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3787 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3788 return (__m128d)__builtin_ia32_selectpd_128(__U,
3789 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3790 (__v2df)_mm_setzero_pd());
3791 }
3792
3793 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3794 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3795 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3796 (__v4df)__B);
3797 }
3798
3799 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3800 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3801 __m256d __B) {
3802 return (__m256d)__builtin_ia32_selectpd_256(__U,
3803 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3804 (__v4df)__A);
3805 }
3806
3807 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3808 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3809 __m256d __B) {
3810 return (__m256d)__builtin_ia32_selectpd_256(__U,
3811 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3812 (__v4df)(__m256d)__I);
3813 }
3814
3815 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3816 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3817 __m256d __B) {
3818 return (__m256d)__builtin_ia32_selectpd_256(__U,
3819 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3820 (__v4df)_mm256_setzero_pd());
3821 }
3822
3823 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3824 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3825 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3826 (__v4sf)__B);
3827 }
3828
3829 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3830 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3831 return (__m128)__builtin_ia32_selectps_128(__U,
3832 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3833 (__v4sf)__A);
3834 }
3835
3836 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3837 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3838 return (__m128)__builtin_ia32_selectps_128(__U,
3839 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3840 (__v4sf)(__m128)__I);
3841 }
3842
3843 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3844 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3845 return (__m128)__builtin_ia32_selectps_128(__U,
3846 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3847 (__v4sf)_mm_setzero_ps());
3848 }
3849
3850 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3851 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3852 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3853 (__v8sf) __B);
3854 }
3855
3856 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3857 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3858 return (__m256)__builtin_ia32_selectps_256(__U,
3859 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3860 (__v8sf)__A);
3861 }
3862
3863 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3864 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3865 __m256 __B) {
3866 return (__m256)__builtin_ia32_selectps_256(__U,
3867 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3868 (__v8sf)(__m256)__I);
3869 }
3870
3871 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3872 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3873 __m256 __B) {
3874 return (__m256)__builtin_ia32_selectps_256(__U,
3875 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3876 (__v8sf)_mm256_setzero_ps());
3877 }
3878
3879 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3880 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3881 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3882 (__v2di)__B);
3883 }
3884
3885 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3886 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3887 __m128i __B) {
3888 return (__m128i)__builtin_ia32_selectq_128(__U,
3889 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3890 (__v2di)__A);
3891 }
3892
3893 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3894 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3895 __m128i __B) {
3896 return (__m128i)__builtin_ia32_selectq_128(__U,
3897 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3898 (__v2di)__I);
3899 }
3900
3901 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3902 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3903 __m128i __B) {
3904 return (__m128i)__builtin_ia32_selectq_128(__U,
3905 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3906 (__v2di)_mm_setzero_si128());
3907 }
3908
3909
3910 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3911 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3912 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3913 (__v4di) __B);
3914 }
3915
3916 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3917 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3918 __m256i __B) {
3919 return (__m256i)__builtin_ia32_selectq_256(__U,
3920 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3921 (__v4di)__A);
3922 }
3923
3924 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3925 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3926 __m256i __B) {
3927 return (__m256i)__builtin_ia32_selectq_256(__U,
3928 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3929 (__v4di)__I);
3930 }
3931
3932 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3933 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3934 __m256i __B) {
3935 return (__m256i)__builtin_ia32_selectq_256(__U,
3936 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3937 (__v4di)_mm256_setzero_si256());
3938 }
3939
3940 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3941 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3942 {
3943 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3944 (__v4si)_mm_cvtepi8_epi32(__A),
3945 (__v4si)__W);
3946 }
3947
3948 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3950 {
3951 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3952 (__v4si)_mm_cvtepi8_epi32(__A),
3953 (__v4si)_mm_setzero_si128());
3954 }
3955
3956 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3957 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3958 {
3959 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3960 (__v8si)_mm256_cvtepi8_epi32(__A),
3961 (__v8si)__W);
3962 }
3963
3964 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3966 {
3967 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3968 (__v8si)_mm256_cvtepi8_epi32(__A),
3969 (__v8si)_mm256_setzero_si256());
3970 }
3971
3972 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3973 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
3974 {
3975 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3976 (__v2di)_mm_cvtepi8_epi64(__A),
3977 (__v2di)__W);
3978 }
3979
3980 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3982 {
3983 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3984 (__v2di)_mm_cvtepi8_epi64(__A),
3985 (__v2di)_mm_setzero_si128());
3986 }
3987
3988 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3989 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
3990 {
3991 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3992 (__v4di)_mm256_cvtepi8_epi64(__A),
3993 (__v4di)__W);
3994 }
3995
3996 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3998 {
3999 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4000 (__v4di)_mm256_cvtepi8_epi64(__A),
4001 (__v4di)_mm256_setzero_si256());
4002 }
4003
4004 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4005 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4006 {
4007 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4008 (__v2di)_mm_cvtepi32_epi64(__X),
4009 (__v2di)__W);
4010 }
4011
4012 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4014 {
4015 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4016 (__v2di)_mm_cvtepi32_epi64(__X),
4017 (__v2di)_mm_setzero_si128());
4018 }
4019
4020 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4021 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4022 {
4023 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4024 (__v4di)_mm256_cvtepi32_epi64(__X),
4025 (__v4di)__W);
4026 }
4027
4028 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4030 {
4031 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4032 (__v4di)_mm256_cvtepi32_epi64(__X),
4033 (__v4di)_mm256_setzero_si256());
4034 }
4035
4036 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4037 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4038 {
4039 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4040 (__v4si)_mm_cvtepi16_epi32(__A),
4041 (__v4si)__W);
4042 }
4043
4044 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4046 {
4047 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4048 (__v4si)_mm_cvtepi16_epi32(__A),
4049 (__v4si)_mm_setzero_si128());
4050 }
4051
4052 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4053 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4054 {
4055 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4056 (__v8si)_mm256_cvtepi16_epi32(__A),
4057 (__v8si)__W);
4058 }
4059
4060 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4062 {
4063 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4064 (__v8si)_mm256_cvtepi16_epi32(__A),
4065 (__v8si)_mm256_setzero_si256());
4066 }
4067
4068 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4069 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4070 {
4071 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4072 (__v2di)_mm_cvtepi16_epi64(__A),
4073 (__v2di)__W);
4074 }
4075
4076 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4078 {
4079 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4080 (__v2di)_mm_cvtepi16_epi64(__A),
4081 (__v2di)_mm_setzero_si128());
4082 }
4083
4084 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4085 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4086 {
4087 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4088 (__v4di)_mm256_cvtepi16_epi64(__A),
4089 (__v4di)__W);
4090 }
4091
4092 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4094 {
4095 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4096 (__v4di)_mm256_cvtepi16_epi64(__A),
4097 (__v4di)_mm256_setzero_si256());
4098 }
4099
4100
4101 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4102 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4103 {
4104 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4105 (__v4si)_mm_cvtepu8_epi32(__A),
4106 (__v4si)__W);
4107 }
4108
4109 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4111 {
4112 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4113 (__v4si)_mm_cvtepu8_epi32(__A),
4114 (__v4si)_mm_setzero_si128());
4115 }
4116
4117 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4118 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4119 {
4120 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4121 (__v8si)_mm256_cvtepu8_epi32(__A),
4122 (__v8si)__W);
4123 }
4124
4125 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4127 {
4128 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4129 (__v8si)_mm256_cvtepu8_epi32(__A),
4130 (__v8si)_mm256_setzero_si256());
4131 }
4132
4133 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4134 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4135 {
4136 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4137 (__v2di)_mm_cvtepu8_epi64(__A),
4138 (__v2di)__W);
4139 }
4140
4141 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4143 {
4144 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4145 (__v2di)_mm_cvtepu8_epi64(__A),
4146 (__v2di)_mm_setzero_si128());
4147 }
4148
4149 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4150 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4151 {
4152 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4153 (__v4di)_mm256_cvtepu8_epi64(__A),
4154 (__v4di)__W);
4155 }
4156
4157 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4159 {
4160 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4161 (__v4di)_mm256_cvtepu8_epi64(__A),
4162 (__v4di)_mm256_setzero_si256());
4163 }
4164
4165 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4166 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4167 {
4168 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4169 (__v2di)_mm_cvtepu32_epi64(__X),
4170 (__v2di)__W);
4171 }
4172
4173 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4175 {
4176 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4177 (__v2di)_mm_cvtepu32_epi64(__X),
4178 (__v2di)_mm_setzero_si128());
4179 }
4180
4181 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4182 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4183 {
4184 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4185 (__v4di)_mm256_cvtepu32_epi64(__X),
4186 (__v4di)__W);
4187 }
4188
4189 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4191 {
4192 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4193 (__v4di)_mm256_cvtepu32_epi64(__X),
4194 (__v4di)_mm256_setzero_si256());
4195 }
4196
4197 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4198 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4199 {
4200 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4201 (__v4si)_mm_cvtepu16_epi32(__A),
4202 (__v4si)__W);
4203 }
4204
4205 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4207 {
4208 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4209 (__v4si)_mm_cvtepu16_epi32(__A),
4210 (__v4si)_mm_setzero_si128());
4211 }
4212
4213 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4214 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4215 {
4216 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4217 (__v8si)_mm256_cvtepu16_epi32(__A),
4218 (__v8si)__W);
4219 }
4220
4221 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4223 {
4224 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4225 (__v8si)_mm256_cvtepu16_epi32(__A),
4226 (__v8si)_mm256_setzero_si256());
4227 }
4228
4229 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4230 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4231 {
4232 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4233 (__v2di)_mm_cvtepu16_epi64(__A),
4234 (__v2di)__W);
4235 }
4236
4237 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4239 {
4240 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4241 (__v2di)_mm_cvtepu16_epi64(__A),
4242 (__v2di)_mm_setzero_si128());
4243 }
4244
4245 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4246 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4247 {
4248 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4249 (__v4di)_mm256_cvtepu16_epi64(__A),
4250 (__v4di)__W);
4251 }
4252
4253 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4255 {
4256 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4257 (__v4di)_mm256_cvtepu16_epi64(__A),
4258 (__v4di)_mm256_setzero_si256());
4259 }
4260
4261
/* Expands to __builtin_ia32_prold128 on (a) viewed as __v4si, with (b) cast
 * to int. */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
4264
/* Masked by (u): __builtin_ia32_selectd_128 chooses between
 * _mm_rol_epi32((a), (b)) and (w). */
#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4269
/* Masked by (u): __builtin_ia32_selectd_128 chooses between
 * _mm_rol_epi32((a), (b)) and _mm_setzero_si128(). */
#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4274
/* Expands to __builtin_ia32_prold256 on (a) viewed as __v8si, with (b) cast
 * to int. */
#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
4277
/* Masked by (u): __builtin_ia32_selectd_256 chooses between
 * _mm256_rol_epi32((a), (b)) and (w). */
#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4282
/* Masked by (u): __builtin_ia32_selectd_256 chooses between
 * _mm256_rol_epi32((a), (b)) and _mm256_setzero_si256(). */
#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4287
/* Expands to __builtin_ia32_prolq128 on (a) viewed as __v2di, with (b) cast
 * to int. */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
4290
/* Masked by (u): __builtin_ia32_selectq_128 chooses between
 * _mm_rol_epi64((a), (b)) and (w). */
#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4295
/* Masked by (u): __builtin_ia32_selectq_128 chooses between
 * _mm_rol_epi64((a), (b)) and _mm_setzero_si128(). */
#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4300
/* Expands to __builtin_ia32_prolq256 on (a) viewed as __v4di, with (b) cast
 * to int. */
#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
4303
/* Masked by (u): __builtin_ia32_selectq_256 chooses between
 * _mm256_rol_epi64((a), (b)) and (w). */
#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4308
/* Masked by (u): __builtin_ia32_selectq_256 chooses between
 * _mm256_rol_epi64((a), (b)) and _mm256_setzero_si256(). */
#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4313
4314static __inline__ __m128i __DEFAULT_FN_ATTRS128
4315_mm_rolv_epi32 (__m128i __A, __m128i __B)
4316{
4317 return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
4318}
4319
4320static __inline__ __m128i __DEFAULT_FN_ATTRS128
4321_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4322{
4323 return (__m128i)__builtin_ia32_selectd_128(__U,
4324 (__v4si)_mm_rolv_epi32(__A, __B),
4325 (__v4si)__W);
4326}
4327
4328static __inline__ __m128i __DEFAULT_FN_ATTRS128
4329_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4330{
4331 return (__m128i)__builtin_ia32_selectd_128(__U,
4332 (__v4si)_mm_rolv_epi32(__A, __B),
4333 (__v4si)_mm_setzero_si128());
4334}
4335
4336static __inline__ __m256i __DEFAULT_FN_ATTRS256
4337_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4338{
4339 return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__A, (__v8su)__B);
4340}
4341
4342static __inline__ __m256i __DEFAULT_FN_ATTRS256
4343_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4344{
4345 return (__m256i)__builtin_ia32_selectd_256(__U,
4346 (__v8si)_mm256_rolv_epi32(__A, __B),
4347 (__v8si)__W);
4348}
4349
4350static __inline__ __m256i __DEFAULT_FN_ATTRS256
4351_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4352{
4353 return (__m256i)__builtin_ia32_selectd_256(__U,
4354 (__v8si)_mm256_rolv_epi32(__A, __B),
4355 (__v8si)_mm256_setzero_si256());
4356}
4357
4358static __inline__ __m128i __DEFAULT_FN_ATTRS128
4359_mm_rolv_epi64 (__m128i __A, __m128i __B)
4360{
4361 return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
4362}
4363
4364static __inline__ __m128i __DEFAULT_FN_ATTRS128
4365_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4366{
4367 return (__m128i)__builtin_ia32_selectq_128(__U,
4368 (__v2di)_mm_rolv_epi64(__A, __B),
4369 (__v2di)__W);
4370}
4371
4372static __inline__ __m128i __DEFAULT_FN_ATTRS128
4373_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4374{
4375 return (__m128i)__builtin_ia32_selectq_128(__U,
4376 (__v2di)_mm_rolv_epi64(__A, __B),
4377 (__v2di)_mm_setzero_si128());
4378}
4379
4380static __inline__ __m256i __DEFAULT_FN_ATTRS256
4381_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4382{
4383 return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__A, (__v4du)__B);
4384}
4385
4386static __inline__ __m256i __DEFAULT_FN_ATTRS256
4387_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4388{
4389 return (__m256i)__builtin_ia32_selectq_256(__U,
4390 (__v4di)_mm256_rolv_epi64(__A, __B),
4391 (__v4di)__W);
4392}
4393
4394static __inline__ __m256i __DEFAULT_FN_ATTRS256
4395_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4396{
4397 return (__m256i)__builtin_ia32_selectq_256(__U,
4398 (__v4di)_mm256_rolv_epi64(__A, __B),
4399 (__v4di)_mm256_setzero_si256());
4400}
4401
/* Expands to __builtin_ia32_prord128 on (a) viewed as __v4si, with (b) cast
 * to int. */
#define _mm_ror_epi32(a, b) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
4404
/* Masked by (u): __builtin_ia32_selectd_128 chooses between
 * _mm_ror_epi32((a), (b)) and (w). */
#define _mm_mask_ror_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4409
/* Masked by (u): __builtin_ia32_selectd_128 chooses between
 * _mm_ror_epi32((a), (b)) and _mm_setzero_si128(). */
#define _mm_maskz_ror_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4414
/* Expands to __builtin_ia32_prord256 on (a) viewed as __v8si, with (b) cast
 * to int. */
#define _mm256_ror_epi32(a, b) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
4417
/* Masked by (u): __builtin_ia32_selectd_256 chooses between
 * _mm256_ror_epi32((a), (b)) and (w). */
#define _mm256_mask_ror_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4422
/* Masked by (u): __builtin_ia32_selectd_256 chooses between
 * _mm256_ror_epi32((a), (b)) and _mm256_setzero_si256(). */
#define _mm256_maskz_ror_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4427
/* Expands to __builtin_ia32_prorq128 on (a) viewed as __v2di, with (b) cast
 * to int. */
#define _mm_ror_epi64(a, b) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
4430
/* Masked by (u): __builtin_ia32_selectq_128 chooses between
 * _mm_ror_epi64((a), (b)) and (w). */
#define _mm_mask_ror_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4435
/* Masked by (u): __builtin_ia32_selectq_128 chooses between
 * _mm_ror_epi64((a), (b)) and _mm_setzero_si128(). */
#define _mm_maskz_ror_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4440
/* Expands to __builtin_ia32_prorq256 on (a) viewed as __v4di, with (b) cast
 * to int. */
#define _mm256_ror_epi64(a, b) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
4443
/* Masked by (u): __builtin_ia32_selectq_256 chooses between
 * _mm256_ror_epi64((a), (b)) and (w). */
#define _mm256_mask_ror_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4448
/* Masked by (u): __builtin_ia32_selectq_256 chooses between
 * _mm256_ror_epi64((a), (b)) and _mm256_setzero_si256(). */
#define _mm256_maskz_ror_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4453
4454static __inline__ __m128i __DEFAULT_FN_ATTRS128
4455_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4456{
4457 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4458 (__v4si)_mm_sll_epi32(__A, __B),
4459 (__v4si)__W);
4460}
4461
4462static __inline__ __m128i __DEFAULT_FN_ATTRS128
4463_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4464{
4465 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4466 (__v4si)_mm_sll_epi32(__A, __B),
4467 (__v4si)_mm_setzero_si128());
4468}
4469
4470static __inline__ __m256i __DEFAULT_FN_ATTRS256
4471_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4472{
4473 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4474 (__v8si)_mm256_sll_epi32(__A, __B),
4475 (__v8si)__W);
4476}
4477
4478static __inline__ __m256i __DEFAULT_FN_ATTRS256
4479_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4480{
4481 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4482 (__v8si)_mm256_sll_epi32(__A, __B),
4483 (__v8si)_mm256_setzero_si256());
4484}
4485
4486static __inline__ __m128i __DEFAULT_FN_ATTRS128
4487_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4488{
4489 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4490 (__v4si)_mm_slli_epi32(__A, (int)__B),
4491 (__v4si)__W);
4492}
4493
4494static __inline__ __m128i __DEFAULT_FN_ATTRS128
4495_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4496{
4497 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4498 (__v4si)_mm_slli_epi32(__A, (int)__B),
4499 (__v4si)_mm_setzero_si128());
4500}
4501
4502static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4503_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A,
4504 unsigned int __B) {
4505 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4506 (__v8si)_mm256_slli_epi32(__A, (int)__B),
4507 (__v8si)__W);
4508}
4509
4510static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4511_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
4512 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4513 (__v8si)_mm256_slli_epi32(__A, (int)__B),
4514 (__v8si)_mm256_setzero_si256());
4515}
4516
4517static __inline__ __m128i __DEFAULT_FN_ATTRS128
4518_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4519{
4520 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4521 (__v2di)_mm_sll_epi64(__A, __B),
4522 (__v2di)__W);
4523}
4524
4525static __inline__ __m128i __DEFAULT_FN_ATTRS128
4526_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4527{
4528 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4529 (__v2di)_mm_sll_epi64(__A, __B),
4530 (__v2di)_mm_setzero_si128());
4531}
4532
4533static __inline__ __m256i __DEFAULT_FN_ATTRS256
4534_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4535{
4536 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4537 (__v4di)_mm256_sll_epi64(__A, __B),
4538 (__v4di)__W);
4539}
4540
4541static __inline__ __m256i __DEFAULT_FN_ATTRS256
4542_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4543{
4544 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4545 (__v4di)_mm256_sll_epi64(__A, __B),
4546 (__v4di)_mm256_setzero_si256());
4547}
4548
4549static __inline__ __m128i __DEFAULT_FN_ATTRS128
4550_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4551{
4552 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4553 (__v2di)_mm_slli_epi64(__A, (int)__B),
4554 (__v2di)__W);
4555}
4556
4557static __inline__ __m128i __DEFAULT_FN_ATTRS128
4558_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4559{
4560 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4561 (__v2di)_mm_slli_epi64(__A, (int)__B),
4562 (__v2di)_mm_setzero_si128());
4563}
4564
4565static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4566_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A,
4567 unsigned int __B) {
4568 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4569 (__v4di)_mm256_slli_epi64(__A, (int)__B),
4570 (__v4di)__W);
4571}
4572
4573static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4574_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) {
4575 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4576 (__v4di)_mm256_slli_epi64(__A, (int)__B),
4577 (__v4di)_mm256_setzero_si256());
4578}
4579
4580static __inline__ __m128i __DEFAULT_FN_ATTRS128
4581_mm_rorv_epi32 (__m128i __A, __m128i __B)
4582{
4583 return (__m128i)__builtin_elementwise_fshr((__v4su)__A, (__v4su)__A, (__v4su)__B);
4584}
4585
4586static __inline__ __m128i __DEFAULT_FN_ATTRS128
4587_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4588{
4589 return (__m128i)__builtin_ia32_selectd_128(__U,
4590 (__v4si)_mm_rorv_epi32(__A, __B),
4591 (__v4si)__W);
4592}
4593
4594static __inline__ __m128i __DEFAULT_FN_ATTRS128
4595_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4596{
4597 return (__m128i)__builtin_ia32_selectd_128(__U,
4598 (__v4si)_mm_rorv_epi32(__A, __B),
4599 (__v4si)_mm_setzero_si128());
4600}
4601
4602static __inline__ __m256i __DEFAULT_FN_ATTRS256
4603_mm256_rorv_epi32 (__m256i __A, __m256i __B)
4604{
4605 return (__m256i)__builtin_elementwise_fshr((__v8su)__A, (__v8su)__A, (__v8su)__B);
4606}
4607
4608static __inline__ __m256i __DEFAULT_FN_ATTRS256
4609_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4610{
4611 return (__m256i)__builtin_ia32_selectd_256(__U,
4612 (__v8si)_mm256_rorv_epi32(__A, __B),
4613 (__v8si)__W);
4614}
4615
4616static __inline__ __m256i __DEFAULT_FN_ATTRS256
4617_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4618{
4619 return (__m256i)__builtin_ia32_selectd_256(__U,
4620 (__v8si)_mm256_rorv_epi32(__A, __B),
4621 (__v8si)_mm256_setzero_si256());
4622}
4623
4624static __inline__ __m128i __DEFAULT_FN_ATTRS128
4625_mm_rorv_epi64 (__m128i __A, __m128i __B)
4626{
4627 return (__m128i)__builtin_elementwise_fshr((__v2du)__A, (__v2du)__A, (__v2du)__B);
4628}
4629
4630static __inline__ __m128i __DEFAULT_FN_ATTRS128
4631_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4632{
4633 return (__m128i)__builtin_ia32_selectq_128(__U,
4634 (__v2di)_mm_rorv_epi64(__A, __B),
4635 (__v2di)__W);
4636}
4637
4638static __inline__ __m128i __DEFAULT_FN_ATTRS128
4639_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4640{
4641 return (__m128i)__builtin_ia32_selectq_128(__U,
4642 (__v2di)_mm_rorv_epi64(__A, __B),
4643 (__v2di)_mm_setzero_si128());
4644}
4645
4646static __inline__ __m256i __DEFAULT_FN_ATTRS256
4647_mm256_rorv_epi64 (__m256i __A, __m256i __B)
4648{
4649 return (__m256i)__builtin_elementwise_fshr((__v4du)__A, (__v4du)__A, (__v4du)__B);
4650}
4651
4652static __inline__ __m256i __DEFAULT_FN_ATTRS256
4653_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4654{
4655 return (__m256i)__builtin_ia32_selectq_256(__U,
4656 (__v4di)_mm256_rorv_epi64(__A, __B),
4657 (__v4di)__W);
4658}
4659
4660static __inline__ __m256i __DEFAULT_FN_ATTRS256
4661_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4662{
4663 return (__m256i)__builtin_ia32_selectq_256(__U,
4664 (__v4di)_mm256_rorv_epi64(__A, __B),
4665 (__v4di)_mm256_setzero_si256());
4666}
4667
4668static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4669_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4670{
4671 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4672 (__v2di)_mm_sllv_epi64(__X, __Y),
4673 (__v2di)__W);
4674}
4675
4676static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4677_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4678{
4679 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4680 (__v2di)_mm_sllv_epi64(__X, __Y),
4681 (__v2di)_mm_setzero_si128());
4682}
4683
4684static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4685_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4686{
4687 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4688 (__v4di)_mm256_sllv_epi64(__X, __Y),
4689 (__v4di)__W);
4690}
4691
4692static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4693_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4694{
4695 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4696 (__v4di)_mm256_sllv_epi64(__X, __Y),
4697 (__v4di)_mm256_setzero_si256());
4698}
4699
4700static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4701_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4702{
4703 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4704 (__v4si)_mm_sllv_epi32(__X, __Y),
4705 (__v4si)__W);
4706}
4707
4708static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4709_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4710{
4711 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4712 (__v4si)_mm_sllv_epi32(__X, __Y),
4713 (__v4si)_mm_setzero_si128());
4714}
4715
4716static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4717_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4718{
4719 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4720 (__v8si)_mm256_sllv_epi32(__X, __Y),
4721 (__v8si)__W);
4722}
4723
4724static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4725_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4726{
4727 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4728 (__v8si)_mm256_sllv_epi32(__X, __Y),
4729 (__v8si)_mm256_setzero_si256());
4730}
4731
4732static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4733_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4734{
4735 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4736 (__v2di)_mm_srlv_epi64(__X, __Y),
4737 (__v2di)__W);
4738}
4739
4740static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4741_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4742{
4743 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4744 (__v2di)_mm_srlv_epi64(__X, __Y),
4745 (__v2di)_mm_setzero_si128());
4746}
4747
4748static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4749_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4750{
4751 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4752 (__v4di)_mm256_srlv_epi64(__X, __Y),
4753 (__v4di)__W);
4754}
4755
4756static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4757_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4758{
4759 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4760 (__v4di)_mm256_srlv_epi64(__X, __Y),
4761 (__v4di)_mm256_setzero_si256());
4762}
4763
4764static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4765_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4766{
4767 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4768 (__v4si)_mm_srlv_epi32(__X, __Y),
4769 (__v4si)__W);
4770}
4771
4772static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4773_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4774{
4775 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4776 (__v4si)_mm_srlv_epi32(__X, __Y),
4777 (__v4si)_mm_setzero_si128());
4778}
4779
4780static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4781_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4782{
4783 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4784 (__v8si)_mm256_srlv_epi32(__X, __Y),
4785 (__v8si)__W);
4786}
4787
4788static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4789_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4790{
4791 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4792 (__v8si)_mm256_srlv_epi32(__X, __Y),
4793 (__v8si)_mm256_setzero_si256());
4794}
4795
4796static __inline__ __m128i __DEFAULT_FN_ATTRS128
4797_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4798{
4799 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4800 (__v4si)_mm_srl_epi32(__A, __B),
4801 (__v4si)__W);
4802}
4803
4804static __inline__ __m128i __DEFAULT_FN_ATTRS128
4805_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4806{
4807 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4808 (__v4si)_mm_srl_epi32(__A, __B),
4809 (__v4si)_mm_setzero_si128());
4810}
4811
4812static __inline__ __m256i __DEFAULT_FN_ATTRS256
4813_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4814{
4815 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4816 (__v8si)_mm256_srl_epi32(__A, __B),
4817 (__v8si)__W);
4818}
4819
4820static __inline__ __m256i __DEFAULT_FN_ATTRS256
4821_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4822{
4823 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4824 (__v8si)_mm256_srl_epi32(__A, __B),
4825 (__v8si)_mm256_setzero_si256());
4826}
4827
4828static __inline__ __m128i __DEFAULT_FN_ATTRS128
4829_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4830{
4831 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4832 (__v4si)_mm_srli_epi32(__A, (int)__B),
4833 (__v4si)__W);
4834}
4835
4836static __inline__ __m128i __DEFAULT_FN_ATTRS128
4837_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4838{
4839 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4840 (__v4si)_mm_srli_epi32(__A, (int)__B),
4841 (__v4si)_mm_setzero_si128());
4842}
4843
4844static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4845_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A,
4846 unsigned int __B) {
4847 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4848 (__v8si)_mm256_srli_epi32(__A, (int)__B),
4849 (__v8si)__W);
4850}
4851
4852static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4853_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
4854 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4855 (__v8si)_mm256_srli_epi32(__A, (int)__B),
4856 (__v8si)_mm256_setzero_si256());
4857}
4858
4859static __inline__ __m128i __DEFAULT_FN_ATTRS128
4860_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4861{
4862 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4863 (__v2di)_mm_srl_epi64(__A, __B),
4864 (__v2di)__W);
4865}
4866
4867static __inline__ __m128i __DEFAULT_FN_ATTRS128
4868_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4869{
4870 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4871 (__v2di)_mm_srl_epi64(__A, __B),
4872 (__v2di)_mm_setzero_si128());
4873}
4874
4875static __inline__ __m256i __DEFAULT_FN_ATTRS256
4876_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4877{
4878 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4879 (__v4di)_mm256_srl_epi64(__A, __B),
4880 (__v4di)__W);
4881}
4882
4883static __inline__ __m256i __DEFAULT_FN_ATTRS256
4884_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4885{
4886 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4887 (__v4di)_mm256_srl_epi64(__A, __B),
4888 (__v4di)_mm256_setzero_si256());
4889}
4890
4891static __inline__ __m128i __DEFAULT_FN_ATTRS128
4892_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4893{
4894 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4895 (__v2di)_mm_srli_epi64(__A, (int)__B),
4896 (__v2di)__W);
4897}
4898
4899static __inline__ __m128i __DEFAULT_FN_ATTRS128
4900_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4901{
4902 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4903 (__v2di)_mm_srli_epi64(__A, (int)__B),
4904 (__v2di)_mm_setzero_si128());
4905}
4906
4907static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4908_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A,
4909 unsigned int __B) {
4910 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4911 (__v4di)_mm256_srli_epi64(__A, (int)__B),
4912 (__v4di)__W);
4913}
4914
4915static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4916_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) {
4917 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4918 (__v4di)_mm256_srli_epi64(__A, (int)__B),
4919 (__v4di)_mm256_setzero_si256());
4920}
4921
4922static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4923_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4924{
4925 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4926 (__v4si)_mm_srav_epi32(__X, __Y),
4927 (__v4si)__W);
4928}
4929
4930static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
4931_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4932{
4933 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4934 (__v4si)_mm_srav_epi32(__X, __Y),
4935 (__v4si)_mm_setzero_si128());
4936}
4937
4938static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4939_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4940{
4941 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4942 (__v8si)_mm256_srav_epi32(__X, __Y),
4943 (__v8si)__W);
4944}
4945
4946static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
4947_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4948{
4949 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4950 (__v8si)_mm256_srav_epi32(__X, __Y),
4951 (__v8si)_mm256_setzero_si256());
4952}
4953
4954static __inline__ __m128i __DEFAULT_FN_ATTRS128
4955_mm_srav_epi64(__m128i __X, __m128i __Y)
4956{
4957 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
4958}
4959
4960static __inline__ __m128i __DEFAULT_FN_ATTRS128
4961_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4962{
4963 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4964 (__v2di)_mm_srav_epi64(__X, __Y),
4965 (__v2di)__W);
4966}
4967
4968static __inline__ __m128i __DEFAULT_FN_ATTRS128
4969_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4970{
4971 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4972 (__v2di)_mm_srav_epi64(__X, __Y),
4973 (__v2di)_mm_setzero_si128());
4974}
4975
4976static __inline__ __m256i __DEFAULT_FN_ATTRS256
4977_mm256_srav_epi64(__m256i __X, __m256i __Y)
4978{
4979 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
4980}
4981
4982static __inline__ __m256i __DEFAULT_FN_ATTRS256
4983_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4984{
4985 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4986 (__v4di)_mm256_srav_epi64(__X, __Y),
4987 (__v4di)__W);
4988}
4989
4990static __inline__ __m256i __DEFAULT_FN_ATTRS256
4991_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
4992{
4993 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4994 (__v4di)_mm256_srav_epi64(__X, __Y),
4995 (__v4di)_mm256_setzero_si256());
4996}
4997
4998static __inline__ __m128i __DEFAULT_FN_ATTRS128
4999_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5000{
5001 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5002 (__v4si) __A,
5003 (__v4si) __W);
5004}
5005
5006static __inline__ __m128i __DEFAULT_FN_ATTRS128
5008{
5009 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5010 (__v4si) __A,
5011 (__v4si) _mm_setzero_si128 ());
5012}
5013
5014
5015static __inline__ __m256i __DEFAULT_FN_ATTRS256
5016_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5017{
5018 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5019 (__v8si) __A,
5020 (__v8si) __W);
5021}
5022
5023static __inline__ __m256i __DEFAULT_FN_ATTRS256
5025{
5026 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5027 (__v8si) __A,
5028 (__v8si) _mm256_setzero_si256 ());
5029}
5030
5031static __inline __m128i __DEFAULT_FN_ATTRS128
5032_mm_load_epi32 (void const *__P)
5033{
5034 return *(const __m128i *) __P;
5035}
5036
5037static __inline__ __m128i __DEFAULT_FN_ATTRS128
5038_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5039{
5040 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5041 (__v4si) __W,
5042 (__mmask8)
5043 __U);
5044}
5045
5046static __inline__ __m128i __DEFAULT_FN_ATTRS128
5048{
5049 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5050 (__v4si)
5052 (__mmask8)
5053 __U);
5054}
5055
5056static __inline __m256i __DEFAULT_FN_ATTRS256
5058{
5059 return *(const __m256i *) __P;
5060}
5061
5062static __inline__ __m256i __DEFAULT_FN_ATTRS256
5063_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5064{
5065 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5066 (__v8si) __W,
5067 (__mmask8)
5068 __U);
5069}
5070
5071static __inline__ __m256i __DEFAULT_FN_ATTRS256
5073{
5074 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5075 (__v8si)
5077 (__mmask8)
5078 __U);
5079}
5080
5081static __inline void __DEFAULT_FN_ATTRS128
5082_mm_store_epi32 (void *__P, __m128i __A)
5083{
5084 *(__m128i *) __P = __A;
5085}
5086
5087static __inline__ void __DEFAULT_FN_ATTRS128
5088_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5089{
5090 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5091 (__v4si) __A,
5092 (__mmask8) __U);
5093}
5094
5095static __inline void __DEFAULT_FN_ATTRS256
5096_mm256_store_epi32 (void *__P, __m256i __A)
5097{
5098 *(__m256i *) __P = __A;
5099}
5100
5101static __inline__ void __DEFAULT_FN_ATTRS256
5102_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5103{
5104 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5105 (__v8si) __A,
5106 (__mmask8) __U);
5107}
5108
5109static __inline__ __m128i __DEFAULT_FN_ATTRS128
5110_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5111{
5112 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5113 (__v2di) __A,
5114 (__v2di) __W);
5115}
5116
5117static __inline__ __m128i __DEFAULT_FN_ATTRS128
5119{
5120 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5121 (__v2di) __A,
5122 (__v2di) _mm_setzero_si128 ());
5123}
5124
5125static __inline__ __m256i __DEFAULT_FN_ATTRS256
5126_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5127{
5128 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5129 (__v4di) __A,
5130 (__v4di) __W);
5131}
5132
5133static __inline__ __m256i __DEFAULT_FN_ATTRS256
5135{
5136 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5137 (__v4di) __A,
5138 (__v4di) _mm256_setzero_si256 ());
5139}
5140
5141static __inline __m128i __DEFAULT_FN_ATTRS128
5142_mm_load_epi64 (void const *__P)
5143{
5144 return *(const __m128i *) __P;
5145}
5146
5147static __inline__ __m128i __DEFAULT_FN_ATTRS128
5148_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5149{
5150 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5151 (__v2di) __W,
5152 (__mmask8)
5153 __U);
5154}
5155
5156static __inline__ __m128i __DEFAULT_FN_ATTRS128
5158{
5159 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5160 (__v2di)
5162 (__mmask8)
5163 __U);
5164}
5165
5166static __inline __m256i __DEFAULT_FN_ATTRS256
5168{
5169 return *(const __m256i *) __P;
5170}
5171
5172static __inline__ __m256i __DEFAULT_FN_ATTRS256
5173_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5174{
5175 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5176 (__v4di) __W,
5177 (__mmask8)
5178 __U);
5179}
5180
5181static __inline__ __m256i __DEFAULT_FN_ATTRS256
5183{
5184 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5185 (__v4di)
5187 (__mmask8)
5188 __U);
5189}
5190
5191static __inline void __DEFAULT_FN_ATTRS128
5192_mm_store_epi64 (void *__P, __m128i __A)
5193{
5194 *(__m128i *) __P = __A;
5195}
5196
5197static __inline__ void __DEFAULT_FN_ATTRS128
5198_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5199{
5200 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5201 (__v2di) __A,
5202 (__mmask8) __U);
5203}
5204
5205static __inline void __DEFAULT_FN_ATTRS256
5206_mm256_store_epi64 (void *__P, __m256i __A)
5207{
5208 *(__m256i *) __P = __A;
5209}
5210
5211static __inline__ void __DEFAULT_FN_ATTRS256
5212_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5213{
5214 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5215 (__v4di) __A,
5216 (__mmask8) __U);
5217}
5218
5219static __inline__ __m128d __DEFAULT_FN_ATTRS128
5220_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5221{
5222 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5223 (__v2df)_mm_movedup_pd(__A),
5224 (__v2df)__W);
5225}
5226
5227static __inline__ __m128d __DEFAULT_FN_ATTRS128
5229{
5230 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5231 (__v2df)_mm_movedup_pd(__A),
5232 (__v2df)_mm_setzero_pd());
5233}
5234
5235static __inline__ __m256d __DEFAULT_FN_ATTRS256
5236_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5237{
5238 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5239 (__v4df)_mm256_movedup_pd(__A),
5240 (__v4df)__W);
5241}
5242
5243static __inline__ __m256d __DEFAULT_FN_ATTRS256
5245{
5246 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5247 (__v4df)_mm256_movedup_pd(__A),
5248 (__v4df)_mm256_setzero_pd());
5249}
5250
5251static __inline__ __m128i __DEFAULT_FN_ATTRS128
5252_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5253{
5254 return (__m128i)__builtin_ia32_selectd_128(__M,
5255 (__v4si) _mm_set1_epi32(__A),
5256 (__v4si)__O);
5257}
5258
5259static __inline__ __m128i __DEFAULT_FN_ATTRS128
5261{
5262 return (__m128i)__builtin_ia32_selectd_128(__M,
5263 (__v4si) _mm_set1_epi32(__A),
5264 (__v4si)_mm_setzero_si128());
5265}
5266
5267static __inline__ __m256i __DEFAULT_FN_ATTRS256
5268_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5269{
5270 return (__m256i)__builtin_ia32_selectd_256(__M,
5271 (__v8si) _mm256_set1_epi32(__A),
5272 (__v8si)__O);
5273}
5274
5275static __inline__ __m256i __DEFAULT_FN_ATTRS256
5277{
5278 return (__m256i)__builtin_ia32_selectd_256(__M,
5279 (__v8si) _mm256_set1_epi32(__A),
5280 (__v8si)_mm256_setzero_si256());
5281}
5282
5283
5284static __inline__ __m128i __DEFAULT_FN_ATTRS128
5285_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5286{
5287 return (__m128i) __builtin_ia32_selectq_128(__M,
5288 (__v2di) _mm_set1_epi64x(__A),
5289 (__v2di) __O);
5290}
5291
5292static __inline__ __m128i __DEFAULT_FN_ATTRS128
5293_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5294{
5295 return (__m128i) __builtin_ia32_selectq_128(__M,
5296 (__v2di) _mm_set1_epi64x(__A),
5297 (__v2di) _mm_setzero_si128());
5298}
5299
5300static __inline__ __m256i __DEFAULT_FN_ATTRS256
5301_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5302{
5303 return (__m256i) __builtin_ia32_selectq_256(__M,
5304 (__v4di) _mm256_set1_epi64x(__A),
5305 (__v4di) __O) ;
5306}
5307
5308static __inline__ __m256i __DEFAULT_FN_ATTRS256
5310{
5311 return (__m256i) __builtin_ia32_selectq_256(__M,
5312 (__v4di) _mm256_set1_epi64x(__A),
5313 (__v4di) _mm256_setzero_si256());
5314}
5315
/* Unmasked 128-bit double fixupimm: forwards to the _mask builtin with an
 * all-ones mask ((__mmask8)-1). */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1))
5321
/* Masked 128-bit double fixupimm: forwards to the _mask builtin with the
 * caller's mask U. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5327
/* Zero-masked 128-bit double fixupimm: forwards to the _maskz builtin with
 * the caller's mask U. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5333
/* Unmasked 256-bit double fixupimm: forwards to the _mask builtin with an
 * all-ones mask ((__mmask8)-1). */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)-1))
5339
/* Masked 256-bit double fixupimm: forwards to the _mask builtin with the
 * caller's mask U. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5345
/* Zero-masked 256-bit double fixupimm: forwards to the _maskz builtin with
 * the caller's mask U. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5351
/* Unmasked 128-bit float fixupimm: forwards to the _mask builtin with an
 * all-ones mask ((__mmask8)-1). */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1))
5357
/* Masked 128-bit float fixupimm: forwards to the _mask builtin with the
 * caller's mask U. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5363
/* Zero-masked 128-bit float fixupimm: forwards to the _maskz builtin with
 * the caller's mask U. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5369
/* Unmasked 256-bit float fixupimm: forwards to the _mask builtin with an
 * all-ones mask ((__mmask8)-1). */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)-1))
5375
/* Masked 256-bit float fixupimm: forwards to the _mask builtin with the
 * caller's mask U. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5381
/* Zero-masked 256-bit float fixupimm: forwards to the _maskz builtin with
 * the caller's mask U. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5387
5388static __inline__ __m128d __DEFAULT_FN_ATTRS128
5389_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5390{
5391 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5392 (__v2df) __W,
5393 (__mmask8) __U);
5394}
5395
5396static __inline__ __m128d __DEFAULT_FN_ATTRS128
5398{
5399 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5400 (__v2df)
5401 _mm_setzero_pd (),
5402 (__mmask8) __U);
5403}
5404
5405static __inline__ __m256d __DEFAULT_FN_ATTRS256
5406_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5407{
5408 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5409 (__v4df) __W,
5410 (__mmask8) __U);
5411}
5412
5413static __inline__ __m256d __DEFAULT_FN_ATTRS256
5415{
5416 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5417 (__v4df)
5419 (__mmask8) __U);
5420}
5421
5422static __inline__ __m128 __DEFAULT_FN_ATTRS128
5423_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5424{
5425 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5426 (__v4sf) __W,
5427 (__mmask8) __U);
5428}
5429
5430static __inline__ __m128 __DEFAULT_FN_ATTRS128
5432{
5433 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5434 (__v4sf)
5435 _mm_setzero_ps (),
5436 (__mmask8) __U);
5437}
5438
5439static __inline__ __m256 __DEFAULT_FN_ATTRS256
5440_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5441{
5442 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5443 (__v8sf) __W,
5444 (__mmask8) __U);
5445}
5446
5447static __inline__ __m256 __DEFAULT_FN_ATTRS256
5449{
5450 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5451 (__v8sf)
5453 (__mmask8) __U);
5454}
5455
5456static __inline __m128i __DEFAULT_FN_ATTRS128
5458{
5459 struct __loadu_epi64 {
5460 __m128i_u __v;
5461 } __attribute__((__packed__, __may_alias__));
5462 return ((const struct __loadu_epi64*)__P)->__v;
5463}
5464
5465static __inline__ __m128i __DEFAULT_FN_ATTRS128
5466_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5467{
5468 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5469 (__v2di) __W,
5470 (__mmask8) __U);
5471}
5472
5473static __inline__ __m128i __DEFAULT_FN_ATTRS128
5475{
5476 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5477 (__v2di)
5479 (__mmask8) __U);
5480}
5481
5482static __inline __m256i __DEFAULT_FN_ATTRS256
5484{
5485 struct __loadu_epi64 {
5486 __m256i_u __v;
5487 } __attribute__((__packed__, __may_alias__));
5488 return ((const struct __loadu_epi64*)__P)->__v;
5489}
5490
5491static __inline__ __m256i __DEFAULT_FN_ATTRS256
5492_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5493{
5494 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5495 (__v4di) __W,
5496 (__mmask8) __U);
5497}
5498
5499static __inline__ __m256i __DEFAULT_FN_ATTRS256
5501{
5502 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5503 (__v4di)
5505 (__mmask8) __U);
5506}
5507
5508static __inline __m128i __DEFAULT_FN_ATTRS128
5510{
5511 struct __loadu_epi32 {
5512 __m128i_u __v;
5513 } __attribute__((__packed__, __may_alias__));
5514 return ((const struct __loadu_epi32*)__P)->__v;
5515}
5516
5517static __inline__ __m128i __DEFAULT_FN_ATTRS128
5518_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5519{
5520 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5521 (__v4si) __W,
5522 (__mmask8) __U);
5523}
5524
5525static __inline__ __m128i __DEFAULT_FN_ATTRS128
5527{
5528 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5529 (__v4si)
5531 (__mmask8) __U);
5532}
5533
5534static __inline __m256i __DEFAULT_FN_ATTRS256
5536{
5537 struct __loadu_epi32 {
5538 __m256i_u __v;
5539 } __attribute__((__packed__, __may_alias__));
5540 return ((const struct __loadu_epi32*)__P)->__v;
5541}
5542
5543static __inline__ __m256i __DEFAULT_FN_ATTRS256
5544_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5545{
5546 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5547 (__v8si) __W,
5548 (__mmask8) __U);
5549}
5550
5551static __inline__ __m256i __DEFAULT_FN_ATTRS256
5553{
5554 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5555 (__v8si)
5557 (__mmask8) __U);
5558}
5559
5560static __inline__ __m128d __DEFAULT_FN_ATTRS128
5561_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5562{
5563 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5564 (__v2df) __W,
5565 (__mmask8) __U);
5566}
5567
5568static __inline__ __m128d __DEFAULT_FN_ATTRS128
5570{
5571 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5572 (__v2df)
5573 _mm_setzero_pd (),
5574 (__mmask8) __U);
5575}
5576
5577static __inline__ __m256d __DEFAULT_FN_ATTRS256
5578_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5579{
5580 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5581 (__v4df) __W,
5582 (__mmask8) __U);
5583}
5584
5585static __inline__ __m256d __DEFAULT_FN_ATTRS256
5587{
5588 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5589 (__v4df)
5591 (__mmask8) __U);
5592}
5593
5594static __inline__ __m128 __DEFAULT_FN_ATTRS128
5595_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5596{
5597 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5598 (__v4sf) __W,
5599 (__mmask8) __U);
5600}
5601
5602static __inline__ __m128 __DEFAULT_FN_ATTRS128
5604{
5605 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5606 (__v4sf)
5607 _mm_setzero_ps (),
5608 (__mmask8) __U);
5609}
5610
5611static __inline__ __m256 __DEFAULT_FN_ATTRS256
5612_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5613{
5614 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5615 (__v8sf) __W,
5616 (__mmask8) __U);
5617}
5618
5619static __inline__ __m256 __DEFAULT_FN_ATTRS256
5621{
5622 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5623 (__v8sf)
5625 (__mmask8) __U);
5626}
5627
5628static __inline__ void __DEFAULT_FN_ATTRS128
5629_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5630{
5631 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5632 (__v2df) __A,
5633 (__mmask8) __U);
5634}
5635
5636static __inline__ void __DEFAULT_FN_ATTRS256
5637_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5638{
5639 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5640 (__v4df) __A,
5641 (__mmask8) __U);
5642}
5643
5644static __inline__ void __DEFAULT_FN_ATTRS128
5645_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5646{
5647 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5648 (__v4sf) __A,
5649 (__mmask8) __U);
5650}
5651
5652static __inline__ void __DEFAULT_FN_ATTRS256
5653_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5654{
5655 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5656 (__v8sf) __A,
5657 (__mmask8) __U);
5658}
5659
5660static __inline void __DEFAULT_FN_ATTRS128
5661_mm_storeu_epi64 (void *__P, __m128i __A)
5662{
5663 struct __storeu_epi64 {
5664 __m128i_u __v;
5665 } __attribute__((__packed__, __may_alias__));
5666 ((struct __storeu_epi64*)__P)->__v = __A;
5667}
5668
5669static __inline__ void __DEFAULT_FN_ATTRS128
5670_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5671{
5672 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5673 (__v2di) __A,
5674 (__mmask8) __U);
5675}
5676
5677static __inline void __DEFAULT_FN_ATTRS256
5678_mm256_storeu_epi64 (void *__P, __m256i __A)
5679{
5680 struct __storeu_epi64 {
5681 __m256i_u __v;
5682 } __attribute__((__packed__, __may_alias__));
5683 ((struct __storeu_epi64*)__P)->__v = __A;
5684}
5685
5686static __inline__ void __DEFAULT_FN_ATTRS256
5687_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5688{
5689 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5690 (__v4di) __A,
5691 (__mmask8) __U);
5692}
5693
5694static __inline void __DEFAULT_FN_ATTRS128
5695_mm_storeu_epi32 (void *__P, __m128i __A)
5696{
5697 struct __storeu_epi32 {
5698 __m128i_u __v;
5699 } __attribute__((__packed__, __may_alias__));
5700 ((struct __storeu_epi32*)__P)->__v = __A;
5701}
5702
5703static __inline__ void __DEFAULT_FN_ATTRS128
5704_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5705{
5706 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5707 (__v4si) __A,
5708 (__mmask8) __U);
5709}
5710
5711static __inline void __DEFAULT_FN_ATTRS256
5712_mm256_storeu_epi32 (void *__P, __m256i __A)
5713{
5714 struct __storeu_epi32 {
5715 __m256i_u __v;
5716 } __attribute__((__packed__, __may_alias__));
5717 ((struct __storeu_epi32*)__P)->__v = __A;
5718}
5719
5720static __inline__ void __DEFAULT_FN_ATTRS256
5721_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5722{
5723 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5724 (__v8si) __A,
5725 (__mmask8) __U);
5726}
5727
5728static __inline__ void __DEFAULT_FN_ATTRS128
5729_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5730{
5731 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5732 (__v2df) __A,
5733 (__mmask8) __U);
5734}
5735
5736static __inline__ void __DEFAULT_FN_ATTRS256
5737_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5738{
5739 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5740 (__v4df) __A,
5741 (__mmask8) __U);
5742}
5743
5744static __inline__ void __DEFAULT_FN_ATTRS128
5745_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5746{
5747 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5748 (__v4sf) __A,
5749 (__mmask8) __U);
5750}
5751
5752static __inline__ void __DEFAULT_FN_ATTRS256
5753_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5754{
5755 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5756 (__v8sf) __A,
5757 (__mmask8) __U);
5758}
5759
5760
5761static __inline__ __m128d __DEFAULT_FN_ATTRS128
5762_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5763{
5764 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5765 (__v2df)_mm_unpackhi_pd(__A, __B),
5766 (__v2df)__W);
5767}
5768
5769static __inline__ __m128d __DEFAULT_FN_ATTRS128
5770_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5771{
5772 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5773 (__v2df)_mm_unpackhi_pd(__A, __B),
5774 (__v2df)_mm_setzero_pd());
5775}
5776
5777static __inline__ __m256d __DEFAULT_FN_ATTRS256
5778_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5779{
5780 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5781 (__v4df)_mm256_unpackhi_pd(__A, __B),
5782 (__v4df)__W);
5783}
5784
5785static __inline__ __m256d __DEFAULT_FN_ATTRS256
5786_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5787{
5788 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5789 (__v4df)_mm256_unpackhi_pd(__A, __B),
5790 (__v4df)_mm256_setzero_pd());
5791}
5792
5793static __inline__ __m128 __DEFAULT_FN_ATTRS128
5794_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5795{
5796 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5797 (__v4sf)_mm_unpackhi_ps(__A, __B),
5798 (__v4sf)__W);
5799}
5800
5801static __inline__ __m128 __DEFAULT_FN_ATTRS128
5802_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5803{
5804 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5805 (__v4sf)_mm_unpackhi_ps(__A, __B),
5806 (__v4sf)_mm_setzero_ps());
5807}
5808
5809static __inline__ __m256 __DEFAULT_FN_ATTRS256
5810_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5811{
5812 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5813 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5814 (__v8sf)__W);
5815}
5816
5817static __inline__ __m256 __DEFAULT_FN_ATTRS256
5818_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5819{
5820 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5821 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5822 (__v8sf)_mm256_setzero_ps());
5823}
5824
5825static __inline__ __m128d __DEFAULT_FN_ATTRS128
5826_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5827{
5828 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5829 (__v2df)_mm_unpacklo_pd(__A, __B),
5830 (__v2df)__W);
5831}
5832
5833static __inline__ __m128d __DEFAULT_FN_ATTRS128
5834_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5835{
5836 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5837 (__v2df)_mm_unpacklo_pd(__A, __B),
5838 (__v2df)_mm_setzero_pd());
5839}
5840
5841static __inline__ __m256d __DEFAULT_FN_ATTRS256
5842_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5843{
5844 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5845 (__v4df)_mm256_unpacklo_pd(__A, __B),
5846 (__v4df)__W);
5847}
5848
5849static __inline__ __m256d __DEFAULT_FN_ATTRS256
5850_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5851{
5852 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5853 (__v4df)_mm256_unpacklo_pd(__A, __B),
5854 (__v4df)_mm256_setzero_pd());
5855}
5856
5857static __inline__ __m128 __DEFAULT_FN_ATTRS128
5858_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5859{
5860 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5861 (__v4sf)_mm_unpacklo_ps(__A, __B),
5862 (__v4sf)__W);
5863}
5864
5865static __inline__ __m128 __DEFAULT_FN_ATTRS128
5866_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5867{
5868 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5869 (__v4sf)_mm_unpacklo_ps(__A, __B),
5870 (__v4sf)_mm_setzero_ps());
5871}
5872
5873static __inline__ __m256 __DEFAULT_FN_ATTRS256
5874_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5875{
5876 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5877 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5878 (__v8sf)__W);
5879}
5880
5881static __inline__ __m256 __DEFAULT_FN_ATTRS256
5882_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5883{
5884 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5885 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5886 (__v8sf)_mm256_setzero_ps());
5887}
5888
5889static __inline__ __m128d __DEFAULT_FN_ATTRS128
5890_mm_rcp14_pd (__m128d __A)
5891{
5892 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5893 (__v2df)
5894 _mm_setzero_pd (),
5895 (__mmask8) -1);
5896}
5897
5898static __inline__ __m128d __DEFAULT_FN_ATTRS128
5899_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5900{
5901 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5902 (__v2df) __W,
5903 (__mmask8) __U);
5904}
5905
5906static __inline__ __m128d __DEFAULT_FN_ATTRS128
5908{
5909 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5910 (__v2df)
5911 _mm_setzero_pd (),
5912 (__mmask8) __U);
5913}
5914
5915static __inline__ __m256d __DEFAULT_FN_ATTRS256
5916_mm256_rcp14_pd (__m256d __A)
5917{
5918 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5919 (__v4df)
5921 (__mmask8) -1);
5922}
5923
5924static __inline__ __m256d __DEFAULT_FN_ATTRS256
5925_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5926{
5927 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5928 (__v4df) __W,
5929 (__mmask8) __U);
5930}
5931
5932static __inline__ __m256d __DEFAULT_FN_ATTRS256
5934{
5935 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5936 (__v4df)
5938 (__mmask8) __U);
5939}
5940
5941static __inline__ __m128 __DEFAULT_FN_ATTRS128
5942_mm_rcp14_ps (__m128 __A)
5943{
5944 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5945 (__v4sf)
5946 _mm_setzero_ps (),
5947 (__mmask8) -1);
5948}
5949
5950static __inline__ __m128 __DEFAULT_FN_ATTRS128
5951_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
5952{
5953 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5954 (__v4sf) __W,
5955 (__mmask8) __U);
5956}
5957
5958static __inline__ __m128 __DEFAULT_FN_ATTRS128
5960{
5961 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5962 (__v4sf)
5963 _mm_setzero_ps (),
5964 (__mmask8) __U);
5965}
5966
5967static __inline__ __m256 __DEFAULT_FN_ATTRS256
5969{
5970 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5971 (__v8sf)
5973 (__mmask8) -1);
5974}
5975
5976static __inline__ __m256 __DEFAULT_FN_ATTRS256
5977_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
5978{
5979 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5980 (__v8sf) __W,
5981 (__mmask8) __U);
5982}
5983
5984static __inline__ __m256 __DEFAULT_FN_ATTRS256
5986{
5987 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5988 (__v8sf)
5990 (__mmask8) __U);
5991}
5992
/* Masked _mm_permute_pd((X), (C)): the permute result and (W) go to
 * __builtin_ia32_selectpd_128 under mask (U). */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)(__m128d)(W)))
5997
/* Zero-masked _mm_permute_pd((X), (C)): the permute result and a zero vector
 * go to __builtin_ia32_selectpd_128 under mask (U). */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)_mm_setzero_pd()))
6002
/* Masked _mm256_permute_pd((X), (C)): the permute result and (W) go to
 * __builtin_ia32_selectpd_256 under mask (U). */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)(__m256d)(W)))
6007
/* Zero-masked _mm256_permute_pd((X), (C)): the permute result and a zero
 * vector go to __builtin_ia32_selectpd_256 under mask (U). */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
6012
/* Masked _mm_permute_ps((X), (C)): the permute result and (W) go to
 * __builtin_ia32_selectps_128 under mask (U). */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)(__m128)(W)))
6017
/* Zero-masked _mm_permute_ps((X), (C)): the permute result and a zero vector
 * go to __builtin_ia32_selectps_128 under mask (U). */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)_mm_setzero_ps()))
6022
/* Masked _mm256_permute_ps((X), (C)): the permute result and (W) go to
 * __builtin_ia32_selectps_256 under mask (U). */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)(__m256)(W)))
6027
/* Zero-masked _mm256_permute_ps((X), (C)): the permute result and a zero
 * vector go to __builtin_ia32_selectps_256 under mask (U). */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)_mm256_setzero_ps()))
6032
6033static __inline__ __m128d __DEFAULT_FN_ATTRS128
6034_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
6035{
6036 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6037 (__v2df)_mm_permutevar_pd(__A, __C),
6038 (__v2df)__W);
6039}
6040
6041static __inline__ __m128d __DEFAULT_FN_ATTRS128
6042_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
6043{
6044 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6045 (__v2df)_mm_permutevar_pd(__A, __C),
6046 (__v2df)_mm_setzero_pd());
6047}
6048
6049static __inline__ __m256d __DEFAULT_FN_ATTRS256
6050_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
6051{
6052 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6053 (__v4df)_mm256_permutevar_pd(__A, __C),
6054 (__v4df)__W);
6055}
6056
6057static __inline__ __m256d __DEFAULT_FN_ATTRS256
6058_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
6059{
6060 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6061 (__v4df)_mm256_permutevar_pd(__A, __C),
6062 (__v4df)_mm256_setzero_pd());
6063}
6064
6065static __inline__ __m128 __DEFAULT_FN_ATTRS128
6066_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
6067{
6068 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6069 (__v4sf)_mm_permutevar_ps(__A, __C),
6070 (__v4sf)__W);
6071}
6072
6073static __inline__ __m128 __DEFAULT_FN_ATTRS128
6074_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
6075{
6076 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6077 (__v4sf)_mm_permutevar_ps(__A, __C),
6078 (__v4sf)_mm_setzero_ps());
6079}
6080
6081static __inline__ __m256 __DEFAULT_FN_ATTRS256
6082_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
6083{
6084 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6085 (__v8sf)_mm256_permutevar_ps(__A, __C),
6086 (__v8sf)__W);
6087}
6088
6089static __inline__ __m256 __DEFAULT_FN_ATTRS256
6090_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
6091{
6092 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6093 (__v8sf)_mm256_permutevar_ps(__A, __C),
6094 (__v8sf)_mm256_setzero_ps());
6095}
6096
6097static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6098_mm_test_epi32_mask (__m128i __A, __m128i __B)
6099{
6101}
6102
6103static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6104_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6105{
6106 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6108}
6109
6110static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6111_mm256_test_epi32_mask (__m256i __A, __m256i __B)
6112{
6113 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6115}
6116
6117static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6118_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6119{
6120 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6122}
6123
6124static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6125_mm_test_epi64_mask (__m128i __A, __m128i __B)
6126{
6128}
6129
6130static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6131_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6132{
6133 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6135}
6136
6137static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6138_mm256_test_epi64_mask (__m256i __A, __m256i __B)
6139{
6140 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6142}
6143
6144static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6145_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6146{
6147 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6149}
6150
6151static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6152_mm_testn_epi32_mask (__m128i __A, __m128i __B)
6153{
6155}
6156
6157static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6158_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6159{
6160 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6162}
6163
6164static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6165_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6166{
6167 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6169}
6170
6171static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6172_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6173{
6174 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6176}
6177
6178static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6179_mm_testn_epi64_mask (__m128i __A, __m128i __B)
6180{
6182}
6183
6184static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6185_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6186{
6187 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6189}
6190
6191static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6192_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6193{
6194 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6196}
6197
6198static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6199_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6200{
6201 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6203}
6204
6205static __inline__ __m128i __DEFAULT_FN_ATTRS128
6206_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6207{
6208 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6209 (__v4si)_mm_unpackhi_epi32(__A, __B),
6210 (__v4si)__W);
6211}
6212
6213static __inline__ __m128i __DEFAULT_FN_ATTRS128
6214_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6215{
6216 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6217 (__v4si)_mm_unpackhi_epi32(__A, __B),
6218 (__v4si)_mm_setzero_si128());
6219}
6220
6221static __inline__ __m256i __DEFAULT_FN_ATTRS256
6222_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6223{
6224 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6225 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6226 (__v8si)__W);
6227}
6228
6229static __inline__ __m256i __DEFAULT_FN_ATTRS256
6230_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6231{
6232 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6233 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6234 (__v8si)_mm256_setzero_si256());
6235}
6236
6237static __inline__ __m128i __DEFAULT_FN_ATTRS128
6238_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6239{
6240 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6241 (__v2di)_mm_unpackhi_epi64(__A, __B),
6242 (__v2di)__W);
6243}
6244
6245static __inline__ __m128i __DEFAULT_FN_ATTRS128
6246_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6247{
6248 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6249 (__v2di)_mm_unpackhi_epi64(__A, __B),
6250 (__v2di)_mm_setzero_si128());
6251}
6252
6253static __inline__ __m256i __DEFAULT_FN_ATTRS256
6254_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6255{
6256 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6257 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6258 (__v4di)__W);
6259}
6260
6261static __inline__ __m256i __DEFAULT_FN_ATTRS256
6262_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6263{
6264 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6265 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6266 (__v4di)_mm256_setzero_si256());
6267}
6268
6269static __inline__ __m128i __DEFAULT_FN_ATTRS128
6270_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6271{
6272 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6273 (__v4si)_mm_unpacklo_epi32(__A, __B),
6274 (__v4si)__W);
6275}
6276
6277static __inline__ __m128i __DEFAULT_FN_ATTRS128
6278_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6279{
6280 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6281 (__v4si)_mm_unpacklo_epi32(__A, __B),
6282 (__v4si)_mm_setzero_si128());
6283}
6284
6285static __inline__ __m256i __DEFAULT_FN_ATTRS256
6286_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6287{
6288 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6289 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6290 (__v8si)__W);
6291}
6292
6293static __inline__ __m256i __DEFAULT_FN_ATTRS256
6294_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6295{
6296 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6297 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6298 (__v8si)_mm256_setzero_si256());
6299}
6300
6301static __inline__ __m128i __DEFAULT_FN_ATTRS128
6302_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6303{
6304 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6305 (__v2di)_mm_unpacklo_epi64(__A, __B),
6306 (__v2di)__W);
6307}
6308
6309static __inline__ __m128i __DEFAULT_FN_ATTRS128
6310_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6311{
6312 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6313 (__v2di)_mm_unpacklo_epi64(__A, __B),
6314 (__v2di)_mm_setzero_si128());
6315}
6316
6317static __inline__ __m256i __DEFAULT_FN_ATTRS256
6318_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6319{
6320 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6321 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6322 (__v4di)__W);
6323}
6324
6325static __inline__ __m256i __DEFAULT_FN_ATTRS256
6326_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6327{
6328 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6329 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6330 (__v4di)_mm256_setzero_si256());
6331}
6332
6333static __inline__ __m128i __DEFAULT_FN_ATTRS128
6334_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6335{
6336 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6337 (__v4si)_mm_sra_epi32(__A, __B),
6338 (__v4si)__W);
6339}
6340
6341static __inline__ __m128i __DEFAULT_FN_ATTRS128
6342_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6343{
6344 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6345 (__v4si)_mm_sra_epi32(__A, __B),
6346 (__v4si)_mm_setzero_si128());
6347}
6348
6349static __inline__ __m256i __DEFAULT_FN_ATTRS256
6350_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6351{
6352 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6353 (__v8si)_mm256_sra_epi32(__A, __B),
6354 (__v8si)__W);
6355}
6356
6357static __inline__ __m256i __DEFAULT_FN_ATTRS256
6358_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6359{
6360 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6361 (__v8si)_mm256_sra_epi32(__A, __B),
6362 (__v8si)_mm256_setzero_si256());
6363}
6364
6365static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6366_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) {
6367 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6368 (__v4si)_mm_srai_epi32(__A, (int)__B),
6369 (__v4si)__W);
6370}
6371
6372static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6373_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) {
6374 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6375 (__v4si)_mm_srai_epi32(__A, (int)__B),
6376 (__v4si)_mm_setzero_si128());
6377}
6378
6379static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6380_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A,
6381 unsigned int __B) {
6382 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6383 (__v8si)_mm256_srai_epi32(__A, (int)__B),
6384 (__v8si)__W);
6385}
6386
6387static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6388_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
6389 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6390 (__v8si)_mm256_srai_epi32(__A, (int)__B),
6391 (__v8si)_mm256_setzero_si256());
6392}
6393
6394static __inline__ __m128i __DEFAULT_FN_ATTRS128
6395_mm_sra_epi64(__m128i __A, __m128i __B)
6396{
6397 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6398}
6399
6400static __inline__ __m128i __DEFAULT_FN_ATTRS128
6401_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6402{
6403 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6404 (__v2di)_mm_sra_epi64(__A, __B), \
6405 (__v2di)__W);
6406}
6407
6408static __inline__ __m128i __DEFAULT_FN_ATTRS128
6409_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6410{
6411 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6412 (__v2di)_mm_sra_epi64(__A, __B), \
6413 (__v2di)_mm_setzero_si128());
6414}
6415
6416static __inline__ __m256i __DEFAULT_FN_ATTRS256
6417_mm256_sra_epi64(__m256i __A, __m128i __B)
6418{
6419 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6420}
6421
6422static __inline__ __m256i __DEFAULT_FN_ATTRS256
6423_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6424{
6425 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6426 (__v4di)_mm256_sra_epi64(__A, __B), \
6427 (__v4di)__W);
6428}
6429
6430static __inline__ __m256i __DEFAULT_FN_ATTRS256
6431_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6432{
6433 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6434 (__v4di)_mm256_sra_epi64(__A, __B), \
6435 (__v4di)_mm256_setzero_si256());
6436}
6437
6438static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6439_mm_srai_epi64(__m128i __A, unsigned int __imm) {
6440 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm);
6441}
6442
6444 __m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) {
6445 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6446 (__v2di)_mm_srai_epi64(__A, __imm), \
6447 (__v2di)__W);
6448}
6449
6450static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
6451_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) {
6452 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6453 (__v2di)_mm_srai_epi64(__A, __imm), \
6454 (__v2di)_mm_setzero_si128());
6455}
6456
6457static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6458_mm256_srai_epi64(__m256i __A, unsigned int __imm) {
6459 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm);
6460}
6461
6462static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6463_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6464 unsigned int __imm) {
6465 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6466 (__v4di)_mm256_srai_epi64(__A, __imm), \
6467 (__v4di)__W);
6468}
6469
6470static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6471_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) {
6472 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6473 (__v4di)_mm256_srai_epi64(__A, __imm), \
6474 (__v4di)_mm256_setzero_si256());
6475}
6476
/* Forwards (A), (B), (C) as __v4si, (imm) as unsigned char and an all-ones
 * mask to __builtin_ia32_pternlogd128_mask. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)-1))
6481
/* Forwards (A), (B), (C) as __v4si, (imm) as unsigned char and (U) as the
 * mask to __builtin_ia32_pternlogd128_mask. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6486
/* Forwards (A), (B), (C) as __v4si, (imm) as unsigned char and (U) as the
 * mask to __builtin_ia32_pternlogd128_maskz. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6491
/* Forwards (A), (B), (C) as __v8si, (imm) as unsigned char and an all-ones
 * mask to __builtin_ia32_pternlogd256_mask. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)-1))
6496
/* Forwards (A), (B), (C) as __v8si, (imm) as unsigned char and (U) as the
 * mask to __builtin_ia32_pternlogd256_mask. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6501
/* Forwards (A), (B), (C) as __v8si, (imm) as unsigned char and (U) as the
 * mask to __builtin_ia32_pternlogd256_maskz. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6506
/* Forwards (A), (B), (C) as __v2di, (imm) as unsigned char and an all-ones
 * mask to __builtin_ia32_pternlogq128_mask. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)-1))
6511
/* Forwards (A), (B), (C) as __v2di, (imm) as unsigned char and (U) as the
 * mask to __builtin_ia32_pternlogq128_mask. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6516
/* Forwards (A), (B), (C) as __v2di, (imm) as unsigned char and (U) as the
 * mask to __builtin_ia32_pternlogq128_maskz. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6521
/* Forwards (A), (B), (C) as __v4di, (imm) as unsigned char and an all-ones
 * mask to __builtin_ia32_pternlogq256_mask. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)-1))
6526
/* Forwards (A), (B), (C) as __v4di, (imm) as unsigned char and (U) as the
 * mask to __builtin_ia32_pternlogq256_mask. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6531
/* Forwards (A), (B), (C) as __v4di, (imm) as unsigned char and (U) as the
 * mask to __builtin_ia32_pternlogq256_maskz. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (unsigned char)(imm), \
      (__mmask8)(U)))
6536
/* Passes (A) and (B) as __v8sf and (imm) as int to
 * __builtin_ia32_shuf_f32x4_256. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm)))
6540
/* Masked _mm256_shuffle_f32x4((A), (B), (imm)): the shuffle result and (W)
 * go to __builtin_ia32_selectps_256 under mask (U). */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
6545
/* Zero-masked _mm256_shuffle_f32x4((A), (B), (imm)): the shuffle result and
 * a zero vector go to __builtin_ia32_selectps_256 under mask (U). */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
6550
/* Passes (A) and (B) as __v4df and (imm) as int to
 * __builtin_ia32_shuf_f64x2_256. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (int)(imm)))
6554
/* Masked _mm256_shuffle_f64x2((A), (B), (imm)): the shuffle result and (W)
 * go to __builtin_ia32_selectpd_256 under mask (U). */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)(__m256d)(W)))
6559
/* Zero-masked _mm256_shuffle_f64x2((A), (B), (imm)): the shuffle result and
 * a zero vector go to __builtin_ia32_selectpd_256 under mask (U). */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)_mm256_setzero_pd()))
6564
/* Passes (A) and (B) as __v8si and (imm) as int to
 * __builtin_ia32_shuf_i32x4_256. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm)))
6568
/* Masked _mm256_shuffle_i32x4((A), (B), (imm)): the shuffle result and (W)
 * go to __builtin_ia32_selectd_256 under mask (U). */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
6573
/* Zero-masked _mm256_shuffle_i32x4((A), (B), (imm)): the shuffle result and
 * a zero vector go to __builtin_ia32_selectd_256 under mask (U). */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
6578
/* Passes (A) and (B) as __v4di and (imm) as int to
 * __builtin_ia32_shuf_i64x2_256. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (int)(imm)))
6582
/* Masked _mm256_shuffle_i64x2((A), (B), (imm)): the shuffle result and (W)
 * go to __builtin_ia32_selectq_256 under mask (U). */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))
6587
6588
/* Zero-masked _mm256_shuffle_i64x2((A), (B), (imm)): the shuffle result and
 * a zero vector go to __builtin_ia32_selectq_256 under mask (U). */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
6593
/* Masked _mm_shuffle_pd((A), (B), (M)): the shuffle result and (W) go to
 * __builtin_ia32_selectpd_128 under mask (U). */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)(__m128d)(W)))
6598
/* Zero-masked _mm_shuffle_pd((A), (B), (M)): the shuffle result and a zero
 * vector go to __builtin_ia32_selectpd_128 under mask (U). */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)_mm_setzero_pd()))
6603
/* Masked _mm256_shuffle_pd((A), (B), (M)): the shuffle result and (W) go to
 * __builtin_ia32_selectpd_256 under mask (U). */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)(__m256d)(W)))
6608
/* Zero-masked _mm256_shuffle_pd((A), (B), (M)): the shuffle result and a
 * zero vector go to __builtin_ia32_selectpd_256 under mask (U). */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)_mm256_setzero_pd()))
6613
/* Masked _mm_shuffle_ps((A), (B), (M)): the shuffle result and (W) go to
 * __builtin_ia32_selectps_128 under mask (U). */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)(__m128)(W)))
6618
/* Zero-masked _mm_shuffle_ps((A), (B), (M)): the shuffle result and a zero
 * vector go to __builtin_ia32_selectps_128 under mask (U). */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)_mm_setzero_ps()))
6623
/* Masked _mm256_shuffle_ps((A), (B), (M)): the shuffle result and (W) go to
 * __builtin_ia32_selectps_256 under mask (U). */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)(__m256)(W)))
6628
/* Zero-masked _mm256_shuffle_ps((A), (B), (M)): the shuffle result and a
 * zero vector go to __builtin_ia32_selectps_256 under mask (U). */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)_mm256_setzero_ps()))
6633
6634static __inline__ __m128d __DEFAULT_FN_ATTRS128
6635_mm_rsqrt14_pd (__m128d __A)
6636{
6637 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6638 (__v2df)
6639 _mm_setzero_pd (),
6640 (__mmask8) -1);
6641}
6642
6643static __inline__ __m128d __DEFAULT_FN_ATTRS128
6644_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6645{
6646 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6647 (__v2df) __W,
6648 (__mmask8) __U);
6649}
6650
6651static __inline__ __m128d __DEFAULT_FN_ATTRS128
6653{
6654 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6655 (__v2df)
6656 _mm_setzero_pd (),
6657 (__mmask8) __U);
6658}
6659
6660static __inline__ __m256d __DEFAULT_FN_ATTRS256
6662{
6663 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6664 (__v4df)
6666 (__mmask8) -1);
6667}
6668
6669static __inline__ __m256d __DEFAULT_FN_ATTRS256
6670_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6671{
6672 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6673 (__v4df) __W,
6674 (__mmask8) __U);
6675}
6676
6677static __inline__ __m256d __DEFAULT_FN_ATTRS256
6679{
6680 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6681 (__v4df)
6683 (__mmask8) __U);
6684}
6685
6686static __inline__ __m128 __DEFAULT_FN_ATTRS128
6687_mm_rsqrt14_ps (__m128 __A)
6688{
6689 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6690 (__v4sf)
6691 _mm_setzero_ps (),
6692 (__mmask8) -1);
6693}
6694
6695static __inline__ __m128 __DEFAULT_FN_ATTRS128
6696_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6697{
6698 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6699 (__v4sf) __W,
6700 (__mmask8) __U);
6701}
6702
6703static __inline__ __m128 __DEFAULT_FN_ATTRS128
6705{
6706 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6707 (__v4sf)
6708 _mm_setzero_ps (),
6709 (__mmask8) __U);
6710}
6711
6712static __inline__ __m256 __DEFAULT_FN_ATTRS256
6714{
6715 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6716 (__v8sf)
6718 (__mmask8) -1);
6719}
6720
6721static __inline__ __m256 __DEFAULT_FN_ATTRS256
6722_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6723{
6724 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6725 (__v8sf) __W,
6726 (__mmask8) __U);
6727}
6728
6729static __inline__ __m256 __DEFAULT_FN_ATTRS256
6731{
6732 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6733 (__v8sf)
6735 (__mmask8) __U);
6736}
6737
6738static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
6740 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6741 0, 1, 2, 3, 0, 1, 2, 3);
6742}
6743
6744static __inline__ __m256 __DEFAULT_FN_ATTRS256
6745_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6746{
6747 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6748 (__v8sf)_mm256_broadcast_f32x4(__A),
6749 (__v8sf)__O);
6750}
6751
6752static __inline__ __m256 __DEFAULT_FN_ATTRS256
6754{
6755 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6756 (__v8sf)_mm256_broadcast_f32x4(__A),
6757 (__v8sf)_mm256_setzero_ps());
6758}
6759
6760static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
6762 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6763 0, 1, 2, 3, 0, 1, 2, 3);
6764}
6765
6766static __inline__ __m256i __DEFAULT_FN_ATTRS256
6767_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6768{
6769 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6770 (__v8si)_mm256_broadcast_i32x4(__A),
6771 (__v8si)__O);
6772}
6773
6774static __inline__ __m256i __DEFAULT_FN_ATTRS256
6776{
6777 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6778 (__v8si)_mm256_broadcast_i32x4(__A),
6779 (__v8si)_mm256_setzero_si256());
6780}
6781
6782static __inline__ __m256d __DEFAULT_FN_ATTRS256
6783_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6784{
6785 return (__m256d)__builtin_ia32_selectpd_256(__M,
6786 (__v4df) _mm256_broadcastsd_pd(__A),
6787 (__v4df) __O);
6788}
6789
6790static __inline__ __m256d __DEFAULT_FN_ATTRS256
6792{
6793 return (__m256d)__builtin_ia32_selectpd_256(__M,
6794 (__v4df) _mm256_broadcastsd_pd(__A),
6795 (__v4df) _mm256_setzero_pd());
6796}
6797
6798static __inline__ __m128 __DEFAULT_FN_ATTRS128
6799_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6800{
6801 return (__m128)__builtin_ia32_selectps_128(__M,
6802 (__v4sf) _mm_broadcastss_ps(__A),
6803 (__v4sf) __O);
6804}
6805
6806static __inline__ __m128 __DEFAULT_FN_ATTRS128
6808{
6809 return (__m128)__builtin_ia32_selectps_128(__M,
6810 (__v4sf) _mm_broadcastss_ps(__A),
6811 (__v4sf) _mm_setzero_ps());
6812}
6813
6814static __inline__ __m256 __DEFAULT_FN_ATTRS256
6815_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6816{
6817 return (__m256)__builtin_ia32_selectps_256(__M,
6818 (__v8sf) _mm256_broadcastss_ps(__A),
6819 (__v8sf) __O);
6820}
6821
6822static __inline__ __m256 __DEFAULT_FN_ATTRS256
6824{
6825 return (__m256)__builtin_ia32_selectps_256(__M,
6826 (__v8sf) _mm256_broadcastss_ps(__A),
6827 (__v8sf) _mm256_setzero_ps());
6828}
6829
6830static __inline__ __m128i __DEFAULT_FN_ATTRS128
6831_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6832{
6833 return (__m128i)__builtin_ia32_selectd_128(__M,
6834 (__v4si) _mm_broadcastd_epi32(__A),
6835 (__v4si) __O);
6836}
6837
6838static __inline__ __m128i __DEFAULT_FN_ATTRS128
6840{
6841 return (__m128i)__builtin_ia32_selectd_128(__M,
6842 (__v4si) _mm_broadcastd_epi32(__A),
6843 (__v4si) _mm_setzero_si128());
6844}
6845
6846static __inline__ __m256i __DEFAULT_FN_ATTRS256
6847_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6848{
6849 return (__m256i)__builtin_ia32_selectd_256(__M,
6850 (__v8si) _mm256_broadcastd_epi32(__A),
6851 (__v8si) __O);
6852}
6853
6854static __inline__ __m256i __DEFAULT_FN_ATTRS256
6856{
6857 return (__m256i)__builtin_ia32_selectd_256(__M,
6858 (__v8si) _mm256_broadcastd_epi32(__A),
6859 (__v8si) _mm256_setzero_si256());
6860}
6861
6862static __inline__ __m128i __DEFAULT_FN_ATTRS128
6863_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6864{
6865 return (__m128i)__builtin_ia32_selectq_128(__M,
6866 (__v2di) _mm_broadcastq_epi64(__A),
6867 (__v2di) __O);
6868}
6869
6870static __inline__ __m128i __DEFAULT_FN_ATTRS128
6872{
6873 return (__m128i)__builtin_ia32_selectq_128(__M,
6874 (__v2di) _mm_broadcastq_epi64(__A),
6875 (__v2di) _mm_setzero_si128());
6876}
6877
6878static __inline__ __m256i __DEFAULT_FN_ATTRS256
6879_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6880{
6881 return (__m256i)__builtin_ia32_selectq_256(__M,
6882 (__v4di) _mm256_broadcastq_epi64(__A),
6883 (__v4di) __O);
6884}
6885
6886static __inline__ __m256i __DEFAULT_FN_ATTRS256
6888{
6889 return (__m256i)__builtin_ia32_selectq_256(__M,
6890 (__v4di) _mm256_broadcastq_epi64(__A),
6891 (__v4di) _mm256_setzero_si256());
6892}
6893
6894static __inline__ __m128i __DEFAULT_FN_ATTRS128
6896{
6897 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6898 (__v16qi)_mm_undefined_si128(),
6899 (__mmask8) -1);
6900}
6901
6902static __inline__ __m128i __DEFAULT_FN_ATTRS128
6903_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6904{
6905 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6906 (__v16qi) __O, __M);
6907}
6908
6909static __inline__ __m128i __DEFAULT_FN_ATTRS128
6911{
6912 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6913 (__v16qi) _mm_setzero_si128 (),
6914 __M);
6915}
6916
6917static __inline__ void __DEFAULT_FN_ATTRS128
6919{
6920 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6921}
6922
6923static __inline__ __m128i __DEFAULT_FN_ATTRS256
6925{
6926 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6927 (__v16qi)_mm_undefined_si128(),
6928 (__mmask8) -1);
6929}
6930
6931static __inline__ __m128i __DEFAULT_FN_ATTRS256
6932_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6933{
6934 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6935 (__v16qi) __O, __M);
6936}
6937
6938static __inline__ __m128i __DEFAULT_FN_ATTRS256
6940{
6941 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6942 (__v16qi) _mm_setzero_si128 (),
6943 __M);
6944}
6945
6946static __inline__ void __DEFAULT_FN_ATTRS256
6948{
6949 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
6950}
6951
6952static __inline__ __m128i __DEFAULT_FN_ATTRS128
6954{
6955 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6956 (__v8hi)_mm_setzero_si128 (),
6957 (__mmask8) -1);
6958}
6959
6960static __inline__ __m128i __DEFAULT_FN_ATTRS128
6961_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
6962{
6963 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6964 (__v8hi)__O,
6965 __M);
6966}
6967
6968static __inline__ __m128i __DEFAULT_FN_ATTRS128
6970{
6971 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6972 (__v8hi) _mm_setzero_si128 (),
6973 __M);
6974}
6975
6976static __inline__ void __DEFAULT_FN_ATTRS128
6978{
6979 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
6980}
6981
6982static __inline__ __m128i __DEFAULT_FN_ATTRS256
6984{
6985 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6986 (__v8hi)_mm_undefined_si128(),
6987 (__mmask8) -1);
6988}
6989
6990static __inline__ __m128i __DEFAULT_FN_ATTRS256
6991_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
6992{
6993 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6994 (__v8hi) __O, __M);
6995}
6996
6997static __inline__ __m128i __DEFAULT_FN_ATTRS256
6999{
7000 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7001 (__v8hi) _mm_setzero_si128 (),
7002 __M);
7003}
7004
7005static __inline__ void __DEFAULT_FN_ATTRS256
7007{
7008 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7009}
7010
7011static __inline__ __m128i __DEFAULT_FN_ATTRS128
7013{
7014 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7015 (__v16qi)_mm_undefined_si128(),
7016 (__mmask8) -1);
7017}
7018
7019static __inline__ __m128i __DEFAULT_FN_ATTRS128
7020_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7021{
7022 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7023 (__v16qi) __O, __M);
7024}
7025
7026static __inline__ __m128i __DEFAULT_FN_ATTRS128
7028{
7029 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7030 (__v16qi) _mm_setzero_si128 (),
7031 __M);
7032}
7033
7034static __inline__ void __DEFAULT_FN_ATTRS128
7036{
7037 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7038}
7039
7040static __inline__ __m128i __DEFAULT_FN_ATTRS256
7042{
7043 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7044 (__v16qi)_mm_undefined_si128(),
7045 (__mmask8) -1);
7046}
7047
7048static __inline__ __m128i __DEFAULT_FN_ATTRS256
7049_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7050{
7051 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7052 (__v16qi) __O, __M);
7053}
7054
7055static __inline__ __m128i __DEFAULT_FN_ATTRS256
7057{
7058 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7059 (__v16qi) _mm_setzero_si128 (),
7060 __M);
7061}
7062
7063static __inline__ void __DEFAULT_FN_ATTRS256
7065{
7066 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7067}
7068
7069static __inline__ __m128i __DEFAULT_FN_ATTRS128
7071{
7072 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7073 (__v4si)_mm_undefined_si128(),
7074 (__mmask8) -1);
7075}
7076
7077static __inline__ __m128i __DEFAULT_FN_ATTRS128
7078_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7079{
7080 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7081 (__v4si) __O, __M);
7082}
7083
7084static __inline__ __m128i __DEFAULT_FN_ATTRS128
7086{
7087 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7088 (__v4si) _mm_setzero_si128 (),
7089 __M);
7090}
7091
7092static __inline__ void __DEFAULT_FN_ATTRS128
7094{
7095 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7096}
7097
7098static __inline__ __m128i __DEFAULT_FN_ATTRS256
7100{
7101 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7102 (__v4si)_mm_undefined_si128(),
7103 (__mmask8) -1);
7104}
7105
7106static __inline__ __m128i __DEFAULT_FN_ATTRS256
7107_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7108{
7109 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7110 (__v4si)__O,
7111 __M);
7112}
7113
7114static __inline__ __m128i __DEFAULT_FN_ATTRS256
7116{
7117 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7118 (__v4si) _mm_setzero_si128 (),
7119 __M);
7120}
7121
7122static __inline__ void __DEFAULT_FN_ATTRS256
7124{
7125 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7126}
7127
7128static __inline__ __m128i __DEFAULT_FN_ATTRS128
7130{
7131 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7132 (__v8hi)_mm_undefined_si128(),
7133 (__mmask8) -1);
7134}
7135
7136static __inline__ __m128i __DEFAULT_FN_ATTRS128
7137_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7138{
7139 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7140 (__v8hi) __O, __M);
7141}
7142
7143static __inline__ __m128i __DEFAULT_FN_ATTRS128
7145{
7146 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7147 (__v8hi) _mm_setzero_si128 (),
7148 __M);
7149}
7150
7151static __inline__ void __DEFAULT_FN_ATTRS128
7153{
7154 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7155}
7156
7157static __inline__ __m128i __DEFAULT_FN_ATTRS256
7159{
7160 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7161 (__v8hi)_mm_undefined_si128(),
7162 (__mmask8) -1);
7163}
7164
7165static __inline__ __m128i __DEFAULT_FN_ATTRS256
7166_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7167{
7168 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7169 (__v8hi) __O, __M);
7170}
7171
7172static __inline__ __m128i __DEFAULT_FN_ATTRS256
7174{
7175 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7176 (__v8hi) _mm_setzero_si128 (),
7177 __M);
7178}
7179
7180static __inline__ void __DEFAULT_FN_ATTRS256
7182{
7183 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7184}
7185
7186static __inline__ __m128i __DEFAULT_FN_ATTRS128
7188{
7189 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7190 (__v16qi)_mm_undefined_si128(),
7191 (__mmask8) -1);
7192}
7193
7194static __inline__ __m128i __DEFAULT_FN_ATTRS128
7195_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7196{
7197 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7198 (__v16qi) __O,
7199 __M);
7200}
7201
7202static __inline__ __m128i __DEFAULT_FN_ATTRS128
7204{
7205 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7206 (__v16qi) _mm_setzero_si128 (),
7207 __M);
7208}
7209
7210static __inline__ void __DEFAULT_FN_ATTRS128
7212{
7213 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7214}
7215
7216static __inline__ __m128i __DEFAULT_FN_ATTRS256
7218{
7219 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7220 (__v16qi)_mm_undefined_si128(),
7221 (__mmask8) -1);
7222}
7223
7224static __inline__ __m128i __DEFAULT_FN_ATTRS256
7225_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7226{
7227 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7228 (__v16qi) __O,
7229 __M);
7230}
7231
7232static __inline__ __m128i __DEFAULT_FN_ATTRS256
7234{
7235 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7236 (__v16qi) _mm_setzero_si128 (),
7237 __M);
7238}
7239
7240static __inline__ void __DEFAULT_FN_ATTRS256
7242{
7243 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7244}
7245
7246static __inline__ __m128i __DEFAULT_FN_ATTRS128
7248{
7249 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7250 (__v8hi)_mm_undefined_si128(),
7251 (__mmask8) -1);
7252}
7253
7254static __inline__ __m128i __DEFAULT_FN_ATTRS128
7255_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7256{
7257 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7258 (__v8hi) __O, __M);
7259}
7260
7261static __inline__ __m128i __DEFAULT_FN_ATTRS128
7263{
7264 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7265 (__v8hi) _mm_setzero_si128 (),
7266 __M);
7267}
7268
7269static __inline__ void __DEFAULT_FN_ATTRS128
7271{
7272 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7273}
7274
7275static __inline__ __m128i __DEFAULT_FN_ATTRS256
7277{
7278 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7279 (__v8hi) _mm_undefined_si128(),
7280 (__mmask8) -1);
7281}
7282
7283static __inline__ __m128i __DEFAULT_FN_ATTRS256
7284_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7285{
7286 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7287 (__v8hi) __O, __M);
7288}
7289
7290static __inline__ __m128i __DEFAULT_FN_ATTRS256
7292{
7293 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7294 (__v8hi) _mm_setzero_si128 (),
7295 __M);
7296}
7297
7298static __inline__ void __DEFAULT_FN_ATTRS256
7300{
7301 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7302}
7303
7304static __inline__ __m128i __DEFAULT_FN_ATTRS128
7306{
7307 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7308 (__v16qi)_mm_undefined_si128(),
7309 (__mmask8) -1);
7310}
7311
7312static __inline__ __m128i __DEFAULT_FN_ATTRS128
7313_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7314{
7315 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7316 (__v16qi) __O,
7317 __M);
7318}
7319
7320static __inline__ __m128i __DEFAULT_FN_ATTRS128
7322{
7323 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7324 (__v16qi) _mm_setzero_si128 (),
7325 __M);
7326}
7327
7328static __inline__ void __DEFAULT_FN_ATTRS128
7330{
7331 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7332}
7333
7334static __inline__ __m128i __DEFAULT_FN_ATTRS256
7336{
7337 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7338 (__v16qi)_mm_undefined_si128(),
7339 (__mmask8) -1);
7340}
7341
7342static __inline__ __m128i __DEFAULT_FN_ATTRS256
7343_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7344{
7345 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7346 (__v16qi) __O,
7347 __M);
7348}
7349
7350static __inline__ __m128i __DEFAULT_FN_ATTRS256
7352{
7353 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7354 (__v16qi) _mm_setzero_si128 (),
7355 __M);
7356}
7357
7358static __inline__ void __DEFAULT_FN_ATTRS256
7360{
7361 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7362}
7363
7364static __inline__ __m128i __DEFAULT_FN_ATTRS128
7366{
7367 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7368 (__v4si)_mm_undefined_si128(),
7369 (__mmask8) -1);
7370}
7371
7372static __inline__ __m128i __DEFAULT_FN_ATTRS128
7373_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7374{
7375 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7376 (__v4si) __O, __M);
7377}
7378
7379static __inline__ __m128i __DEFAULT_FN_ATTRS128
7381{
7382 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7383 (__v4si) _mm_setzero_si128 (),
7384 __M);
7385}
7386
7387static __inline__ void __DEFAULT_FN_ATTRS128
7389{
7390 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7391}
7392
7393static __inline__ __m128i __DEFAULT_FN_ATTRS256
7395{
7396 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7397 (__v4si)_mm_undefined_si128(),
7398 (__mmask8) -1);
7399}
7400
7401static __inline__ __m128i __DEFAULT_FN_ATTRS256
7402_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7403{
7404 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7405 (__v4si) __O, __M);
7406}
7407
7408static __inline__ __m128i __DEFAULT_FN_ATTRS256
7410{
7411 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7412 (__v4si) _mm_setzero_si128 (),
7413 __M);
7414}
7415
7416static __inline__ void __DEFAULT_FN_ATTRS256
7418{
7419 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7420}
7421
7422static __inline__ __m128i __DEFAULT_FN_ATTRS128
7424{
7425 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7426 (__v8hi)_mm_undefined_si128(),
7427 (__mmask8) -1);
7428}
7429
7430static __inline__ __m128i __DEFAULT_FN_ATTRS128
7431_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7432{
7433 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7434 (__v8hi) __O, __M);
7435}
7436
7437static __inline__ __m128i __DEFAULT_FN_ATTRS128
7439{
7440 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7441 (__v8hi) _mm_setzero_si128 (),
7442 __M);
7443}
7444
7445static __inline__ void __DEFAULT_FN_ATTRS128
7447{
7448 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7449}
7450
7451static __inline__ __m128i __DEFAULT_FN_ATTRS256
7453{
7454 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7455 (__v8hi)_mm_undefined_si128(),
7456 (__mmask8) -1);
7457}
7458
7459static __inline__ __m128i __DEFAULT_FN_ATTRS256
7460_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7461{
7462 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7463 (__v8hi) __O, __M);
7464}
7465
7466static __inline__ __m128i __DEFAULT_FN_ATTRS256
7468{
7469 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7470 (__v8hi) _mm_setzero_si128 (),
7471 __M);
7472}
7473
7474static __inline__ void __DEFAULT_FN_ATTRS256
7476{
7477 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7478}
7479
7480static __inline__ __m128i __DEFAULT_FN_ATTRS128
7482{
7483 return (__m128i)__builtin_shufflevector(
7484 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7485 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7486}
7487
7488static __inline__ __m128i __DEFAULT_FN_ATTRS128
7489_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7490{
7491 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7492 (__v16qi) __O, __M);
7493}
7494
7495static __inline__ __m128i __DEFAULT_FN_ATTRS128
7497{
7498 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7499 (__v16qi)
7501 __M);
7502}
7503
7504static __inline__ void __DEFAULT_FN_ATTRS128
7506{
7507 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7508}
7509
7510static __inline__ __m128i __DEFAULT_FN_ATTRS256
7512{
7513 return (__m128i)__builtin_shufflevector(
7514 __builtin_convertvector((__v8si)__A, __v8qi),
7515 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7516 12, 13, 14, 15);
7517}
7518
7519static __inline__ __m128i __DEFAULT_FN_ATTRS256
7520_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7521{
7522 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7523 (__v16qi) __O, __M);
7524}
7525
7526static __inline__ __m128i __DEFAULT_FN_ATTRS256
7528{
7529 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7530 (__v16qi) _mm_setzero_si128 (),
7531 __M);
7532}
7533
7534static __inline__ void __DEFAULT_FN_ATTRS256
7536{
7537 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7538}
7539
7540static __inline__ __m128i __DEFAULT_FN_ATTRS128
7542{
7543 return (__m128i)__builtin_shufflevector(
7544 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7545 2, 3, 4, 5, 6, 7);
7546}
7547
7548static __inline__ __m128i __DEFAULT_FN_ATTRS128
7549_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7550{
7551 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7552 (__v8hi) __O, __M);
7553}
7554
7555static __inline__ __m128i __DEFAULT_FN_ATTRS128
7557{
7558 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7559 (__v8hi) _mm_setzero_si128 (),
7560 __M);
7561}
7562
7563static __inline__ void __DEFAULT_FN_ATTRS128
7565{
7566 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7567}
7568
7569static __inline__ __m128i __DEFAULT_FN_ATTRS256
7571{
7572 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7573}
7574
7575static __inline__ __m128i __DEFAULT_FN_ATTRS256
7576_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7577{
7578 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7579 (__v8hi) __O, __M);
7580}
7581
7582static __inline__ __m128i __DEFAULT_FN_ATTRS256
7584{
7585 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7586 (__v8hi) _mm_setzero_si128 (),
7587 __M);
7588}
7589
7590static __inline__ void __DEFAULT_FN_ATTRS256
7592{
7593 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7594}
7595
7596static __inline__ __m128i __DEFAULT_FN_ATTRS128
7598{
7599 return (__m128i)__builtin_shufflevector(
7600 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7601 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7602}
7603
7604static __inline__ __m128i __DEFAULT_FN_ATTRS128
7605_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7606{
7607 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7608 (__v16qi) __O, __M);
7609}
7610
7611static __inline__ __m128i __DEFAULT_FN_ATTRS128
7613{
7614 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7615 (__v16qi) _mm_setzero_si128 (),
7616 __M);
7617}
7618
7619static __inline__ void __DEFAULT_FN_ATTRS128
7621{
7622 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7623}
7624
7625static __inline__ __m128i __DEFAULT_FN_ATTRS256
7627{
7628 return (__m128i)__builtin_shufflevector(
7629 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7630 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7631}
7632
7633static __inline__ __m128i __DEFAULT_FN_ATTRS256
7634_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7635{
7636 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7637 (__v16qi) __O, __M);
7638}
7639
7640static __inline__ __m128i __DEFAULT_FN_ATTRS256
7642{
7643 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7644 (__v16qi) _mm_setzero_si128 (),
7645 __M);
7646}
7647
7648static __inline__ void __DEFAULT_FN_ATTRS256
7650{
7651 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7652}
7653
7654static __inline__ __m128i __DEFAULT_FN_ATTRS128
7656{
7657 return (__m128i)__builtin_shufflevector(
7658 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7659}
7660
7661static __inline__ __m128i __DEFAULT_FN_ATTRS128
7662_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7663{
7664 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7665 (__v4si) __O, __M);
7666}
7667
7668static __inline__ __m128i __DEFAULT_FN_ATTRS128
7670{
7671 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7672 (__v4si) _mm_setzero_si128 (),
7673 __M);
7674}
7675
7676static __inline__ void __DEFAULT_FN_ATTRS128
7678{
7679 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7680}
7681
7682static __inline__ __m128i __DEFAULT_FN_ATTRS256
7684{
7685 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7686}
7687
7688static __inline__ __m128i __DEFAULT_FN_ATTRS256
7689_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7690{
7691 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7692 (__v4si)_mm256_cvtepi64_epi32(__A),
7693 (__v4si)__O);
7694}
7695
7696static __inline__ __m128i __DEFAULT_FN_ATTRS256
7698{
7699 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7700 (__v4si)_mm256_cvtepi64_epi32(__A),
7701 (__v4si)_mm_setzero_si128());
7702}
7703
7704static __inline__ void __DEFAULT_FN_ATTRS256
7706{
7707 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7708}
7709
7710static __inline__ __m128i __DEFAULT_FN_ATTRS128
7712{
7713 return (__m128i)__builtin_shufflevector(
7714 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7715 3, 3, 3, 3);
7716}
7717
7718static __inline__ __m128i __DEFAULT_FN_ATTRS128
7719_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7720{
7721 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7722 (__v8hi)__O,
7723 __M);
7724}
7725
7726static __inline__ __m128i __DEFAULT_FN_ATTRS128
7728{
7729 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7730 (__v8hi) _mm_setzero_si128 (),
7731 __M);
7732}
7733
7734static __inline__ void __DEFAULT_FN_ATTRS128
7736{
7737 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7738}
7739
7740static __inline__ __m128i __DEFAULT_FN_ATTRS256
7742{
7743 return (__m128i)__builtin_shufflevector(
7744 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7745 2, 3, 4, 5, 6, 7);
7746}
7747
7748static __inline__ __m128i __DEFAULT_FN_ATTRS256
7749_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7750{
7751 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7752 (__v8hi) __O, __M);
7753}
7754
7755static __inline__ __m128i __DEFAULT_FN_ATTRS256
7757{
7758 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7759 (__v8hi) _mm_setzero_si128 (),
7760 __M);
7761}
7762
7763static __inline__ void __DEFAULT_FN_ATTRS256
7765{
7766 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7767}
7768
/* Expands to __builtin_ia32_extractf32x4_256_mask on A (as __v8sf) and imm
 * (as int), with an undefined pass-through and an all-ones mask. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_undefined_ps(), \
      (__mmask8)-1))
7774
/* Expands to __builtin_ia32_extractf32x4_256_mask on A (as __v8sf) and imm
 * (as int), with W as pass-through and U as mask. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))
7780
/* Expands to __builtin_ia32_extractf32x4_256_mask on A (as __v8sf) and imm
 * (as int), with an all-zero pass-through and U as mask. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
7786
/* Expands to __builtin_ia32_extracti32x4_256_mask on A (as __v8si) and imm
 * (as int), with an undefined pass-through and an all-ones mask. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_undefined_si128(), \
      (__mmask8)-1))
7792
/* Expands to __builtin_ia32_extracti32x4_256_mask on A (as __v8si) and imm
 * (as int), with W as pass-through and U as mask. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), (__v4si)(__m128i)(W), \
      (__mmask8)(U)))
7798
/* Expands to __builtin_ia32_extracti32x4_256_mask on A (as __v8si) and imm
 * (as int), with an all-zero pass-through and U as mask. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)))
7804
/* Expands to __builtin_ia32_insertf32x4_256 on A (as __v8sf), B (as __v4sf)
 * and imm (as int). */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256( \
      (__v8sf)(__m256)(A), (__v4sf)(__m128)(B), (int)(imm)))
7808
/* Selects under mask U between _mm256_insertf32x4(A, B, imm) and W via
 * __builtin_ia32_selectps_256. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
7813
/* Selects under mask U between _mm256_insertf32x4(A, B, imm) and an all-zero
 * vector via __builtin_ia32_selectps_256. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
7818
/* Expands to __builtin_ia32_inserti32x4_256 on A (as __v8si), B (as __v4si)
 * and imm (as int). */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256( \
      (__v8si)(__m256i)(A), (__v4si)(__m128i)(B), (int)(imm)))
7822
/* Selects under mask U between _mm256_inserti32x4(A, B, imm) and W via
 * __builtin_ia32_selectd_256. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
7827
/* Selects under mask U between _mm256_inserti32x4(A, B, imm) and an all-zero
 * vector via __builtin_ia32_selectd_256. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
7832
/* Expands to __builtin_ia32_getmantpd128_mask on A (as __v2df) with the
 * immediate ((C) << 2) | (B), an all-zero pass-through and an all-ones mask. */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1))
7838
/* Expands to __builtin_ia32_getmantpd128_mask on A (as __v2df) with the
 * immediate ((C) << 2) | (B), W as pass-through and U as mask. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))
7844
/* Expands to __builtin_ia32_getmantpd128_mask on A (as __v2df) with the
 * immediate ((C) << 2) | (B), an all-zero pass-through and U as mask. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
7850
/* Expands to __builtin_ia32_getmantpd256_mask on A (as __v4df) with the
 * immediate ((C) << 2) | (B), an all-zero pass-through and an all-ones mask. */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
7856
/* Expands to __builtin_ia32_getmantpd256_mask on A (as __v4df) with the
 * immediate ((C) << 2) | (B), W as pass-through and U as mask. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))
7862
/* Expands to __builtin_ia32_getmantpd256_mask on A (as __v4df) with the
 * immediate ((C) << 2) | (B), an all-zero pass-through and U as mask. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
7868
/* Expands to __builtin_ia32_getmantps128_mask on A (as __v4sf) with the
 * immediate ((C) << 2) | (B), an all-zero pass-through and an all-ones mask. */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
7874
/* Expands to __builtin_ia32_getmantps128_mask on A (as __v4sf) with the
 * immediate ((C) << 2) | (B), W as pass-through and U as mask. */
#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))
7880
/* Expands to __builtin_ia32_getmantps128_mask on A (as __v4sf) with the
 * immediate ((C) << 2) | (B), an all-zero pass-through and U as mask. */
#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
7886
/* Expands to __builtin_ia32_getmantps256_mask on A (as __v8sf) with the
 * immediate ((C) << 2) | (B), an all-zero pass-through and an all-ones mask. */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
7892
/* Expands to __builtin_ia32_getmantps256_mask on A (as __v8sf) with the
 * immediate ((C) << 2) | (B), W as pass-through and U as mask. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)(W), (__mmask8)(U)))
7898
/* Expands to __builtin_ia32_getmantps256_mask on A (as __v8sf) with the
 * immediate ((C) << 2) | (B), an all-zero pass-through and U as mask. */
#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
7904
/* Expands to __builtin_ia32_gather3div2df with v1_old (as __v2df), addr, index
 * (as __v2di), mask (as __mmask8) and scale (as int). */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7910
/* Expands to __builtin_ia32_gather3div2di with v1_old (as __v2di), addr, index
 * (as __v2di), mask (as __mmask8) and scale (as int). */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7916
/* Expands to __builtin_ia32_gather3div4df with v1_old (as __v4df), addr, index
 * (as __v4di), mask (as __mmask8) and scale (as int). */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7922
/* Expands to __builtin_ia32_gather3div4di with v1_old (as __v4di), addr, index
 * (as __v4di), mask (as __mmask8) and scale (as int). */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7928
/* Expands to __builtin_ia32_gather3div4sf with v1_old (as __v4sf), addr, index
 * (as __v2di), mask (as __mmask8) and scale (as int). */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7934
/* Expands to __builtin_ia32_gather3div4si with v1_old (as __v4si), addr, index
 * (as __v2di), mask (as __mmask8) and scale (as int). */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
7940
/* Expands to __builtin_ia32_gather3div8sf with v1_old (as __v4sf), addr, index
 * (as __v4di), mask (as __mmask8) and scale (as int); the result is cast to
 * __m128. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7946
/* Expands to __builtin_ia32_gather3div8si with v1_old (as __v4si), addr, index
 * (as __v4di), mask (as __mmask8) and scale (as int); the result is cast to
 * __m128i. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
7952
/* Wraps __builtin_ia32_gather3siv2df: v1_old is passed as __v2df, index as
 * __v4si, mask as __mmask8 and scale as int; the result is cast to __m128d. */
7953#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
7954 ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
7955 (void const *)(addr), \
7956 (__v4si)(__m128i)(index), \
7957 (__mmask8)(mask), (int)(scale)))
7958
/* Wraps __builtin_ia32_gather3siv2di: v1_old is passed as __v2di, index as
 * __v4si, mask as __mmask8 and scale as int; the result is cast to __m128i. */
7959#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
7960 ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
7961 (void const *)(addr), \
7962 (__v4si)(__m128i)(index), \
7963 (__mmask8)(mask), (int)(scale)))
7964
/* Wraps __builtin_ia32_gather3siv4df: v1_old is passed as __v4df, index as a
 * 128-bit __v4si, mask as __mmask8 and scale as int; the result is cast to
 * __m256d. */
7965#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
7966 ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
7967 (void const *)(addr), \
7968 (__v4si)(__m128i)(index), \
7969 (__mmask8)(mask), (int)(scale)))
7970
/* Wraps __builtin_ia32_gather3siv4di: v1_old is passed as __v4di, index as a
 * 128-bit __v4si, mask as __mmask8 and scale as int; the result is cast to
 * __m256i. */
7971#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
7972 ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
7973 (void const *)(addr), \
7974 (__v4si)(__m128i)(index), \
7975 (__mmask8)(mask), (int)(scale)))
7976
/* Wraps __builtin_ia32_gather3siv4sf: v1_old is passed as __v4sf, index as
 * __v4si, mask as __mmask8 and scale as int; the result is cast to __m128. */
7977#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
7978 ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
7979 (void const *)(addr), \
7980 (__v4si)(__m128i)(index), \
7981 (__mmask8)(mask), (int)(scale)))
7982
/* Wraps __builtin_ia32_gather3siv4si: v1_old and index are both passed as
 * __v4si, mask as __mmask8 and scale as int; the result is cast to __m128i. */
7983#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
7984 ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
7985 (void const *)(addr), \
7986 (__v4si)(__m128i)(index), \
7987 (__mmask8)(mask), (int)(scale)))
7988
/* Wraps __builtin_ia32_gather3siv8sf: v1_old is passed as __v8sf, index as
 * __v8si, mask as __mmask8 and scale as int; the result is cast to __m256. */
7989#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
7990 ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
7991 (void const *)(addr), \
7992 (__v8si)(__m256i)(index), \
7993 (__mmask8)(mask), (int)(scale)))
7994
/* Wraps __builtin_ia32_gather3siv8si: v1_old and index are both passed as
 * __v8si, mask as __mmask8 and scale as int; the result is cast to __m256i. */
7995#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
7996 ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
7997 (void const *)(addr), \
7998 (__v8si)(__m256i)(index), \
7999 (__mmask8)(mask), (int)(scale)))
8000
/* Wraps __builtin_ia32_permdf256: X is passed as __v4df and the control C is
 * cast to int; the result is cast to __m256d. */
8001#define _mm256_permutex_pd(X, C) \
8002 ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))
8003
/* Mask form of _mm256_permutex_pd: hands mask U, the value of
 * _mm256_permutex_pd(X, C) and W to __builtin_ia32_selectpd_256. */
8004#define _mm256_mask_permutex_pd(W, U, X, C) \
8005 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
8006 (__v4df)_mm256_permutex_pd((X), (C)), \
8007 (__v4df)(__m256d)(W)))
8008
/* maskz form of _mm256_permutex_pd: same as the mask form, but
 * _mm256_setzero_pd() takes the place of the W operand. */
8009#define _mm256_maskz_permutex_pd(U, X, C) \
8010 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
8011 (__v4df)_mm256_permutex_pd((X), (C)), \
8012 (__v4df)_mm256_setzero_pd()))
8013
/* Wraps __builtin_ia32_permdi256: X is passed as __v4di and the control C is
 * cast to int; the result is cast to __m256i. */
8014#define _mm256_permutex_epi64(X, C) \
8015 ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))
8016
/* Mask form of _mm256_permutex_epi64: hands mask U, the value of
 * _mm256_permutex_epi64(X, C) and W to __builtin_ia32_selectq_256. */
8017#define _mm256_mask_permutex_epi64(W, U, X, C) \
8018 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8019 (__v4di)_mm256_permutex_epi64((X), (C)), \
8020 (__v4di)(__m256i)(W)))
8021
/* maskz form of _mm256_permutex_epi64: same as the mask form, but
 * _mm256_setzero_si256() takes the place of the W operand. */
8022#define _mm256_maskz_permutex_epi64(U, X, C) \
8023 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8024 (__v4di)_mm256_permutex_epi64((X), (C)), \
8025 (__v4di)_mm256_setzero_si256()))
8026
/* Wraps __builtin_ia32_permvardf256. Note the operand order flips: the
 * builtin receives __Y (as __v4df) first and __X (as __v4di) second. */
8027static __inline__ __m256d __DEFAULT_FN_ATTRS256
8028_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8029{
8030 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
8031}
8032
/* Mask form of _mm256_permutexvar_pd: hands mask __U, the value of
 * _mm256_permutexvar_pd(__X, __Y) and __W to __builtin_ia32_selectpd_256. */
8033static __inline__ __m256d __DEFAULT_FN_ATTRS256
8034_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8035 __m256d __Y)
8036{
8037 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8038 (__v4df)_mm256_permutexvar_pd(__X, __Y),
8039 (__v4df)__W);
8040}
8041
/* maskz form of _mm256_permutexvar_pd: same as the mask form, but
 * _mm256_setzero_pd() takes the place of the __W operand. */
8042static __inline__ __m256d __DEFAULT_FN_ATTRS256
8043_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8044{
8045 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8046 (__v4df)_mm256_permutexvar_pd(__X, __Y),
8047 (__v4df)_mm256_setzero_pd());
8048}
8049
/* Wraps __builtin_ia32_permvardi256. Note the operand order flips: the
 * builtin receives __Y first and __X second, both as __v4di. */
8050static __inline__ __m256i __DEFAULT_FN_ATTRS256
8051_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8052{
8053 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
8054}
8055
8056static __inline__ __m256i __DEFAULT_FN_ATTRS256
8058{
8059 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8060 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8061 (__v4di)_mm256_setzero_si256());
8062}
8063
/* Mask form of _mm256_permutexvar_epi64: hands mask __M, the value of
 * _mm256_permutexvar_epi64(__X, __Y) and __W to __builtin_ia32_selectq_256. */
8064static __inline__ __m256i __DEFAULT_FN_ATTRS256
8065_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8066 __m256i __Y)
8067{
8068 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8069 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8070 (__v4di)__W);
8071}
8072
/* Expands to _mm256_permutevar8x32_ps with the two operands swapped. */
8073#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
8074
/* Mask form of _mm256_permutexvar_ps: hands mask __U, the value of
 * _mm256_permutexvar_ps(__X, __Y) and __W to __builtin_ia32_selectps_256. */
8075static __inline__ __m256 __DEFAULT_FN_ATTRS256
8076_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
8077{
8078 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8079 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8080 (__v8sf)__W);
8081}
8082
8083static __inline__ __m256 __DEFAULT_FN_ATTRS256
8085{
8086 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8087 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8088 (__v8sf)_mm256_setzero_ps());
8089}
8090
/* Expands to _mm256_permutevar8x32_epi32 with the two operands swapped. */
8091#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
8092
/* Mask form of _mm256_permutexvar_epi32: hands mask __M, the value of
 * _mm256_permutexvar_epi32(__X, __Y) and __W to __builtin_ia32_selectd_256. */
8093static __inline__ __m256i __DEFAULT_FN_ATTRS256
8094_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
8095 __m256i __Y)
8096{
8097 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8098 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8099 (__v8si)__W);
8100}
8101
8102static __inline__ __m256i __DEFAULT_FN_ATTRS256
8104{
8105 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8106 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8107 (__v8si)_mm256_setzero_si256());
8108}
8109
/* Wraps __builtin_ia32_alignd128: A and B are passed as __v4si and imm is
 * cast to int; the result is cast to __m128i. */
8110#define _mm_alignr_epi32(A, B, imm) \
8111 ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
8112 (__v4si)(__m128i)(B), (int)(imm)))
8113
/* Mask form of _mm_alignr_epi32: hands mask U, the value of
 * _mm_alignr_epi32(A, B, imm) and W to __builtin_ia32_selectd_128. */
8114#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
8115 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8116 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
8117 (__v4si)(__m128i)(W)))
8118
/* maskz form of _mm_alignr_epi32: same as the mask form, but
 * _mm_setzero_si128() takes the place of the W operand. */
8119#define _mm_maskz_alignr_epi32(U, A, B, imm) \
8120 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8121 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
8122 (__v4si)_mm_setzero_si128()))
8123
/* Wraps __builtin_ia32_alignd256: A and B are passed as __v8si and imm is
 * cast to int; the result is cast to __m256i. */
8124#define _mm256_alignr_epi32(A, B, imm) \
8125 ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
8126 (__v8si)(__m256i)(B), (int)(imm)))
8127
/* Mask form of _mm256_alignr_epi32: hands mask U, the value of
 * _mm256_alignr_epi32(A, B, imm) and W to __builtin_ia32_selectd_256. */
8128#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
8129 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8130 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
8131 (__v8si)(__m256i)(W)))
8132
/* maskz form of _mm256_alignr_epi32: same as the mask form, but
 * _mm256_setzero_si256() takes the place of the W operand. */
8133#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
8134 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8135 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
8136 (__v8si)_mm256_setzero_si256()))
8137
/* Wraps __builtin_ia32_alignq128: A and B are passed as __v2di and imm is
 * cast to int; the result is cast to __m128i. */
8138#define _mm_alignr_epi64(A, B, imm) \
8139 ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
8140 (__v2di)(__m128i)(B), (int)(imm)))
8141
/* Mask form of _mm_alignr_epi64: hands mask U, the value of
 * _mm_alignr_epi64(A, B, imm) and W to __builtin_ia32_selectq_128. */
8142#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
8143 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
8144 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
8145 (__v2di)(__m128i)(W)))
8146
/* maskz form of _mm_alignr_epi64: same as the mask form, but
 * _mm_setzero_si128() takes the place of the W operand. */
8147#define _mm_maskz_alignr_epi64(U, A, B, imm) \
8148 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
8149 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
8150 (__v2di)_mm_setzero_si128()))
8151
/* Wraps __builtin_ia32_alignq256: A and B are passed as __v4di and imm is
 * cast to int; the result is cast to __m256i. */
8152#define _mm256_alignr_epi64(A, B, imm) \
8153 ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
8154 (__v4di)(__m256i)(B), (int)(imm)))
8155
/* Mask form of _mm256_alignr_epi64: hands mask U, the value of
 * _mm256_alignr_epi64(A, B, imm) and W to __builtin_ia32_selectq_256. */
8156#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
8157 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8158 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
8159 (__v4di)(__m256i)(W)))
8160
/* maskz form of _mm256_alignr_epi64: same as the mask form, but
 * _mm256_setzero_si256() takes the place of the W operand. */
8161#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
8162 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8163 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
8164 (__v4di)_mm256_setzero_si256()))
8165
/* Mask form of _mm_movehdup_ps: hands mask __U, the value of
 * _mm_movehdup_ps(__A) and __W to __builtin_ia32_selectps_128. */
8166static __inline__ __m128 __DEFAULT_FN_ATTRS128
8167_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8168{
8169 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8170 (__v4sf)_mm_movehdup_ps(__A),
8171 (__v4sf)__W);
8172}
8173
8174static __inline__ __m128 __DEFAULT_FN_ATTRS128
8176{
8177 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8178 (__v4sf)_mm_movehdup_ps(__A),
8179 (__v4sf)_mm_setzero_ps());
8180}
8181
/* Mask form of _mm256_movehdup_ps: hands mask __U, the value of
 * _mm256_movehdup_ps(__A) and __W to __builtin_ia32_selectps_256. */
8182static __inline__ __m256 __DEFAULT_FN_ATTRS256
8183_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8184{
8185 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8186 (__v8sf)_mm256_movehdup_ps(__A),
8187 (__v8sf)__W);
8188}
8189
8190static __inline__ __m256 __DEFAULT_FN_ATTRS256
8192{
8193 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8194 (__v8sf)_mm256_movehdup_ps(__A),
8195 (__v8sf)_mm256_setzero_ps());
8196}
8197
/* Mask form of _mm_moveldup_ps: hands mask __U, the value of
 * _mm_moveldup_ps(__A) and __W to __builtin_ia32_selectps_128. */
8198static __inline__ __m128 __DEFAULT_FN_ATTRS128
8199_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8200{
8201 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8202 (__v4sf)_mm_moveldup_ps(__A),
8203 (__v4sf)__W);
8204}
8205
8206static __inline__ __m128 __DEFAULT_FN_ATTRS128
8208{
8209 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8210 (__v4sf)_mm_moveldup_ps(__A),
8211 (__v4sf)_mm_setzero_ps());
8212}
8213
/* Mask form of _mm256_moveldup_ps: hands mask __U, the value of
 * _mm256_moveldup_ps(__A) and __W to __builtin_ia32_selectps_256. */
8214static __inline__ __m256 __DEFAULT_FN_ATTRS256
8215_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8216{
8217 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8218 (__v8sf)_mm256_moveldup_ps(__A),
8219 (__v8sf)__W);
8220}
8221
8222static __inline__ __m256 __DEFAULT_FN_ATTRS256
8224{
8225 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8226 (__v8sf)_mm256_moveldup_ps(__A),
8227 (__v8sf)_mm256_setzero_ps());
8228}
8229
/* Mask form of _mm256_shuffle_epi32: hands mask U, the value of
 * _mm256_shuffle_epi32(A, I) and W to __builtin_ia32_selectd_256. */
8230#define _mm256_mask_shuffle_epi32(W, U, A, I) \
8231 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8232 (__v8si)_mm256_shuffle_epi32((A), (I)), \
8233 (__v8si)(__m256i)(W)))
8234
/* maskz form of _mm256_shuffle_epi32: same as the mask form, but
 * _mm256_setzero_si256() takes the place of the W operand. */
8235#define _mm256_maskz_shuffle_epi32(U, A, I) \
8236 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8237 (__v8si)_mm256_shuffle_epi32((A), (I)), \
8238 (__v8si)_mm256_setzero_si256()))
8239
/* Mask form of _mm_shuffle_epi32: hands mask U, the value of
 * _mm_shuffle_epi32(A, I) and W to __builtin_ia32_selectd_128. */
8240#define _mm_mask_shuffle_epi32(W, U, A, I) \
8241 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8242 (__v4si)_mm_shuffle_epi32((A), (I)), \
8243 (__v4si)(__m128i)(W)))
8244
/* maskz form of _mm_shuffle_epi32: same as the mask form, but
 * _mm_setzero_si128() takes the place of the W operand. */
8245#define _mm_maskz_shuffle_epi32(U, A, I) \
8246 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8247 (__v4si)_mm_shuffle_epi32((A), (I)), \
8248 (__v4si)_mm_setzero_si128()))
8249
/* Hands mask __U, __A and __W (both as __v2df) to
 * __builtin_ia32_selectpd_128 and returns the result as __m128d. Declared
 * constexpr when compiled as C++11 or later (via the _CONSTEXPR attribute
 * macro). */
8250static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8251_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) {
8252 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A,
8253 (__v2df)__W);
8254}
8255
/* Hands mask __U, __A (as __v2df) and _mm_setzero_pd() to
 * __builtin_ia32_selectpd_128 and returns the result as __m128d. */
8256static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8257_mm_maskz_mov_pd(__mmask8 __U, __m128d __A) {
8258 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A,
8259 (__v2df)_mm_setzero_pd());
8260}
8261
/* Hands mask __U, __A and __W (both as __v4df) to
 * __builtin_ia32_selectpd_256 and returns the result as __m256d. */
8262static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
8263_mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A) {
8264 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A,
8265 (__v4df)__W);
8266}
8267
8268static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
8270 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A,
8271 (__v4df)_mm256_setzero_pd());
8272}
8273
/* Hands mask __U, __A and __W (both as __v4sf) to
 * __builtin_ia32_selectps_128 and returns the result as __m128. */
8274static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8275_mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A) {
8276 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A,
8277 (__v4sf)__W);
8278}
8279
/* Hands mask __U, __A (as __v4sf) and _mm_setzero_ps() to
 * __builtin_ia32_selectps_128 and returns the result as __m128. */
8280static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8281_mm_maskz_mov_ps(__mmask8 __U, __m128 __A) {
8282 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A,
8283 (__v4sf)_mm_setzero_ps());
8284}
8285
/* Hands mask __U, __A and __W (both as __v8sf) to
 * __builtin_ia32_selectps_256 and returns the result as __m256. */
8286static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
8287_mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A) {
8288 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A,
8289 (__v8sf)__W);
8290}
8291
8292static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
8294 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A,
8295 (__v8sf)_mm256_setzero_ps());
8296}
8297
/* Forwards __A (as __v8hi), __W (as __v4sf) and mask __U to
 * __builtin_ia32_vcvtph2ps_mask and returns the result as __m128. */
8298static __inline__ __m128 __DEFAULT_FN_ATTRS128
8299_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8300{
8301 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8302 (__v4sf) __W,
8303 (__mmask8) __U);
8304}
8305
8306static __inline__ __m128 __DEFAULT_FN_ATTRS128
8308{
8309 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8310 (__v4sf)
8311 _mm_setzero_ps (),
8312 (__mmask8) __U);
8313}
8314
/* Forwards the 128-bit __A (as __v8hi), __W (as __v8sf) and mask __U to
 * __builtin_ia32_vcvtph2ps256_mask and returns the result as __m256. */
8315static __inline__ __m256 __DEFAULT_FN_ATTRS256
8316_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8317{
8318 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8319 (__v8sf) __W,
8320 (__mmask8) __U);
8321}
8322
8323static __inline__ __m256 __DEFAULT_FN_ATTRS256
8325{
8326 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8327 (__v8sf)
8329 (__mmask8) __U);
8330}
8331
/* Wraps __builtin_ia32_vcvtps2ph_mask: A is passed as __v4sf, I as int, W as
 * __v8hi and U as __mmask8; the result is cast to __m128i. */
8332#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
8333 ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8334 (__v8hi)(__m128i)(W), \
8335 (__mmask8)(U)))
8336
/* maskz form of _mm_mask_cvt_roundps_ph: same builtin call, but
 * _mm_setzero_si128() takes the place of the W operand. */
8337#define _mm_maskz_cvt_roundps_ph(U, A, I) \
8338 ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8339 (__v8hi)_mm_setzero_si128(), \
8340 (__mmask8)(U)))
8341
/* Plain aliases: the cvtps_ph names expand to the cvt_roundps_ph macros and
 * therefore take the same argument lists. */
8342#define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph
8343#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
8344
/* Wraps __builtin_ia32_vcvtps2ph256_mask: A is passed as __v8sf, I as int,
 * W as a 128-bit __v8hi and U as __mmask8; the result is cast to __m128i. */
8345#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
8346 ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8347 (__v8hi)(__m128i)(W), \
8348 (__mmask8)(U)))
8349
/* maskz form of _mm256_mask_cvt_roundps_ph: same builtin call, but
 * _mm_setzero_si128() takes the place of the W operand. */
8350#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
8351 ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8352 (__v8hi)_mm_setzero_si128(), \
8353 (__mmask8)(U)))
8354
/* Plain aliases: the 256-bit cvtps_ph names expand to the corresponding
 * cvt_roundps_ph macros and therefore take the same argument lists. */
8355#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph
8356#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
8357
/* Remove the attribute helper macros defined at the top of this header so
 * they do not remain defined after it, then close the include guard. */
8358#undef __DEFAULT_FN_ATTRS128
8359#undef __DEFAULT_FN_ATTRS256
8360#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
8361#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
8362
8363#endif /* __AVX512VLINTRIN_H */
static __inline__ vector float vector float __b
Definition: altivec.h:578
return __v
Definition: arm_acle.h:88
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by the number of bits give...
Definition: avx2intrin.h:2422
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X left by the number of bits given...
Definition: avx2intrin.h:3713
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi32_epi64(__m128i __V)
Sign-extends 32-bit elements from the 128-bit vector of [4 x i32] in __V and returns the 64-bit value...
Definition: avx2intrin.h:1496
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mul_epu32(__m256i __a, __m256i __b)
Multiplies unsigned 32-bit integers from even-numbered elements of two 256-bit vectors of [8 x i32] an...
Definition: avx2intrin.h:1795
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a left by the number of bits given...
Definition: avx2intrin.h:2183
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X right by the number of bits give...
Definition: avx2intrin.h:3825
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors of [8 x i32] in __a and __b...
Definition: avx2intrin.h:2769
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a left by the number of bits given...
Definition: avx2intrin.h:2222
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastq_epi64(__m128i __X)
Broadcasts the low element from the 128-bit vector of [2 x i64] in __X to both elements of the result...
Definition: avx2intrin.h:3238
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_slli_epi64(__m256i __a, int __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a left by __count bits,...
Definition: avx2intrin.h:2202
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi32(__m256i __a)
Computes the absolute value of each signed 32-bit element in the 256-bit vector of [8 x i32] in __a a...
Definition: avx2intrin.h:148
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a, __m256i __b)
Subtracts 64-bit integers from corresponding elements of two 256-bit vectors of [4 x i64].
Definition: avx2intrin.h:2567
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X right by the number of bits give...
Definition: avx2intrin.h:3802
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu8_epi64(__m128i __V)
Zero-extends the first four bytes from the 128-bit integer vector in __V and returns the 64-bit value...
Definition: avx2intrin.h:1570
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b)
Compares the corresponding unsigned 32-bit integers in the two 256-bit vectors of [8 x i32] in __a an...
Definition: avx2intrin.h:1205
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu32_epi64(__m128i __V)
Zero-extends 32-bit elements from the 128-bit vector of [4 x i32] in __V and returns the 64-bit value...
Definition: avx2intrin.h:1643
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu8_epi32(__m128i __V)
Zero-extends bytes from the lower half of the 128-bit integer vector in __V and returns the 32-bit va...
Definition: avx2intrin.h:1546
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X right by the number of bits give...
Definition: avx2intrin.h:3869
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastss_ps(__m128 __X)
Broadcasts the 32-bit floating-point value from the low element of the 128-bit vector of [4 x float] ...
Definition: avx2intrin.h:3011
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi64(__m256i __a, __m256i __b)
Adds 64-bit integers from corresponding elements of two 256-bit vectors of [4 x i64] and returns the ...
Definition: avx2intrin.h:349
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srli_epi64(__m256i __a, int __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a right by __count bits,...
Definition: avx2intrin.h:2441
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b)
Compares the corresponding signed 32-bit integers in the two 256-bit vectors of [8 x i32] in __a and ...
Definition: avx2intrin.h:1148
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors of [4 x i64] in __a and __b...
Definition: avx2intrin.h:2927
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi8_epi32(__m128i __V)
Sign-extends bytes from the lower half of the 128-bit integer vector in __V and returns the 32-bit va...
Definition: avx2intrin.h:1395
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastq_epi64(__m128i __X)
Broadcasts the low element from the 128-bit vector of [2 x i64] in __X to all elements of the result'...
Definition: avx2intrin.h:3178
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mul_epi32(__m256i __a, __m256i __b)
Multiplies signed 32-bit integers from even-numbered elements of two 256-bit vectors of [8 x i32] and...
Definition: avx2intrin.h:1668
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mullo_epi32(__m256i __a, __m256i __b)
Multiplies signed 32-bit integer elements of two 256-bit vectors of [8 x i32], and returns the lower ...
Definition: avx2intrin.h:1770
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors of [4 x i64] in __a and __b...
Definition: avx2intrin.h:2798
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastss_ps(__m128 __X)
Broadcasts the 32-bit floating-point value from the low element of the 128-bit vector of [4 x float] ...
Definition: avx2intrin.h:2979
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by the number of bits give...
Definition: avx2intrin.h:2304
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi64(__m128i __X, __m128i __Y)
Shifts each 64-bit element of the 128-bit vector of [2 x i64] in __X right by the number of bits give...
Definition: avx2intrin.h:3913
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b)
Compares the corresponding signed 32-bit integers in the two 256-bit vectors of [8 x i32] in __a and ...
Definition: avx2intrin.h:1262
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X right by the number of bits give...
Definition: avx2intrin.h:3847
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu16_epi64(__m128i __V)
Zero-extends 16-bit elements from the lower half of the 128-bit vector of [8 x i16] in __V and return...
Definition: avx2intrin.h:1619
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors of [8 x i32] in __a and __b...
Definition: avx2intrin.h:2898
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi8_epi64(__m128i __V)
Sign-extends the first four bytes from the 128-bit integer vector in __V and returns the 64-bit value...
Definition: avx2intrin.h:1421
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srli_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by __count bits,...
Definition: avx2intrin.h:2402
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi16_epi64(__m128i __V)
Sign-extends 16-bit elements from the lower half of the 128-bit vector of [8 x i16] in __V and return...
Definition: avx2intrin.h:1472
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_si256(__m256i __a, __m256i __b)
Computes the bitwise AND of the 256-bit integer vectors in __a and __b.
Definition: avx2intrin.h:465
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_slli_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a left by __count bits,...
Definition: avx2intrin.h:2163
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srai_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by __count bits,...
Definition: avx2intrin.h:2283
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b)
Compares the corresponding unsigned 32-bit integers in the two 256-bit vectors of [8 x i32] in __a an...
Definition: avx2intrin.h:1319
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a, __m256i __b)
Subtracts 32-bit integers from corresponding elements of two 256-bit vectors of [8 x i32].
Definition: avx2intrin.h:2541
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastsd_pd(__m128d __X)
Broadcasts the 64-bit floating-point value from the low element of the 128-bit vector of [2 x double]...
Definition: avx2intrin.h:3027
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu16_epi32(__m128i __V)
Zero-extends 16-bit elements from the 128-bit vector of [8 x i16] in __V and returns the 32-bit value...
Definition: avx2intrin.h:1595
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi64(__m128i __X, __m128i __Y)
Shifts each 64-bit element of the 128-bit vector of [2 x i64] in __X left by the number of bits given...
Definition: avx2intrin.h:3779
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X left by the number of bits given...
Definition: avx2intrin.h:3735
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a right by the number of bits give...
Definition: avx2intrin.h:2461
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi16_epi32(__m128i __V)
Sign-extends 16-bit elements from the 128-bit vector of [8 x i16] in __V and returns the 32-bit value...
Definition: avx2intrin.h:1448
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastd_epi32(__m128i __X)
Broadcasts the low element from the 128-bit vector of [4 x i32] in __X to all elements of the result'...
Definition: avx2intrin.h:3223
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastd_epi32(__m128i __X)
Broadcasts the low element from the 128-bit vector of [4 x i32] in __X to all elements of the result'...
Definition: avx2intrin.h:3163
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi64(__m256i __X, __m256i __Y)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __X right by the number of bits give...
Definition: avx2intrin.h:3891
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi32(__m256i __a, __m256i __b)
Adds 32-bit integers from corresponding elements of two 256-bit vectors of [8 x i32] and returns the ...
Definition: avx2intrin.h:330
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi64(__m256i __X, __m256i __Y)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __X left by the number of bits given...
Definition: avx2intrin.h:3757
unsigned char __mmask8
Definition: avx512fintrin.h:41
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu32(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi32(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
#define _mm_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi64(__m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi32(void *__P, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi32(__m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi64(void *__P, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask(__m256i __A, __m256i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi64(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_ps(__mmask8 __U, __m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
#define _mm256_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_compress_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B)
#define _mm_cmpeq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi64(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi64(void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_getexp_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64(__m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi32(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W)
#define _mm256_permutexvar_epi32(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srai_epi64(__m128i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi32(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_pd(__mmask8 __U, __m256d __A)
#define _mm256_cmpeq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B)
/* Takes a __mmask8 and two __m256i operands and returns a __m256i, so it uses
 * the 256-bit attribute set (__min_vector_width__(256)) like the other
 * _mm256_* entries with 256-bit operands; the constexpr variant is kept. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_scalef_pd(__m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ps(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expand_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcast_i32x4(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi64(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi16(__m256i __A)
#define _mm_cmpneq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A)
#define __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
#define _mm256_cmpeq_epi32_mask(A, B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutexvar_pd(__m256i __X, __m256d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B)
#define _mm_cmpeq_epi32_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_pd(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ps(__m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu32(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16(__m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
short __v2hi __attribute__((__vector_size__(4)))
#define __DEFAULT_FN_ATTRS256
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_scalef_ps(__m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16(__m128i __A)
#define _mm256_permutexvar_ps(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_abs_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi64(__mmask8 __U, __m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
#define __DEFAULT_FN_ATTRS128
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi64(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ps(__m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B)
#define _mm_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi64(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi32(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi64(__m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi32(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu32_pd(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi32(__m256i __a, __m256i __b)
#define _mm_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expand_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sra_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
#define _mm_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi64(__m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi32(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi64(void *__P, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi32(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ps(__m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_getexp_ps(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srai_epi64(__m256i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu32(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_compress_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi32(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi32(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi64(void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi64(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_cmpneq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
#define _mm_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32].
Definition: avxintrin.h:2290
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a)
Calculates the square roots of the values in a 256-bit vector of [8 x float].
Definition: avxintrin.h:365
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_pd(__m256d __a, __m256d __b)
Subtracts two 256-bit vectors of [4 x double].
Definition: avxintrin.h:125
static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].
Definition: avxintrin.h:2215
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_pd(__m256d __a, __m256d __b)
Multiplies two 256-bit vectors of [4 x double].
Definition: avxintrin.h:315
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps(__m256 __a, __m256i __c)
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vecto...
Definition: avxintrin.h:991
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_pd(__m256d __a, __m256d __b)
Divides two 256-bit vectors of [4 x double].
Definition: avxintrin.h:197
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movehdup_ps(__m256 __a)
Moves and duplicates odd-indexed values from a 256-bit vector of [8 x float] to float values in a 256...
Definition: avxintrin.h:2385
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, __m256 __b)
Subtracts two 256-bit vectors of [8 x float].
Definition: avxintrin.h:141
static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_permutevar_ps(__m128 __a, __m128i __c)
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vecto...
Definition: avxintrin.h:900
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtepi32_ps(__m256i __a)
Converts a vector of [8 x i32] into a vector of [8 x float].
Definition: avxintrin.h:2200
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpacklo_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] ...
Definition: avxintrin.h:2528
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into four signed truncated (rounded toward zero) 32-bit int...
Definition: avxintrin.h:2270
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtps_pd(__m128 __a)
Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double].
Definition: avxintrin.h:2250
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32(__m256 __a)
Converts a vector of [8 x float] into a vector of [8 x i32].
Definition: avxintrin.h:2234
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpackhi_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] ...
Definition: avxintrin.h:2502
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.
Definition: avxintrin.h:297
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_cvtepi32_pd(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x double].
Definition: avxintrin.h:2186
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a)
Converts a vector of [8 x float] into eight signed truncated (rounded toward zero) 32-bit integers re...
Definition: avxintrin.h:2310
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.
Definition: avxintrin.h:255
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition: avxintrin.h:4339
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements se...
Definition: avxintrin.h:4256
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_pd(__m256d __a, __m256d __b)
Adds two 256-bit vectors of [4 x double].
Definition: avxintrin.h:91
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a)
Calculates the square roots of the values in a 256-bit vector of [4 x double].
Definition: avxintrin.h:348
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi64x(long long __q)
Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements se...
Definition: avxintrin.h:4313
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_ps(__m256 __a, __m256 __b)
Adds two 256-bit vectors of [8 x float].
Definition: avxintrin.h:107
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_moveldup_ps(__m256 __a)
Moves and duplicates even-indexed values from a 256-bit vector of [8 x float] to float values in a 25...
Definition: avxintrin.h:2410
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movedup_pd(__m256d __a)
Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to ...
Definition: avxintrin.h:2432
static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_permutevar_pd(__m128d __a, __m128i __c)
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector oper...
Definition: avxintrin.h:806
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition: avxintrin.h:4327
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a, __m256 __b)
Multiplies two 256-bit vectors of [8 x float].
Definition: avxintrin.h:331
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
Definition: avxintrin.h:276
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition: avxintrin.h:4351
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_ps(__m256 __a, __m256 __b)
Divides two 256-bit vectors of [8 x float].
Definition: avxintrin.h:213
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpacklo_pd(__m256d __a, __m256d __b)
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves the...
Definition: avxintrin.h:2476
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_unpackhi_pd(__m256d __a, __m256d __b)
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them...
Definition: avxintrin.h:2455
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd(__m256d __a, __m256i __c)
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector oper...
Definition: avxintrin.h:845
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
Definition: avxintrin.h:234
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
Definition: emmintrin.h:225
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3057
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2829
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
Definition: emmintrin.h:311
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3038
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
Definition: emmintrin.h:2543
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2848
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
Definition: emmintrin.h:1330
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition: emmintrin.h:3885
static __inline__ void int __a
Definition: emmintrin.h:4084
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
Definition: emmintrin.h:2578
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double].
Definition: emmintrin.h:266
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
Definition: emmintrin.h:184
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition: emmintrin.h:3500
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value.
Definition: emmintrin.h:3681
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
Definition: emmintrin.h:4582
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3021
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2865
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
Definition: emmintrin.h:4603
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition: emmintrin.h:1874
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
Definition: emmintrin.h:2471
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
Definition: emmintrin.h:2112
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3074
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
Definition: emmintrin.h:1308
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into four signed truncated (rounded toward zero) 32-bit integers,...
Definition: emmintrin.h:3369
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
Definition: emmintrin.h:4475
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
Definition: emmintrin.h:105
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
Definition: emmintrin.h:4672
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
Definition: emmintrin.h:2681
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
Definition: emmintrin.h:357
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
Definition: emmintrin.h:4692
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
Definition: emmintrin.h:4496
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2812
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
Definition: emmintrin.h:2150
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
Definition: emmintrin.h:3332
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition: emmintrin.h:3716
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2941
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
Definition: emmintrin.h:145
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2923
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
Definition: emmintrin.h:3350
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double]...
Definition: pmmintrin.h:260
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-b...
Definition: pmmintrin.h:159
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored i...
Definition: pmmintrin.h:138
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition: bmi2intrin.h:19
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding even-indexed elements of two 128-bit vectors of [4 x i32] and returns a 128-...
Definition: smmintrin.h:571
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
Definition: smmintrin.h:804
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu16_epi64(__m128i __V)
Zero-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
Definition: smmintrin.h:1435
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
Definition: smmintrin.h:750
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu32_epi64(__m128i __V)
Zero-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
Definition: smmintrin.h:1454
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi8_epi32(__m128i __V)
Sign-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
Definition: smmintrin.h:1257
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi16_epi64(__m128i __V)
Sign-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
Definition: smmintrin.h:1318
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu8_epi32(__m128i __V)
Zero-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
Definition: smmintrin.h:1378
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi8_epi64(__m128i __V)
Sign-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
Definition: smmintrin.h:1278
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
Definition: smmintrin.h:768
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mullo_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding elements of two 128-bit vectors of [4 x i32] and returns the lower 32 bits of...
Definition: smmintrin.h:552
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi16_epi32(__m128i __V)
Sign-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
Definition: smmintrin.h:1299
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi32_epi64(__m128i __V)
Sign-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
Definition: smmintrin.h:1337
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu16_epi32(__m128i __V)
Zero-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
Definition: smmintrin.h:1416
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
Definition: smmintrin.h:786
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu8_epi64(__m128i __V)
Zero-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
Definition: smmintrin.h:1397
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:137
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...
Definition: xmmintrin.h:2798
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition.
Definition: xmmintrin.h:106
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
Definition: xmmintrin.h:2777
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
Definition: xmmintrin.h:226
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values.
Definition: xmmintrin.h:423
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values.
Definition: xmmintrin.h:377
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
Definition: xmmintrin.h:260
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition: xmmintrin.h:2029
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
Definition: xmmintrin.h:147
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
Definition: xmmintrin.h:187