11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
14#ifndef __AVX512VLINTRIN_H
15#define __AVX512VLINTRIN_H
/*
 * Attribute bundle with a minimum vector width of 128.
 *
 * Expands to a single __attribute__ list containing:
 *   - __always_inline__ and __nodebug__
 *   - __target__("avx512vl,no-evex512")
 *   - __min_vector_width__(128)
 * NOTE(review): presumably attached to the 128-bit intrinsic function
 * definitions in this header; the use sites are not visible here - confirm.
 */
17#define __DEFAULT_FN_ATTRS128 \
18 __attribute__((__always_inline__, __nodebug__, \
19 __target__("avx512vl,no-evex512"), \
20 __min_vector_width__(128)))
/*
 * Attribute bundle with a minimum vector width of 256.
 *
 * Expands to a single __attribute__ list containing:
 *   - __always_inline__ and __nodebug__
 *   - __target__("avx512vl,no-evex512")
 *   - __min_vector_width__(256)
 * NOTE(review): presumably attached to the 256-bit intrinsic function
 * definitions in this header; the use sites are not visible here - confirm.
 */
21#define __DEFAULT_FN_ATTRS256 \
22 __attribute__((__always_inline__, __nodebug__, \
23 __target__("avx512vl,no-evex512"), \
24 __min_vector_width__(256)))
/*
 * Fixed-predicate comparison shorthands built on _mm_cmp_epi32_mask and
 * _mm_mask_cmp_epi32_mask.
 *
 * Each cmp<op> macro forwards (A) and (B) plus one _MM_CMPINT_* constant:
 *   cmpeq  -> _MM_CMPINT_EQ    cmpge -> _MM_CMPINT_GE
 *   cmpgt  -> _MM_CMPINT_GT    cmple -> _MM_CMPINT_LE
 *   cmplt  -> _MM_CMPINT_LT    cmpneq -> _MM_CMPINT_NE
 * The _mask_ forms take an extra leading argument k and pass it through
 * unchanged as the first argument of _mm_mask_cmp_epi32_mask.
 * Every argument is parenthesized before forwarding, so expression
 * arguments keep their grouping.
 * NOTE(review): "epi32" presumably means signed 32-bit lanes of a 128-bit
 * vector; confirm against the definition of _mm_cmp_epi32_mask.
 */
32#define _mm_cmpeq_epi32_mask(A, B) \
33 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
34#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
35 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
36#define _mm_cmpge_epi32_mask(A, B) \
37 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
38#define _mm_mask_cmpge_epi32_mask(k, A, B) \
39 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
40#define _mm_cmpgt_epi32_mask(A, B) \
41 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
42#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
43 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
44#define _mm_cmple_epi32_mask(A, B) \
45 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
46#define _mm_mask_cmple_epi32_mask(k, A, B) \
47 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
48#define _mm_cmplt_epi32_mask(A, B) \
49 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
50#define _mm_mask_cmplt_epi32_mask(k, A, B) \
51 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
52#define _mm_cmpneq_epi32_mask(A, B) \
53 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
54#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
55 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/*
 * Fixed-predicate comparison shorthands built on _mm256_cmp_epi32_mask and
 * _mm256_mask_cmp_epi32_mask.
 *
 * Each cmp<op> macro forwards (A) and (B) plus one _MM_CMPINT_* constant:
 *   cmpeq  -> _MM_CMPINT_EQ    cmpge -> _MM_CMPINT_GE
 *   cmpgt  -> _MM_CMPINT_GT    cmple -> _MM_CMPINT_LE
 *   cmplt  -> _MM_CMPINT_LT    cmpneq -> _MM_CMPINT_NE
 * The _mask_ forms take an extra leading argument k and pass it through
 * unchanged as the first argument of _mm256_mask_cmp_epi32_mask.
 * Every argument is parenthesized before forwarding, so expression
 * arguments keep their grouping.
 * NOTE(review): "epi32" presumably means signed 32-bit lanes of a 256-bit
 * vector; confirm against the definition of _mm256_cmp_epi32_mask.
 */
57#define _mm256_cmpeq_epi32_mask(A, B) \
58 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
59#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
60 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
61#define _mm256_cmpge_epi32_mask(A, B) \
62 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
63#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
64 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
65#define _mm256_cmpgt_epi32_mask(A, B) \
66 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
67#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
68 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
69#define _mm256_cmple_epi32_mask(A, B) \
70 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
71#define _mm256_mask_cmple_epi32_mask(k, A, B) \
72 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
73#define _mm256_cmplt_epi32_mask(A, B) \
74 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
75#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
76 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
77#define _mm256_cmpneq_epi32_mask(A, B) \
78 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
79#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
80 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/*
 * Fixed-predicate comparison shorthands built on _mm_cmp_epu32_mask and
 * _mm_mask_cmp_epu32_mask.
 *
 * Each cmp<op> macro forwards (A) and (B) plus one _MM_CMPINT_* constant:
 *   cmpeq  -> _MM_CMPINT_EQ    cmpge -> _MM_CMPINT_GE
 *   cmpgt  -> _MM_CMPINT_GT    cmple -> _MM_CMPINT_LE
 *   cmplt  -> _MM_CMPINT_LT    cmpneq -> _MM_CMPINT_NE
 * The _mask_ forms take an extra leading argument k and pass it through
 * unchanged as the first argument of _mm_mask_cmp_epu32_mask.
 * Every argument is parenthesized before forwarding, so expression
 * arguments keep their grouping.
 * NOTE(review): "epu32" presumably means unsigned 32-bit lanes of a 128-bit
 * vector; confirm against the definition of _mm_cmp_epu32_mask.
 */
82#define _mm_cmpeq_epu32_mask(A, B) \
83 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
84#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
85 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
86#define _mm_cmpge_epu32_mask(A, B) \
87 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
88#define _mm_mask_cmpge_epu32_mask(k, A, B) \
89 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
90#define _mm_cmpgt_epu32_mask(A, B) \
91 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
92#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
93 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
94#define _mm_cmple_epu32_mask(A, B) \
95 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
96#define _mm_mask_cmple_epu32_mask(k, A, B) \
97 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
98#define _mm_cmplt_epu32_mask(A, B) \
99 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
100#define _mm_mask_cmplt_epu32_mask(k, A, B) \
101 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
102#define _mm_cmpneq_epu32_mask(A, B) \
103 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
104#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
105 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/*
 * Fixed-predicate comparison shorthands built on _mm256_cmp_epu32_mask and
 * _mm256_mask_cmp_epu32_mask.
 *
 * Each cmp<op> macro forwards (A) and (B) plus one _MM_CMPINT_* constant:
 *   cmpeq  -> _MM_CMPINT_EQ    cmpge -> _MM_CMPINT_GE
 *   cmpgt  -> _MM_CMPINT_GT    cmple -> _MM_CMPINT_LE
 *   cmplt  -> _MM_CMPINT_LT    cmpneq -> _MM_CMPINT_NE
 * The _mask_ forms take an extra leading argument k and pass it through
 * unchanged as the first argument of _mm256_mask_cmp_epu32_mask.
 * Every argument is parenthesized before forwarding, so expression
 * arguments keep their grouping.
 * NOTE(review): "epu32" presumably means unsigned 32-bit lanes of a 256-bit
 * vector; confirm against the definition of _mm256_cmp_epu32_mask.
 */
107#define _mm256_cmpeq_epu32_mask(A, B) \
108 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
109#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
110 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
111#define _mm256_cmpge_epu32_mask(A, B) \
112 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
113#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
114 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
115#define _mm256_cmpgt_epu32_mask(A, B) \
116 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
117#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
118 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
119#define _mm256_cmple_epu32_mask(A, B) \
120 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
121#define _mm256_mask_cmple_epu32_mask(k, A, B) \
122 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
123#define _mm256_cmplt_epu32_mask(A, B) \
124 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
125#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
126 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
127#define _mm256_cmpneq_epu32_mask(A, B) \
128 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
129#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
130 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/*
 * Fixed-predicate comparison shorthands built on _mm_cmp_epi64_mask and
 * _mm_mask_cmp_epi64_mask.
 *
 * Each cmp<op> macro forwards (A) and (B) plus one _MM_CMPINT_* constant:
 *   cmpeq  -> _MM_CMPINT_EQ    cmpge -> _MM_CMPINT_GE
 *   cmpgt  -> _MM_CMPINT_GT    cmple -> _MM_CMPINT_LE
 *   cmplt  -> _MM_CMPINT_LT    cmpneq -> _MM_CMPINT_NE
 * The _mask_ forms take an extra leading argument k and pass it through
 * unchanged as the first argument of _mm_mask_cmp_epi64_mask.
 * Every argument is parenthesized before forwarding, so expression
 * arguments keep their grouping.
 * NOTE(review): "epi64" presumably means signed 64-bit lanes of a 128-bit
 * vector; confirm against the definition of _mm_cmp_epi64_mask.
 */
132#define _mm_cmpeq_epi64_mask(A, B) \
133 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
134#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
135 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
136#define _mm_cmpge_epi64_mask(A, B) \
137 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
138#define _mm_mask_cmpge_epi64_mask(k, A, B) \
139 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
140#define _mm_cmpgt_epi64_mask(A, B) \
141 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
142#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
143 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
144#define _mm_cmple_epi64_mask(A, B) \
145 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
146#define _mm_mask_cmple_epi64_mask(k, A, B) \
147 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
148#define _mm_cmplt_epi64_mask(A, B) \
149 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
150#define _mm_mask_cmplt_epi64_mask(k, A, B) \
151 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
152#define _mm_cmpneq_epi64_mask(A, B) \
153 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
154#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
155 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/*
 * Fixed-predicate comparison shorthands built on _mm256_cmp_epi64_mask and
 * _mm256_mask_cmp_epi64_mask.
 *
 * Each cmp<op> macro forwards (A) and (B) plus one _MM_CMPINT_* constant:
 *   cmpeq  -> _MM_CMPINT_EQ    cmpge -> _MM_CMPINT_GE
 *   cmpgt  -> _MM_CMPINT_GT    cmple -> _MM_CMPINT_LE
 *   cmplt  -> _MM_CMPINT_LT    cmpneq -> _MM_CMPINT_NE
 * The _mask_ forms take an extra leading argument k and pass it through
 * unchanged as the first argument of _mm256_mask_cmp_epi64_mask.
 * Every argument is parenthesized before forwarding, so expression
 * arguments keep their grouping.
 * NOTE(review): "epi64" presumably means signed 64-bit lanes of a 256-bit
 * vector; confirm against the definition of _mm256_cmp_epi64_mask.
 */
157#define _mm256_cmpeq_epi64_mask(A, B) \
158 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
159#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
160 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
161#define _mm256_cmpge_epi64_mask(A, B) \
162 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
163#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
164 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
165#define _mm256_cmpgt_epi64_mask(A, B) \
166 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
167#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
168 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
169#define _mm256_cmple_epi64_mask(A, B) \
170 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
171#define _mm256_mask_cmple_epi64_mask(k, A, B) \
172 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
173#define _mm256_cmplt_epi64_mask(A, B) \
174 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
175#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
176 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
177#define _mm256_cmpneq_epi64_mask(A, B) \
178 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
179#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
180 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/*
 * Fixed-predicate comparison shorthands built on _mm_cmp_epu64_mask and
 * _mm_mask_cmp_epu64_mask.
 *
 * Each cmp<op> macro forwards (A) and (B) plus one _MM_CMPINT_* constant:
 *   cmpeq  -> _MM_CMPINT_EQ    cmpge -> _MM_CMPINT_GE
 *   cmpgt  -> _MM_CMPINT_GT    cmple -> _MM_CMPINT_LE
 *   cmplt  -> _MM_CMPINT_LT    cmpneq -> _MM_CMPINT_NE
 * The _mask_ forms take an extra leading argument k and pass it through
 * unchanged as the first argument of _mm_mask_cmp_epu64_mask.
 * Every argument is parenthesized before forwarding, so expression
 * arguments keep their grouping.
 * NOTE(review): "epu64" presumably means unsigned 64-bit lanes of a 128-bit
 * vector; confirm against the definition of _mm_cmp_epu64_mask.
 */
182#define _mm_cmpeq_epu64_mask(A, B) \
183 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
184#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
185 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
186#define _mm_cmpge_epu64_mask(A, B) \
187 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
188#define _mm_mask_cmpge_epu64_mask(k, A, B) \
189 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
190#define _mm_cmpgt_epu64_mask(A, B) \
191 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
192#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
193 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
194#define _mm_cmple_epu64_mask(A, B) \
195 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
196#define _mm_mask_cmple_epu64_mask(k, A, B) \
197 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
198#define _mm_cmplt_epu64_mask(A, B) \
199 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
200#define _mm_mask_cmplt_epu64_mask(k, A, B) \
201 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
202#define _mm_cmpneq_epu64_mask(A, B) \
203 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
204#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
205 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
/*
 * Fixed-predicate comparison shorthands built on _mm256_cmp_epu64_mask and
 * _mm256_mask_cmp_epu64_mask.
 *
 * Each cmp<op> macro forwards (A) and (B) plus one _MM_CMPINT_* constant:
 *   cmpeq  -> _MM_CMPINT_EQ    cmpge -> _MM_CMPINT_GE
 *   cmpgt  -> _MM_CMPINT_GT    cmple -> _MM_CMPINT_LE
 *   cmplt  -> _MM_CMPINT_LT    cmpneq -> _MM_CMPINT_NE
 * The _mask_ forms take an extra leading argument k and pass it through
 * unchanged as the first argument of _mm256_mask_cmp_epu64_mask.
 * Every argument is parenthesized before forwarding, so expression
 * arguments keep their grouping.
 * NOTE(review): "epu64" presumably means unsigned 64-bit lanes of a 256-bit
 * vector; confirm against the definition of _mm256_cmp_epu64_mask.
 */
207#define _mm256_cmpeq_epu64_mask(A, B) \
208 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
209#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
210 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
211#define _mm256_cmpge_epu64_mask(A, B) \
212 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
213#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
214 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
215#define _mm256_cmpgt_epu64_mask(A, B) \
216 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
217#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
218 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
219#define _mm256_cmple_epu64_mask(A, B) \
220 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
221#define _mm256_mask_cmple_epu64_mask(k, A, B) \
222 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
223#define _mm256_cmplt_epu64_mask(A, B) \
224 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
225#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
226 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
227#define _mm256_cmpneq_epu64_mask(A, B) \
228 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
229#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
230 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
235 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
243 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
251 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
259 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
267 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
275 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
283 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
291 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
299 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
307 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
315 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
323 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
331 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
339 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
347 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
355 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
363 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
371 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
379 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
387 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
395 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
403 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
411 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
419 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
427 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
435 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
443 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
451 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
459 return (__m256i)((__v8su)
__a & (__v8su)
__b);
465 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
479 return (__m128i)((__v4su)
__a & (__v4su)
__b);
485 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
499 return (__m256i)(~(__v8su)__A & (__v8su)__B);
505 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
520 return (__m128i)(~(__v4su)__A & (__v4su)__B);
526 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
540 return (__m256i)((__v8su)
__a | (__v8su)
__b);
546 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
560 return (__m128i)((__v4su)
__a | (__v4su)
__b);
566 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
580 return (__m256i)((__v8su)
__a ^ (__v8su)
__b);
586 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
600 return (__m128i)((__v4su)
__a ^ (__v4su)
__b);
606 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
620 return (__m256i)((__v4du)
__a & (__v4du)
__b);
626 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
640 return (__m128i)((__v2du)
__a & (__v2du)
__b);
646 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
660 return (__m256i)(~(__v4du)__A & (__v4du)__B);
666 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
681 return (__m128i)(~(__v2du)__A & (__v2du)__B);
687 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
701 return (__m256i)((__v4du)
__a | (__v4du)
__b);
707 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
721 return (__m128i)((__v2du)
__a | (__v2du)
__b);
727 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
741 return (__m256i)((__v4du)
__a ^ (__v4du)
__b);
747 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
761 return (__m128i)((__v2du)
__a ^ (__v2du)
__b);
768 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
779#define _mm_cmp_epi32_mask(a, b, p) \
780 ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
781 (__v4si)(__m128i)(b), (int)(p), \
784#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
785 ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
786 (__v4si)(__m128i)(b), (int)(p), \
789#define _mm_cmp_epu32_mask(a, b, p) \
790 ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
791 (__v4si)(__m128i)(b), (int)(p), \
794#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
795 ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
796 (__v4si)(__m128i)(b), (int)(p), \
799#define _mm256_cmp_epi32_mask(a, b, p) \
800 ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
801 (__v8si)(__m256i)(b), (int)(p), \
804#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
805 ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
806 (__v8si)(__m256i)(b), (int)(p), \
809#define _mm256_cmp_epu32_mask(a, b, p) \
810 ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
811 (__v8si)(__m256i)(b), (int)(p), \
814#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
815 ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
816 (__v8si)(__m256i)(b), (int)(p), \
819#define _mm_cmp_epi64_mask(a, b, p) \
820 ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
821 (__v2di)(__m128i)(b), (int)(p), \
824#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
825 ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
826 (__v2di)(__m128i)(b), (int)(p), \
829#define _mm_cmp_epu64_mask(a, b, p) \
830 ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
831 (__v2di)(__m128i)(b), (int)(p), \
834#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
835 ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
836 (__v2di)(__m128i)(b), (int)(p), \
839#define _mm256_cmp_epi64_mask(a, b, p) \
840 ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
841 (__v4di)(__m256i)(b), (int)(p), \
844#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
845 ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
846 (__v4di)(__m256i)(b), (int)(p), \
849#define _mm256_cmp_epu64_mask(a, b, p) \
850 ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
851 (__v4di)(__m256i)(b), (int)(p), \
854#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
855 ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
856 (__v4di)(__m256i)(b), (int)(p), \
859#define _mm256_cmp_ps_mask(a, b, p) \
860 ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
861 (__v8sf)(__m256)(b), (int)(p), \
864#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
865 ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
866 (__v8sf)(__m256)(b), (int)(p), \
869#define _mm256_cmp_pd_mask(a, b, p) \
870 ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
871 (__v4df)(__m256d)(b), (int)(p), \
874#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
875 ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
876 (__v4df)(__m256d)(b), (int)(p), \
879#define _mm_cmp_ps_mask(a, b, p) \
880 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
881 (__v4sf)(__m128)(b), (int)(p), \
884#define _mm_mask_cmp_ps_mask(m, a, b, p) \
885 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
886 (__v4sf)(__m128)(b), (int)(p), \
889#define _mm_cmp_pd_mask(a, b, p) \
890 ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
891 (__v2df)(__m128d)(b), (int)(p), \
894#define _mm_mask_cmp_pd_mask(m, a, b, p) \
895 ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
896 (__v2df)(__m128d)(b), (int)(p), \
902 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
903 __builtin_ia32_vfmaddpd ((__v2df) __A,
912 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
913 __builtin_ia32_vfmaddpd ((__v2df) __A,
922 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
923 __builtin_ia32_vfmaddpd ((__v2df) __A,
932 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
933 __builtin_ia32_vfmaddpd ((__v2df) __A,
942 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
943 __builtin_ia32_vfmaddpd ((__v2df) __A,
952 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
953 __builtin_ia32_vfmaddpd (-(__v2df) __A,
962 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
963 __builtin_ia32_vfmaddpd (-(__v2df) __A,
972 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
973 __builtin_ia32_vfmaddpd (-(__v2df) __A,
982 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
983 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
992 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
993 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1002 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1003 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1012 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1013 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1022 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1023 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1032 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1033 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1042 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1043 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1052 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1053 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1062 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1063 __builtin_ia32_vfmaddps ((__v4sf) __A,
1072 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1073 __builtin_ia32_vfmaddps ((__v4sf) __A,
1082 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1083 __builtin_ia32_vfmaddps ((__v4sf) __A,
1092 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1093 __builtin_ia32_vfmaddps ((__v4sf) __A,
1102 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1103 __builtin_ia32_vfmaddps ((__v4sf) __A,
1112 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1113 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1122 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1123 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1132 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1133 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1142 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1143 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1152 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1153 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1162 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1163 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1172 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1173 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1182 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1183 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1192 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1193 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1202 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1203 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1212 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1213 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1222 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1223 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1232 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1233 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1242 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1243 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1252 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1253 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1262 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1263 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1272 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1273 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1282 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1283 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1292 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1293 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1302 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1303 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1312 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1313 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1322 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1323 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1332 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1333 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1342 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1343 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1352 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1353 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1362 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1363 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1373 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1374 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1383 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1384 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1393 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1394 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1403 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1404 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1413 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1414 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1423 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1424 __builtin_ia32_vfmaddpd ((__v2df) __A,
1433 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1434 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1443 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1444 __builtin_ia32_vfmaddps ((__v4sf) __A,
1453 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1454 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1463 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1464 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1473 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1474 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1483 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1484 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1493 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1494 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1503 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1504 __builtin_ia32_vfmaddpd ((__v2df) __A,
1513 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1514 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1523 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1524 __builtin_ia32_vfmaddps ((__v4sf) __A,
1533 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1534 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1543 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1544 __builtin_ia32_vfmaddpd ((__v2df) __A,
1553 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1554 __builtin_ia32_vfmaddpd ((__v2df) __A,
1563 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1564 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1573 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1574 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1583 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1584 __builtin_ia32_vfmaddps ((__v4sf) __A,
1593 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1594 __builtin_ia32_vfmaddps ((__v4sf) __A,
1603 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1604 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1613 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1614 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1622 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
1629 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
1636 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
1643 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
1650 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1657 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1664 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1671 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1678 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
1685 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
1692 return (__m128d) __builtin_ia32_selectpd_128 ((
__mmask8) __U,
1699 return (__m256d) __builtin_ia32_selectpd_256 ((
__mmask8) __U,
1706 return (__m128) __builtin_ia32_selectps_128 ((
__mmask8) __U,
1713 return (__m256) __builtin_ia32_selectps_256 ((
__mmask8) __U,
1720 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
1727 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
1734 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1741 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1749 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1756 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1764 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1771 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1779 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1786 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1794 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1801 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1809 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1816 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1824 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1831 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1839 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1846 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1854 __builtin_ia32_compressstoredf128_mask ((__v2df *)
__P,
1861 __builtin_ia32_compressstoredf256_mask ((__v4df *)
__P,
1868 __builtin_ia32_compressstoredi128_mask ((__v2di *)
__P,
1875 __builtin_ia32_compressstoredi256_mask ((__v4di *)
__P,
1882 __builtin_ia32_compressstoresf128_mask ((__v4sf *)
__P,
1889 __builtin_ia32_compressstoresf256_mask ((__v8sf *)
__P,
1896 __builtin_ia32_compressstoresi128_mask ((__v4si *)
__P,
1903 __builtin_ia32_compressstoresi256_mask ((__v8si *)
__P,
1910 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
1917 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
1924 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
1931 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
1938 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1945 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1952 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1959 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1966 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1973 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1981 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1988 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1995 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2002 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2010 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2017 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2024 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2032 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2039 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2047 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2055 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2062 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2070 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2077 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2084 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2091 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2098 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2105 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2112 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2119 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2126 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2134 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2141 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2149 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2157 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2164 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2172 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2179 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2187 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2194 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2201 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2209 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2216 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2224 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2232 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2239 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2247 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2254 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2261 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2268 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2275 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2283 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2290 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2298 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2306 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2313 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2321 return (__m128d) __builtin_convertvector(
2322 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2327 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
2334 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
2341 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2346 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
2353 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
2360 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2365 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2372 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2379 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2384 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2391 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2398 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2405 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2412 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2419 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2426 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2433 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2440 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2447 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2454 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2461 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2469 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2476 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2484 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2491 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2499 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2506 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2514 return (__m128d) __builtin_ia32_expandloaddf128_mask ((
const __v2df *)
__P,
2522 return (__m128d) __builtin_ia32_expandloaddf128_mask ((
const __v2df *)
__P,
2531 return (__m256d) __builtin_ia32_expandloaddf256_mask ((
const __v4df *)
__P,
2539 return (__m256d) __builtin_ia32_expandloaddf256_mask ((
const __v4df *)
__P,
2548 return (__m128i) __builtin_ia32_expandloaddi128_mask ((
const __v2di *)
__P,
2556 return (__m128i) __builtin_ia32_expandloaddi128_mask ((
const __v2di *)
__P,
2566 return (__m256i) __builtin_ia32_expandloaddi256_mask ((
const __v4di *)
__P,
2574 return (__m256i) __builtin_ia32_expandloaddi256_mask ((
const __v4di *)
__P,
2583 return (__m128) __builtin_ia32_expandloadsf128_mask ((
const __v4sf *)
__P,
2590 return (__m128) __builtin_ia32_expandloadsf128_mask ((
const __v4sf *)
__P,
2599 return (__m256) __builtin_ia32_expandloadsf256_mask ((
const __v8sf *)
__P,
2606 return (__m256) __builtin_ia32_expandloadsf256_mask ((
const __v8sf *)
__P,
2615 return (__m128i) __builtin_ia32_expandloadsi128_mask ((
const __v4si *)
__P,
2623 return (__m128i) __builtin_ia32_expandloadsi128_mask ((
const __v4si *)
__P,
2632 return (__m256i) __builtin_ia32_expandloadsi256_mask ((
const __v8si *)
__P,
2640 return (__m256i) __builtin_ia32_expandloadsi256_mask ((
const __v8si *)
__P,
2649 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2656 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2664 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2671 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2679 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2686 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2694 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2701 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2709 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2717 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2724 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2732 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2740 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2747 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2755 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2763 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2770 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2778 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2786 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2793 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2801 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2808 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2815 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2822 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2829 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2836 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2843 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2850 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2857 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2864 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2871 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2878 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2885 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2892 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2899 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2906 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2913 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2920 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2927 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2934 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2941 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2948 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2955 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2962 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2969 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2976 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2983 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2990 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2997 return (__m128i)__builtin_elementwise_abs((__v2di)__A);
3002 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
3009 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
3016 return (__m256i)__builtin_elementwise_abs((__v4di)__A);
3021 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
3028 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
3035 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3042 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3049 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3056 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3063 return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
3068 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3075 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3082 return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
3087 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3094 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3101 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3108 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3115 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3122 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3129 return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
3134 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3141 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3148 return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
3153 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3160 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3167 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3174 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3181 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3188 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3195 return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
3200 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3207 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3214 return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
3219 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3226 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3233 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3240 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3247 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3254 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3261 return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
3266 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3273 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3280 return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
3285 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3292 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3297#define _mm_roundscale_pd(A, imm) \
3298 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3300 (__v2df)_mm_setzero_pd(), \
3304#define _mm_mask_roundscale_pd(W, U, A, imm) \
3305 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3307 (__v2df)(__m128d)(W), \
3311#define _mm_maskz_roundscale_pd(U, A, imm) \
3312 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3314 (__v2df)_mm_setzero_pd(), \
3318#define _mm256_roundscale_pd(A, imm) \
3319 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3321 (__v4df)_mm256_setzero_pd(), \
3325#define _mm256_mask_roundscale_pd(W, U, A, imm) \
3326 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3328 (__v4df)(__m256d)(W), \
3332#define _mm256_maskz_roundscale_pd(U, A, imm) \
3333 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3335 (__v4df)_mm256_setzero_pd(), \
3338#define _mm_roundscale_ps(A, imm) \
3339 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3340 (__v4sf)_mm_setzero_ps(), \
3344#define _mm_mask_roundscale_ps(W, U, A, imm) \
3345 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3346 (__v4sf)(__m128)(W), \
3350#define _mm_maskz_roundscale_ps(U, A, imm) \
3351 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3352 (__v4sf)_mm_setzero_ps(), \
3355#define _mm256_roundscale_ps(A, imm) \
3356 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3357 (__v8sf)_mm256_setzero_ps(), \
3360#define _mm256_mask_roundscale_ps(W, U, A, imm) \
3361 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3362 (__v8sf)(__m256)(W), \
3366#define _mm256_maskz_roundscale_ps(U, A, imm) \
3367 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3368 (__v8sf)_mm256_setzero_ps(), \
3373 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3383 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3391 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3400 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3410 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3418 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3427 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3436 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3444 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3453 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3463 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3471 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
/* _mm_i64scatter_pd(addr, index, v1, scale): statement-like macro that calls
 * __builtin_ia32_scatterdiv2df. addr is passed as void *, index as __v2di,
 * v1 as __v2df and scale as int; the mask operand is the constant
 * (__mmask8)-1.
 * NOTE(review): presumably an unmasked scatter of the two doubles in v1 to
 * addr + index[i] * scale -- confirm against the Intel Intrinsics Guide. */
3478#define _mm_i64scatter_pd(addr, index, v1, scale) \
3479 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
3480 (__v2di)(__m128i)(index), \
3481 (__v2df)(__m128d)(v1), (int)(scale))
/* _mm_mask_i64scatter_pd(addr, mask, index, v1, scale): same builtin as the
 * unmasked form (__builtin_ia32_scatterdiv2df), but the caller's mask is
 * forwarded as __mmask8 instead of the constant (__mmask8)-1. index is passed
 * as __v2di, v1 as __v2df, scale as int.
 * NOTE(review): presumably only elements whose mask bit is set are stored --
 * confirm. */
3483#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
3484 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
3485 (__v2di)(__m128i)(index), \
3486 (__v2df)(__m128d)(v1), (int)(scale))
/* _mm_i64scatter_epi64(addr, index, v1, scale): calls
 * __builtin_ia32_scatterdiv2di with addr as void *, the constant mask
 * (__mmask8)-1, index and v1 both as __v2di, and scale as int.
 * NOTE(review): presumably scatters two 64-bit integers using 64-bit
 * indices -- confirm. */
3488#define _mm_i64scatter_epi64(addr, index, v1, scale) \
3489 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
3490 (__v2di)(__m128i)(index), \
3491 (__v2di)(__m128i)(v1), (int)(scale))
/* _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scatterdiv2di with the caller-supplied mask cast to
 * __mmask8; index and v1 are both passed as __v2di, scale as int.
 * NOTE(review): presumably elements with a clear mask bit are not written --
 * confirm. */
3493#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
3494 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
3495 (__v2di)(__m128i)(index), \
3496 (__v2di)(__m128i)(v1), (int)(scale))
/* _mm256_i64scatter_pd(addr, index, v1, scale): calls
 * __builtin_ia32_scatterdiv4df with addr as void *, the constant mask
 * (__mmask8)-1, index as __v4di (via __m256i), v1 as __v4df (via __m256d)
 * and scale as int.
 * NOTE(review): presumably scatters four doubles using 64-bit indices --
 * confirm. */
3498#define _mm256_i64scatter_pd(addr, index, v1, scale) \
3499 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
3500 (__v4di)(__m256i)(index), \
3501 (__v4df)(__m256d)(v1), (int)(scale))
/* _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scatterdiv4df, forwarding mask as __mmask8, index as
 * __v4di, v1 as __v4df and scale as int.
 * NOTE(review): presumably the masked counterpart of _mm256_i64scatter_pd;
 * confirm which elements are skipped when a mask bit is clear. */
3503#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
3504 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
3505 (__v4di)(__m256i)(index), \
3506 (__v4df)(__m256d)(v1), (int)(scale))
/* _mm256_i64scatter_epi64(addr, index, v1, scale): calls
 * __builtin_ia32_scatterdiv4di with addr as void *, the constant mask
 * (__mmask8)-1, index and v1 both as __v4di (via __m256i), and scale as int.
 * NOTE(review): presumably scatters four 64-bit integers using 64-bit
 * indices -- confirm. */
3508#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
3509 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
3510 (__v4di)(__m256i)(index), \
3511 (__v4di)(__m256i)(v1), (int)(scale))
/* _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scatterdiv4di, forwarding mask as __mmask8; index and v1
 * are both passed as __v4di, scale as int.
 * NOTE(review): presumably only elements with a set mask bit are stored --
 * confirm. */
3513#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
3514 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
3515 (__v4di)(__m256i)(index), \
3516 (__v4di)(__m256i)(v1), (int)(scale))
3518#define _mm_i64scatter_ps(addr, index, v1, scale) \
3519 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
3520 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
3523#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
3524 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
3525 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* _mm_i64scatter_epi32(addr, index, v1, scale): calls
 * __builtin_ia32_scatterdiv4si with addr as void *, the constant mask
 * (__mmask8)-1, index as __v2di, v1 as __v4si and scale as int.
 * NOTE(review): index has two 64-bit elements while v1 is typed as four
 * 32-bit elements; presumably only the low two elements of v1 are stored --
 * confirm against the Intel Intrinsics Guide. */
3528#define _mm_i64scatter_epi32(addr, index, v1, scale) \
3529 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
3530 (__v2di)(__m128i)(index), \
3531 (__v4si)(__m128i)(v1), (int)(scale))
/* _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scatterdiv4si, forwarding mask as __mmask8, index as
 * __v2di, v1 as __v4si and scale as int.
 * NOTE(review): presumably the masked counterpart of _mm_i64scatter_epi32;
 * confirm how many low mask bits are significant. */
3533#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
3534 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
3535 (__v2di)(__m128i)(index), \
3536 (__v4si)(__m128i)(v1), (int)(scale))
3538#define _mm256_i64scatter_ps(addr, index, v1, scale) \
3539 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
3540 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
3543#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
3544 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
3545 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
/* _mm256_i64scatter_epi32(addr, index, v1, scale): calls
 * __builtin_ia32_scatterdiv8si with addr as void *, the constant mask
 * (__mmask8)-1, index as __v4di (via __m256i), v1 as __v4si (via __m128i)
 * and scale as int. Note that index goes through the 256-bit type while the
 * data operand v1 goes through the 128-bit type.
 * NOTE(review): presumably scatters four 32-bit integers using four 64-bit
 * indices -- confirm. */
3548#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
3549 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
3550 (__v4di)(__m256i)(index), \
3551 (__v4si)(__m128i)(v1), (int)(scale))
/* _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scatterdiv8si, forwarding mask as __mmask8, index as
 * __v4di (via __m256i), v1 as __v4si (via __m128i) and scale as int.
 * NOTE(review): presumably only elements with a set mask bit are stored --
 * confirm. */
3553#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
3554 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
3555 (__v4di)(__m256i)(index), \
3556 (__v4si)(__m128i)(v1), (int)(scale))
/* _mm_i32scatter_pd(addr, index, v1, scale): calls
 * __builtin_ia32_scattersiv2df with addr as void *, the constant mask
 * (__mmask8)-1, index as __v4si, v1 as __v2df and scale as int.
 * NOTE(review): index is typed as four 32-bit elements while v1 holds two
 * doubles; presumably only the low two indices are used -- confirm. */
3558#define _mm_i32scatter_pd(addr, index, v1, scale) \
3559 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
3560 (__v4si)(__m128i)(index), \
3561 (__v2df)(__m128d)(v1), (int)(scale))
/* _mm_mask_i32scatter_pd(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scattersiv2df, forwarding mask as __mmask8, index as
 * __v4si, v1 as __v2df and scale as int.
 * NOTE(review): presumably the masked counterpart of _mm_i32scatter_pd --
 * confirm which mask bits are significant. */
3563#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
3564 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
3565 (__v4si)(__m128i)(index), \
3566 (__v2df)(__m128d)(v1), (int)(scale))
/* _mm_i32scatter_epi64(addr, index, v1, scale): calls
 * __builtin_ia32_scattersiv2di with addr as void *, the constant mask
 * (__mmask8)-1, index as __v4si, v1 as __v2di and scale as int.
 * NOTE(review): presumably scatters two 64-bit integers using the low two
 * 32-bit indices -- confirm. */
3568#define _mm_i32scatter_epi64(addr, index, v1, scale) \
3569 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
3570 (__v4si)(__m128i)(index), \
3571 (__v2di)(__m128i)(v1), (int)(scale))
/* _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scattersiv2di, forwarding mask as __mmask8, index as
 * __v4si, v1 as __v2di and scale as int.
 * NOTE(review): presumably only elements with a set mask bit are stored --
 * confirm. */
3573#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
3574 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
3575 (__v4si)(__m128i)(index), \
3576 (__v2di)(__m128i)(v1), (int)(scale))
/* _mm256_i32scatter_pd(addr, index, v1, scale): calls
 * __builtin_ia32_scattersiv4df with addr as void *, the constant mask
 * (__mmask8)-1, index as __v4si (via __m128i), v1 as __v4df (via __m256d)
 * and scale as int. The index operand goes through the 128-bit type even
 * though the data operand is the 256-bit one.
 * NOTE(review): presumably scatters four doubles using four 32-bit
 * indices -- confirm. */
3578#define _mm256_i32scatter_pd(addr, index, v1, scale) \
3579 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
3580 (__v4si)(__m128i)(index), \
3581 (__v4df)(__m256d)(v1), (int)(scale))
/* _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scattersiv4df, forwarding mask as __mmask8, index as
 * __v4si (via __m128i), v1 as __v4df (via __m256d) and scale as int.
 * NOTE(review): presumably only elements with a set mask bit are stored --
 * confirm. */
3583#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
3584 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
3585 (__v4si)(__m128i)(index), \
3586 (__v4df)(__m256d)(v1), (int)(scale))
/* _mm256_i32scatter_epi64(addr, index, v1, scale): calls
 * __builtin_ia32_scattersiv4di with addr as void *, the constant mask
 * (__mmask8)-1, index as __v4si (via __m128i), v1 as __v4di (via __m256i)
 * and scale as int.
 * NOTE(review): presumably scatters four 64-bit integers using four 32-bit
 * indices -- confirm. */
3588#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
3589 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
3590 (__v4si)(__m128i)(index), \
3591 (__v4di)(__m256i)(v1), (int)(scale))
/* _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scattersiv4di, forwarding mask as __mmask8, index as
 * __v4si (via __m128i), v1 as __v4di (via __m256i) and scale as int.
 * NOTE(review): presumably only elements with a set mask bit are stored --
 * confirm. */
3593#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
3594 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
3595 (__v4si)(__m128i)(index), \
3596 (__v4di)(__m256i)(v1), (int)(scale))
3598#define _mm_i32scatter_ps(addr, index, v1, scale) \
3599 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
3600 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
3603#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3604 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
3605 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* _mm_i32scatter_epi32(addr, index, v1, scale): calls
 * __builtin_ia32_scattersiv4si with addr as void *, the constant mask
 * (__mmask8)-1, index and v1 both as __v4si, and scale as int.
 * NOTE(review): presumably scatters four 32-bit integers using four 32-bit
 * indices -- confirm. */
3608#define _mm_i32scatter_epi32(addr, index, v1, scale) \
3609 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
3610 (__v4si)(__m128i)(index), \
3611 (__v4si)(__m128i)(v1), (int)(scale))
/* _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scattersiv4si, forwarding mask as __mmask8; index and v1
 * are both passed as __v4si, scale as int.
 * NOTE(review): presumably only elements with a set mask bit are stored --
 * confirm. */
3613#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
3614 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
3615 (__v4si)(__m128i)(index), \
3616 (__v4si)(__m128i)(v1), (int)(scale))
3618#define _mm256_i32scatter_ps(addr, index, v1, scale) \
3619 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
3620 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
3623#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3624 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
3625 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
/* _mm256_i32scatter_epi32(addr, index, v1, scale): calls
 * __builtin_ia32_scattersiv8si with addr as void *, the constant mask
 * (__mmask8)-1, index and v1 both as __v8si (via __m256i), and scale as int.
 * NOTE(review): presumably scatters eight 32-bit integers using eight 32-bit
 * indices -- confirm. */
3628#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
3629 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
3630 (__v8si)(__m256i)(index), \
3631 (__v8si)(__m256i)(v1), (int)(scale))
/* _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale): calls
 * __builtin_ia32_scattersiv8si, forwarding mask as __mmask8; index and v1
 * are both passed as __v8si (via __m256i), scale as int.
 * NOTE(review): presumably only elements with a set mask bit are stored --
 * confirm. */
3633#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
3634 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
3635 (__v8si)(__m256i)(index), \
3636 (__v8si)(__m256i)(v1), (int)(scale))
3640 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3647 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3654 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3661 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3668 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3675 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3682 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3689 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3696 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3703 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3710 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3717 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3724 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3731 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3738 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3745 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3752 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3759 return (__m128i)__builtin_ia32_selectd_128(__U,
3767 return (__m128i)__builtin_ia32_selectd_128(__U,
3775 return (__m128i)__builtin_ia32_selectd_128(__U,
3782 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3789 return (__m256i)__builtin_ia32_selectd_256(__U,
3797 return (__m256i)__builtin_ia32_selectd_256(__U,
3805 return (__m256i)__builtin_ia32_selectd_256(__U,
3812 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3818 return (__m128d)__builtin_ia32_selectpd_128(__U,
3825 return (__m128d)__builtin_ia32_selectpd_128(__U,
3827 (__v2df)(__m128d)__I);
3832 return (__m128d)__builtin_ia32_selectpd_128(__U,
3839 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3846 return (__m256d)__builtin_ia32_selectpd_256(__U,
3854 return (__m256d)__builtin_ia32_selectpd_256(__U,
3856 (__v4df)(__m256d)__I);
3862 return (__m256d)__builtin_ia32_selectpd_256(__U,
3869 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3875 return (__m128)__builtin_ia32_selectps_128(__U,
3882 return (__m128)__builtin_ia32_selectps_128(__U,
3884 (__v4sf)(__m128)__I);
3889 return (__m128)__builtin_ia32_selectps_128(__U,
3896 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3902 return (__m256)__builtin_ia32_selectps_256(__U,
3910 return (__m256)__builtin_ia32_selectps_256(__U,
3912 (__v8sf)(__m256)__I);
3918 return (__m256)__builtin_ia32_selectps_256(__U,
3925 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3932 return (__m128i)__builtin_ia32_selectq_128(__U,
3940 return (__m128i)__builtin_ia32_selectq_128(__U,
3948 return (__m128i)__builtin_ia32_selectq_128(__U,
3956 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3963 return (__m256i)__builtin_ia32_selectq_256(__U,
3971 return (__m256i)__builtin_ia32_selectq_256(__U,
3979 return (__m256i)__builtin_ia32_selectq_256(__U,
3987 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
3995 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4003 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4011 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4019 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4027 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4035 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4043 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4051 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4059 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4067 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4075 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4083 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4091 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4099 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4107 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4115 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4123 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4131 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4139 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4148 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4156 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4164 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4172 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4180 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4188 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4196 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4204 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4212 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4220 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4228 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4236 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4244 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4252 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4260 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4268 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4276 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4284 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4292 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4300 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
/* _mm_rol_epi32(a, b): expression macro yielding an __m128i. Passes a as
 * __v4si and b as int to __builtin_ia32_prold128.
 * NOTE(review): presumably rotates each 32-bit element of a left by b bits;
 * b is likely required to be a compile-time constant -- confirm. */
4306#define _mm_rol_epi32(a, b) \
4307 ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
/* _mm_mask_rol_epi32(w, u, a, b): expression macro yielding an __m128i.
 * Computes _mm_rol_epi32(a, b) and hands it, together with w, to
 * __builtin_ia32_selectd_128 under mask u (cast to __mmask8).
 * NOTE(review): presumably elements whose bit in u is set take the rotated
 * value and the rest are copied from w -- confirm select operand order. */
4309#define _mm_mask_rol_epi32(w, u, a, b) \
4310 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4311 (__v4si)_mm_rol_epi32((a), (b)), \
4312 (__v4si)(__m128i)(w)))
/* _mm_maskz_rol_epi32(u, a, b): expression macro yielding an __m128i.
 * Computes _mm_rol_epi32(a, b) and passes it to __builtin_ia32_selectd_128
 * under mask u, with _mm_setzero_si128() as the alternative operand.
 * NOTE(review): presumably elements whose bit in u is clear become zero --
 * confirm. */
4314#define _mm_maskz_rol_epi32(u, a, b) \
4315 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4316 (__v4si)_mm_rol_epi32((a), (b)), \
4317 (__v4si)_mm_setzero_si128()))
/* _mm256_rol_epi32(a, b): expression macro yielding an __m256i. Passes a as
 * __v8si and b as int to __builtin_ia32_prold256.
 * NOTE(review): presumably rotates each 32-bit element of a left by b
 * bits -- confirm. */
4319#define _mm256_rol_epi32(a, b) \
4320 ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
/* _mm256_mask_rol_epi32(w, u, a, b): expression macro yielding an __m256i.
 * Computes _mm256_rol_epi32(a, b) and hands it, together with w, to
 * __builtin_ia32_selectd_256 under mask u (cast to __mmask8).
 * NOTE(review): presumably unselected elements are copied from w --
 * confirm. */
4322#define _mm256_mask_rol_epi32(w, u, a, b) \
4323 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4324 (__v8si)_mm256_rol_epi32((a), (b)), \
4325 (__v8si)(__m256i)(w)))
/* _mm256_maskz_rol_epi32(u, a, b): expression macro yielding an __m256i.
 * Computes _mm256_rol_epi32(a, b) and passes it to
 * __builtin_ia32_selectd_256 under mask u, with _mm256_setzero_si256() as
 * the alternative operand.
 * NOTE(review): presumably unselected elements become zero -- confirm. */
4327#define _mm256_maskz_rol_epi32(u, a, b) \
4328 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4329 (__v8si)_mm256_rol_epi32((a), (b)), \
4330 (__v8si)_mm256_setzero_si256()))
/* _mm_rol_epi64(a, b): expression macro yielding an __m128i. Passes a as
 * __v2di and b as int to __builtin_ia32_prolq128.
 * NOTE(review): presumably rotates each 64-bit element of a left by b
 * bits -- confirm. */
4332#define _mm_rol_epi64(a, b) \
4333 ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
/* _mm_mask_rol_epi64(w, u, a, b): expression macro yielding an __m128i.
 * Computes _mm_rol_epi64(a, b) and hands it, together with w, to
 * __builtin_ia32_selectq_128 under mask u (cast to __mmask8).
 * NOTE(review): presumably unselected elements are copied from w --
 * confirm. */
4335#define _mm_mask_rol_epi64(w, u, a, b) \
4336 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4337 (__v2di)_mm_rol_epi64((a), (b)), \
4338 (__v2di)(__m128i)(w)))
/* _mm_maskz_rol_epi64(u, a, b): expression macro yielding an __m128i.
 * Computes _mm_rol_epi64(a, b) and passes it to __builtin_ia32_selectq_128
 * under mask u, with _mm_setzero_si128() as the alternative operand.
 * NOTE(review): presumably unselected elements become zero -- confirm. */
4340#define _mm_maskz_rol_epi64(u, a, b) \
4341 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4342 (__v2di)_mm_rol_epi64((a), (b)), \
4343 (__v2di)_mm_setzero_si128()))
/* _mm256_rol_epi64(a, b): expression macro yielding an __m256i. Passes a as
 * __v4di and b as int to __builtin_ia32_prolq256.
 * NOTE(review): presumably rotates each 64-bit element of a left by b
 * bits -- confirm. */
4345#define _mm256_rol_epi64(a, b) \
4346 ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
/* _mm256_mask_rol_epi64(w, u, a, b): expression macro yielding an __m256i.
 * Computes _mm256_rol_epi64(a, b) and hands it, together with w, to
 * __builtin_ia32_selectq_256 under mask u (cast to __mmask8).
 * NOTE(review): presumably unselected elements are copied from w --
 * confirm. */
4348#define _mm256_mask_rol_epi64(w, u, a, b) \
4349 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4350 (__v4di)_mm256_rol_epi64((a), (b)), \
4351 (__v4di)(__m256i)(w)))
/* _mm256_maskz_rol_epi64(u, a, b): expression macro yielding an __m256i.
 * Computes _mm256_rol_epi64(a, b) and passes it to
 * __builtin_ia32_selectq_256 under mask u, with _mm256_setzero_si256() as
 * the alternative operand.
 * NOTE(review): presumably unselected elements become zero -- confirm. */
4353#define _mm256_maskz_rol_epi64(u, a, b) \
4354 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4355 (__v4di)_mm256_rol_epi64((a), (b)), \
4356 (__v4di)_mm256_setzero_si256()))
4361 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4367 return (__m128i)__builtin_ia32_selectd_128(__U,
4375 return (__m128i)__builtin_ia32_selectd_128(__U,
4383 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4389 return (__m256i)__builtin_ia32_selectd_256(__U,
4397 return (__m256i)__builtin_ia32_selectd_256(__U,
4405 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4411 return (__m128i)__builtin_ia32_selectq_128(__U,
4419 return (__m128i)__builtin_ia32_selectq_128(__U,
4427 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4433 return (__m256i)__builtin_ia32_selectq_256(__U,
4441 return (__m256i)__builtin_ia32_selectq_256(__U,
/* _mm_ror_epi32(a, b): expression macro yielding an __m128i. Passes a as
 * __v4si and b as int to __builtin_ia32_prord128.
 * NOTE(review): presumably rotates each 32-bit element of a right by b
 * bits -- confirm. */
4446#define _mm_ror_epi32(a, b) \
4447 ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
/* _mm_mask_ror_epi32(w, u, a, b): expression macro yielding an __m128i.
 * Computes _mm_ror_epi32(a, b) and hands it, together with w, to
 * __builtin_ia32_selectd_128 under mask u (cast to __mmask8).
 * NOTE(review): presumably unselected elements are copied from w --
 * confirm. */
4449#define _mm_mask_ror_epi32(w, u, a, b) \
4450 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4451 (__v4si)_mm_ror_epi32((a), (b)), \
4452 (__v4si)(__m128i)(w)))
/* _mm_maskz_ror_epi32(u, a, b): expression macro yielding an __m128i.
 * Computes _mm_ror_epi32(a, b) and passes it to __builtin_ia32_selectd_128
 * under mask u, with _mm_setzero_si128() as the alternative operand.
 * NOTE(review): presumably unselected elements become zero -- confirm. */
4454#define _mm_maskz_ror_epi32(u, a, b) \
4455 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4456 (__v4si)_mm_ror_epi32((a), (b)), \
4457 (__v4si)_mm_setzero_si128()))
/* _mm256_ror_epi32(a, b): expression macro yielding an __m256i. Passes a as
 * __v8si and b as int to __builtin_ia32_prord256.
 * NOTE(review): presumably rotates each 32-bit element of a right by b
 * bits -- confirm. */
4459#define _mm256_ror_epi32(a, b) \
4460 ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
/* _mm256_mask_ror_epi32(w, u, a, b): expression macro yielding an __m256i.
 * Computes _mm256_ror_epi32(a, b) and hands it, together with w, to
 * __builtin_ia32_selectd_256 under mask u (cast to __mmask8).
 * NOTE(review): presumably unselected elements are copied from w --
 * confirm. */
4462#define _mm256_mask_ror_epi32(w, u, a, b) \
4463 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4464 (__v8si)_mm256_ror_epi32((a), (b)), \
4465 (__v8si)(__m256i)(w)))
/* _mm256_maskz_ror_epi32(u, a, b): expression macro yielding an __m256i.
 * Computes _mm256_ror_epi32(a, b) and passes it to
 * __builtin_ia32_selectd_256 under mask u, with _mm256_setzero_si256() as
 * the alternative operand.
 * NOTE(review): presumably unselected elements become zero -- confirm. */
4467#define _mm256_maskz_ror_epi32(u, a, b) \
4468 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4469 (__v8si)_mm256_ror_epi32((a), (b)), \
4470 (__v8si)_mm256_setzero_si256()))
/* _mm_ror_epi64(a, b): expression macro yielding an __m128i. Passes a as
 * __v2di and b as int to __builtin_ia32_prorq128.
 * NOTE(review): presumably rotates each 64-bit element of a right by b
 * bits -- confirm. */
4472#define _mm_ror_epi64(a, b) \
4473 ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
/* _mm_mask_ror_epi64(w, u, a, b): expression macro yielding an __m128i.
 * Computes _mm_ror_epi64(a, b) and hands it, together with w, to
 * __builtin_ia32_selectq_128 under mask u (cast to __mmask8).
 * NOTE(review): presumably unselected elements are copied from w --
 * confirm. */
4475#define _mm_mask_ror_epi64(w, u, a, b) \
4476 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4477 (__v2di)_mm_ror_epi64((a), (b)), \
4478 (__v2di)(__m128i)(w)))
/* _mm_maskz_ror_epi64(u, a, b): expression macro yielding an __m128i.
 * Computes _mm_ror_epi64(a, b) and passes it to __builtin_ia32_selectq_128
 * under mask u, with _mm_setzero_si128() as the alternative operand.
 * NOTE(review): presumably unselected elements become zero -- confirm. */
4480#define _mm_maskz_ror_epi64(u, a, b) \
4481 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4482 (__v2di)_mm_ror_epi64((a), (b)), \
4483 (__v2di)_mm_setzero_si128()))
/* _mm256_ror_epi64(a, b): expression macro yielding an __m256i. Passes a as
 * __v4di and b as int to __builtin_ia32_prorq256.
 * NOTE(review): presumably rotates each 64-bit element of a right by b
 * bits -- confirm. */
4485#define _mm256_ror_epi64(a, b) \
4486 ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
/* _mm256_mask_ror_epi64(w, u, a, b): expression macro yielding an __m256i.
 * Computes _mm256_ror_epi64(a, b) and hands it, together with w, to
 * __builtin_ia32_selectq_256 under mask u (cast to __mmask8).
 * NOTE(review): presumably unselected elements are copied from w --
 * confirm. */
4488#define _mm256_mask_ror_epi64(w, u, a, b) \
4489 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4490 (__v4di)_mm256_ror_epi64((a), (b)), \
4491 (__v4di)(__m256i)(w)))
/* _mm256_maskz_ror_epi64(u, a, b): expression macro yielding an __m256i.
 * Computes _mm256_ror_epi64(a, b) and passes it to
 * __builtin_ia32_selectq_256 under mask u, with _mm256_setzero_si256() as
 * the alternative operand.
 * NOTE(review): presumably unselected elements become zero -- confirm. */
4493#define _mm256_maskz_ror_epi64(u, a, b) \
4494 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4495 (__v4di)_mm256_ror_epi64((a), (b)), \
4496 (__v4di)_mm256_setzero_si256()))
4501 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4509 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4517 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4525 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4533 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4541 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4549 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4557 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4565 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4573 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4581 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4589 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4597 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4605 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4613 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4621 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4629 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4635 return (__m128i)__builtin_ia32_selectd_128(__U,
4643 return (__m128i)__builtin_ia32_selectd_128(__U,
4651 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4657 return (__m256i)__builtin_ia32_selectd_256(__U,
4665 return (__m256i)__builtin_ia32_selectd_256(__U,
4673 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4679 return (__m128i)__builtin_ia32_selectq_128(__U,
4687 return (__m128i)__builtin_ia32_selectq_128(__U,
4695 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4701 return (__m256i)__builtin_ia32_selectq_256(__U,
4709 return (__m256i)__builtin_ia32_selectq_256(__U,
4717 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4725 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4733 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4741 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4749 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4757 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4765 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4773 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4781 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4789 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4797 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4805 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4813 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4821 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4829 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4837 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4845 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4853 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4861 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4869 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4877 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4885 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4893 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4901 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4909 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4917 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4925 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4933 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4941 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4949 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4957 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4965 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4973 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4981 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4989 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4997 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
5005 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)
__Y);
5011 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5019 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5027 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di)
__Y);
5033 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5041 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5049 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
5057 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
5066 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
5074 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
5082 return *(
const __m128i *)
__P;
5088 return (__m128i) __builtin_ia32_movdqa32load128_mask ((
const __v4si *)
__P,
5097 return (__m128i) __builtin_ia32_movdqa32load128_mask ((
const __v4si *)
__P,
5107 return *(
const __m256i *)
__P;
5113 return (__m256i) __builtin_ia32_movdqa32load256_mask ((
const __v8si *)
__P,
5122 return (__m256i) __builtin_ia32_movdqa32load256_mask ((
const __v8si *)
__P,
5132 *(__m128i *)
__P = __A;
5138 __builtin_ia32_movdqa32store128_mask ((__v4si *)
__P,
5146 *(__m256i *)
__P = __A;
5152 __builtin_ia32_movdqa32store256_mask ((__v8si *)
__P,
5160 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
5168 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
5176 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
5184 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
5192 return *(
const __m128i *)
__P;
5198 return (__m128i) __builtin_ia32_movdqa64load128_mask ((
const __v2di *)
__P,
5207 return (__m128i) __builtin_ia32_movdqa64load128_mask ((
const __v2di *)
__P,
5217 return *(
const __m256i *)
__P;
5223 return (__m256i) __builtin_ia32_movdqa64load256_mask ((
const __v4di *)
__P,
5232 return (__m256i) __builtin_ia32_movdqa64load256_mask ((
const __v4di *)
__P,
5242 *(__m128i *)
__P = __A;
5248 __builtin_ia32_movdqa64store128_mask ((__v2di *)
__P,
5256 *(__m256i *)
__P = __A;
5262 __builtin_ia32_movdqa64store256_mask ((__v4di *)
__P,
5270 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5278 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5286 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5294 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5302 return (__m128i)__builtin_ia32_selectd_128(__M,
5310 return (__m128i)__builtin_ia32_selectd_128(__M,
5318 return (__m256i)__builtin_ia32_selectd_256(__M,
5326 return (__m256i)__builtin_ia32_selectd_256(__M,
5335 return (__m128i) __builtin_ia32_selectq_128(__M,
5343 return (__m128i) __builtin_ia32_selectq_128(__M,
5351 return (__m256i) __builtin_ia32_selectq_256(__M,
5359 return (__m256i) __builtin_ia32_selectq_256(__M,
5364#define _mm_fixupimm_pd(A, B, C, imm) \
5365 ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5366 (__v2df)(__m128d)(B), \
5367 (__v2di)(__m128i)(C), (int)(imm), \
5370#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
5371 ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5372 (__v2df)(__m128d)(B), \
5373 (__v2di)(__m128i)(C), (int)(imm), \
/* _mm_maskz_fixupimm_pd(U, A, B, C, imm): forwards A and B as __v2df, C as
 * __v2di, imm as int and U as __mmask8 to
 * __builtin_ia32_fixupimmpd128_maskz, and casts the result to __m128d.
 * NOTE(review): the "_maskz" builtin presumably zeroes lanes whose bit in U
 * is clear -- confirm against the builtin's definition. */
5376#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
5377 ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
5378 (__v2df)(__m128d)(B), \
5379 (__v2di)(__m128i)(C), \
5380 (int)(imm), (__mmask8)(U)))
5382#define _mm256_fixupimm_pd(A, B, C, imm) \
5383 ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5384 (__v4df)(__m256d)(B), \
5385 (__v4di)(__m256i)(C), (int)(imm), \
5388#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
5389 ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5390 (__v4df)(__m256d)(B), \
5391 (__v4di)(__m256i)(C), (int)(imm), \
/* _mm256_maskz_fixupimm_pd(U, A, B, C, imm): forwards A and B as __v4df, C
 * as __v4di, imm as int and U as __mmask8 to
 * __builtin_ia32_fixupimmpd256_maskz, and casts the result to __m256d.
 * NOTE(review): the "_maskz" builtin presumably zeroes lanes whose bit in U
 * is clear -- confirm against the builtin's definition. */
5394#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
5395 ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
5396 (__v4df)(__m256d)(B), \
5397 (__v4di)(__m256i)(C), \
5398 (int)(imm), (__mmask8)(U)))
5400#define _mm_fixupimm_ps(A, B, C, imm) \
5401 ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5402 (__v4sf)(__m128)(B), \
5403 (__v4si)(__m128i)(C), (int)(imm), \
5406#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
5407 ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5408 (__v4sf)(__m128)(B), \
5409 (__v4si)(__m128i)(C), (int)(imm), \
5412#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
5413 ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
5414 (__v4sf)(__m128)(B), \
5415 (__v4si)(__m128i)(C), (int)(imm), \
5418#define _mm256_fixupimm_ps(A, B, C, imm) \
5419 ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5420 (__v8sf)(__m256)(B), \
5421 (__v8si)(__m256i)(C), (int)(imm), \
5424#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
5425 ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5426 (__v8sf)(__m256)(B), \
5427 (__v8si)(__m256i)(C), (int)(imm), \
5430#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
5431 ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
5432 (__v8sf)(__m256)(B), \
5433 (__v8si)(__m256i)(C), (int)(imm), \
5439 return (__m128d) __builtin_ia32_loadapd128_mask ((
const __v2df *)
__P,
5447 return (__m128d) __builtin_ia32_loadapd128_mask ((
const __v2df *)
__P,
5456 return (__m256d) __builtin_ia32_loadapd256_mask ((
const __v4df *)
__P,
5464 return (__m256d) __builtin_ia32_loadapd256_mask ((
const __v4df *)
__P,
5473 return (__m128) __builtin_ia32_loadaps128_mask ((
const __v4sf *)
__P,
5481 return (__m128) __builtin_ia32_loadaps128_mask ((
const __v4sf *)
__P,
5490 return (__m256) __builtin_ia32_loadaps256_mask ((
const __v8sf *)
__P,
5498 return (__m256) __builtin_ia32_loadaps256_mask ((
const __v8sf *)
__P,
5507 struct __loadu_epi64 {
5510 return ((
const struct __loadu_epi64*)
__P)->__v;
5516 return (__m128i) __builtin_ia32_loaddqudi128_mask ((
const __v2di *)
__P,
5524 return (__m128i) __builtin_ia32_loaddqudi128_mask ((
const __v2di *)
__P,
5533 struct __loadu_epi64 {
5536 return ((
const struct __loadu_epi64*)
__P)->__v;
5542 return (__m256i) __builtin_ia32_loaddqudi256_mask ((
const __v4di *)
__P,
5550 return (__m256i) __builtin_ia32_loaddqudi256_mask ((
const __v4di *)
__P,
5559 struct __loadu_epi32 {
5562 return ((
const struct __loadu_epi32*)
__P)->__v;
5568 return (__m128i) __builtin_ia32_loaddqusi128_mask ((
const __v4si *)
__P,
5576 return (__m128i) __builtin_ia32_loaddqusi128_mask ((
const __v4si *)
__P,
5585 struct __loadu_epi32 {
5588 return ((
const struct __loadu_epi32*)
__P)->__v;
5594 return (__m256i) __builtin_ia32_loaddqusi256_mask ((
const __v8si *)
__P,
5602 return (__m256i) __builtin_ia32_loaddqusi256_mask ((
const __v8si *)
__P,
5611 return (__m128d) __builtin_ia32_loadupd128_mask ((
const __v2df *)
__P,
5619 return (__m128d) __builtin_ia32_loadupd128_mask ((
const __v2df *)
__P,
5628 return (__m256d) __builtin_ia32_loadupd256_mask ((
const __v4df *)
__P,
5636 return (__m256d) __builtin_ia32_loadupd256_mask ((
const __v4df *)
__P,
5645 return (__m128) __builtin_ia32_loadups128_mask ((
const __v4sf *)
__P,
5653 return (__m128) __builtin_ia32_loadups128_mask ((
const __v4sf *)
__P,
5662 return (__m256) __builtin_ia32_loadups256_mask ((
const __v8sf *)
__P,
5670 return (__m256) __builtin_ia32_loadups256_mask ((
const __v8sf *)
__P,
5679 __builtin_ia32_storeapd128_mask ((__v2df *)
__P,
5687 __builtin_ia32_storeapd256_mask ((__v4df *)
__P,
5695 __builtin_ia32_storeaps128_mask ((__v4sf *)
__P,
5703 __builtin_ia32_storeaps256_mask ((__v8sf *)
__P,
5711 struct __storeu_epi64 {
5714 ((
struct __storeu_epi64*)
__P)->__v = __A;
5720 __builtin_ia32_storedqudi128_mask ((__v2di *)
__P,
5728 struct __storeu_epi64 {
5731 ((
struct __storeu_epi64*)
__P)->__v = __A;
5737 __builtin_ia32_storedqudi256_mask ((__v4di *)
__P,
5745 struct __storeu_epi32 {
5748 ((
struct __storeu_epi32*)
__P)->__v = __A;
5754 __builtin_ia32_storedqusi128_mask ((__v4si *)
__P,
5762 struct __storeu_epi32 {
5765 ((
struct __storeu_epi32*)
__P)->__v = __A;
5771 __builtin_ia32_storedqusi256_mask ((__v8si *)
__P,
5779 __builtin_ia32_storeupd128_mask ((__v2df *)
__P,
5787 __builtin_ia32_storeupd256_mask ((__v4df *)
__P,
5795 __builtin_ia32_storeups128_mask ((__v4sf *)
__P,
5803 __builtin_ia32_storeups256_mask ((__v8sf *)
__P,
5812 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5820 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5828 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5836 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5844 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5852 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5860 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5868 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5876 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5884 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5892 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5900 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5908 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5916 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5924 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5932 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5940 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5949 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5957 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5966 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5975 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5983 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5992 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6001 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6009 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6018 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6027 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6035 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
/* _mm_mask_permute_pd(W, U, X, C): computes _mm_permute_pd(X, C) and hands
 * it, together with mask U and fallback vector W (both vectors as __v2df),
 * to __builtin_ia32_selectpd_128; the result is cast to __m128d.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6041#define _mm_mask_permute_pd(W, U, X, C) \
6042 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6043 (__v2df)_mm_permute_pd((X), (C)), \
6044 (__v2df)(__m128d)(W)))
/* _mm_maskz_permute_pd(U, X, C): computes _mm_permute_pd(X, C) and passes
 * it with mask U to __builtin_ia32_selectpd_128, using _mm_setzero_pd() as
 * the other select operand; the result is cast to __m128d.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6046#define _mm_maskz_permute_pd(U, X, C) \
6047 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6048 (__v2df)_mm_permute_pd((X), (C)), \
6049 (__v2df)_mm_setzero_pd()))
/* _mm256_mask_permute_pd(W, U, X, C): computes _mm256_permute_pd(X, C) and
 * hands it, together with mask U and fallback vector W (both vectors as
 * __v4df), to __builtin_ia32_selectpd_256; the result is cast to __m256d.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6051#define _mm256_mask_permute_pd(W, U, X, C) \
6052 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6053 (__v4df)_mm256_permute_pd((X), (C)), \
6054 (__v4df)(__m256d)(W)))
/* _mm256_maskz_permute_pd(U, X, C): computes _mm256_permute_pd(X, C) and
 * passes it with mask U to __builtin_ia32_selectpd_256, using
 * _mm256_setzero_pd() as the other select operand; the result is cast to
 * __m256d.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6056#define _mm256_maskz_permute_pd(U, X, C) \
6057 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6058 (__v4df)_mm256_permute_pd((X), (C)), \
6059 (__v4df)_mm256_setzero_pd()))
/* _mm_mask_permute_ps(W, U, X, C): computes _mm_permute_ps(X, C) and hands
 * it, together with mask U and fallback vector W (both vectors as __v4sf),
 * to __builtin_ia32_selectps_128; the result is cast to __m128.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6061#define _mm_mask_permute_ps(W, U, X, C) \
6062 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6063 (__v4sf)_mm_permute_ps((X), (C)), \
6064 (__v4sf)(__m128)(W)))
/* _mm_maskz_permute_ps(U, X, C): computes _mm_permute_ps(X, C) and passes
 * it with mask U to __builtin_ia32_selectps_128, using _mm_setzero_ps() as
 * the other select operand; the result is cast to __m128.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6066#define _mm_maskz_permute_ps(U, X, C) \
6067 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6068 (__v4sf)_mm_permute_ps((X), (C)), \
6069 (__v4sf)_mm_setzero_ps()))
/* _mm256_mask_permute_ps(W, U, X, C): computes _mm256_permute_ps(X, C) and
 * hands it, together with mask U and fallback vector W (both vectors as
 * __v8sf), to __builtin_ia32_selectps_256; the result is cast to __m256.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6071#define _mm256_mask_permute_ps(W, U, X, C) \
6072 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6073 (__v8sf)_mm256_permute_ps((X), (C)), \
6074 (__v8sf)(__m256)(W)))
/* _mm256_maskz_permute_ps(U, X, C): computes _mm256_permute_ps(X, C) and
 * passes it with mask U to __builtin_ia32_selectps_256, using
 * _mm256_setzero_ps() as the other select operand; the result is cast to
 * __m256.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6076#define _mm256_maskz_permute_ps(U, X, C) \
6077 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6078 (__v8sf)_mm256_permute_ps((X), (C)), \
6079 (__v8sf)_mm256_setzero_ps()))
6084 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
6092 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
6100 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
6108 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
6116 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
6124 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
6132 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
6140 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
6256 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6264 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6272 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6280 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6288 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6296 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6304 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6312 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6320 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6328 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6336 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6344 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6352 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6360 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6368 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6376 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6384 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6392 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6400 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6408 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6416 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6424 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6432 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6440 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6448 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6454 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6462 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6470 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6476 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
6484 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
6492 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (
int)__imm);
6498 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6506 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6514 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (
int)__imm);
6521 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
6529 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
/* _mm_ternarylogic_epi32(A, B, C, imm): forwards A, B and C as __v4si and
 * imm converted to unsigned char to __builtin_ia32_pternlogd128_mask, with
 * the mask argument fixed to (__mmask8)-1; the result is cast to __m128i.
 * NOTE(review): (__mmask8)-1 is presumably "all lanes enabled" -- the
 * __mmask8 typedef is not visible here. */
6534#define _mm_ternarylogic_epi32(A, B, C, imm) \
6535 ((__m128i)__builtin_ia32_pternlogd128_mask( \
6536 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \
6537 (unsigned char)(imm), (__mmask8)-1))
/* _mm_mask_ternarylogic_epi32(A, U, B, C, imm): same builtin call as
 * _mm_ternarylogic_epi32 (__builtin_ia32_pternlogd128_mask on A, B, C as
 * __v4si, imm as unsigned char), but with the caller's mask U instead of a
 * constant mask. Note the argument order: the mask is the second macro
 * parameter yet the last builtin argument.
 * NOTE(review): presumably lanes whose bit in U is clear keep A's value --
 * confirm against the builtin. */
6539#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
6540 ((__m128i)__builtin_ia32_pternlogd128_mask( \
6541 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \
6542 (unsigned char)(imm), (__mmask8)(U)))
/* _mm_maskz_ternarylogic_epi32(U, A, B, C, imm): forwards A, B and C as
 * __v4si, imm as unsigned char and U as __mmask8 to
 * __builtin_ia32_pternlogd128_maskz; the result is cast to __m128i.
 * NOTE(review): the "_maskz" builtin presumably zeroes lanes whose bit in U
 * is clear -- confirm against the builtin. */
6544#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
6545 ((__m128i)__builtin_ia32_pternlogd128_maskz( \
6546 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \
6547 (unsigned char)(imm), (__mmask8)(U)))
/* _mm256_ternarylogic_epi32(A, B, C, imm): forwards A, B and C as __v8si
 * and imm converted to unsigned char to __builtin_ia32_pternlogd256_mask,
 * with the mask argument fixed to (__mmask8)-1; the result is cast to
 * __m256i.
 * NOTE(review): (__mmask8)-1 is presumably "all lanes enabled" -- the
 * __mmask8 typedef is not visible here. */
6549#define _mm256_ternarylogic_epi32(A, B, C, imm) \
6550 ((__m256i)__builtin_ia32_pternlogd256_mask( \
6551 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \
6552 (unsigned char)(imm), (__mmask8)-1))
/* _mm256_mask_ternarylogic_epi32(A, U, B, C, imm): same builtin call as
 * _mm256_ternarylogic_epi32 (__builtin_ia32_pternlogd256_mask on A, B, C as
 * __v8si, imm as unsigned char), but with the caller's mask U instead of a
 * constant mask. The mask is the second macro parameter yet the last
 * builtin argument.
 * NOTE(review): presumably lanes whose bit in U is clear keep A's value --
 * confirm against the builtin. */
6554#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
6555 ((__m256i)__builtin_ia32_pternlogd256_mask( \
6556 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \
6557 (unsigned char)(imm), (__mmask8)(U)))
/* _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm): forwards A, B and C as
 * __v8si, imm as unsigned char and U as __mmask8 to
 * __builtin_ia32_pternlogd256_maskz; the result is cast to __m256i.
 * NOTE(review): the "_maskz" builtin presumably zeroes lanes whose bit in U
 * is clear -- confirm against the builtin. */
6559#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
6560 ((__m256i)__builtin_ia32_pternlogd256_maskz( \
6561 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \
6562 (unsigned char)(imm), (__mmask8)(U)))
/* _mm_ternarylogic_epi64(A, B, C, imm): forwards A, B and C as __v2di and
 * imm converted to unsigned char to __builtin_ia32_pternlogq128_mask, with
 * the mask argument fixed to (__mmask8)-1; the result is cast to __m128i.
 * NOTE(review): (__mmask8)-1 is presumably "all lanes enabled" -- the
 * __mmask8 typedef is not visible here. */
6564#define _mm_ternarylogic_epi64(A, B, C, imm) \
6565 ((__m128i)__builtin_ia32_pternlogq128_mask( \
6566 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \
6567 (unsigned char)(imm), (__mmask8)-1))
/* _mm_mask_ternarylogic_epi64(A, U, B, C, imm): same builtin call as
 * _mm_ternarylogic_epi64 (__builtin_ia32_pternlogq128_mask on A, B, C as
 * __v2di, imm as unsigned char), but with the caller's mask U instead of a
 * constant mask. The mask is the second macro parameter yet the last
 * builtin argument.
 * NOTE(review): presumably lanes whose bit in U is clear keep A's value --
 * confirm against the builtin. */
6569#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
6570 ((__m128i)__builtin_ia32_pternlogq128_mask( \
6571 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \
6572 (unsigned char)(imm), (__mmask8)(U)))
/* _mm_maskz_ternarylogic_epi64(U, A, B, C, imm): forwards A, B and C as
 * __v2di, imm as unsigned char and U as __mmask8 to
 * __builtin_ia32_pternlogq128_maskz; the result is cast to __m128i.
 * NOTE(review): the "_maskz" builtin presumably zeroes lanes whose bit in U
 * is clear -- confirm against the builtin. */
6574#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
6575 ((__m128i)__builtin_ia32_pternlogq128_maskz( \
6576 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \
6577 (unsigned char)(imm), (__mmask8)(U)))
/* _mm256_ternarylogic_epi64(A, B, C, imm): forwards A, B and C as __v4di
 * and imm converted to unsigned char to __builtin_ia32_pternlogq256_mask,
 * with the mask argument fixed to (__mmask8)-1; the result is cast to
 * __m256i.
 * NOTE(review): (__mmask8)-1 is presumably "all lanes enabled" -- the
 * __mmask8 typedef is not visible here. */
6579#define _mm256_ternarylogic_epi64(A, B, C, imm) \
6580 ((__m256i)__builtin_ia32_pternlogq256_mask( \
6581 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \
6582 (unsigned char)(imm), (__mmask8)-1))
/* _mm256_mask_ternarylogic_epi64(A, U, B, C, imm): same builtin call as
 * _mm256_ternarylogic_epi64 (__builtin_ia32_pternlogq256_mask on A, B, C as
 * __v4di, imm as unsigned char), but with the caller's mask U instead of a
 * constant mask. The mask is the second macro parameter yet the last
 * builtin argument.
 * NOTE(review): presumably lanes whose bit in U is clear keep A's value --
 * confirm against the builtin. */
6584#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
6585 ((__m256i)__builtin_ia32_pternlogq256_mask( \
6586 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \
6587 (unsigned char)(imm), (__mmask8)(U)))
/* _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm): forwards A, B and C as
 * __v4di, imm as unsigned char and U as __mmask8 to
 * __builtin_ia32_pternlogq256_maskz; the result is cast to __m256i.
 * NOTE(review): the "_maskz" builtin presumably zeroes lanes whose bit in U
 * is clear -- confirm against the builtin. */
6589#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
6590 ((__m256i)__builtin_ia32_pternlogq256_maskz( \
6591 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \
6592 (unsigned char)(imm), (__mmask8)(U)))
/* _mm256_shuffle_f32x4(A, B, imm): forwards A and B as __v8sf and imm as
 * int to __builtin_ia32_shuf_f32x4_256; the result is cast to __m256.
 * NOTE(review): imm presumably has to be a compile-time constant -- confirm
 * against the builtin's declaration. */
6594#define _mm256_shuffle_f32x4(A, B, imm) \
6595 ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
6596 (__v8sf)(__m256)(B), (int)(imm)))
/* _mm256_mask_shuffle_f32x4(W, U, A, B, imm): computes
 * _mm256_shuffle_f32x4(A, B, imm) and hands it, together with mask U and
 * fallback vector W (both vectors as __v8sf), to
 * __builtin_ia32_selectps_256; the result is cast to __m256.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6598#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
6599 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6600 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
6601 (__v8sf)(__m256)(W)))
/* _mm256_maskz_shuffle_f32x4(U, A, B, imm): computes
 * _mm256_shuffle_f32x4(A, B, imm) and passes it with mask U to
 * __builtin_ia32_selectps_256, using _mm256_setzero_ps() as the other
 * select operand; the result is cast to __m256.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6603#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
6604 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6605 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
6606 (__v8sf)_mm256_setzero_ps()))
/* _mm256_shuffle_f64x2(A, B, imm): forwards A and B as __v4df and imm as
 * int to __builtin_ia32_shuf_f64x2_256; the result is cast to __m256d.
 * NOTE(review): imm presumably has to be a compile-time constant -- confirm
 * against the builtin's declaration. */
6608#define _mm256_shuffle_f64x2(A, B, imm) \
6609 ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
6610 (__v4df)(__m256d)(B), (int)(imm)))
/* _mm256_mask_shuffle_f64x2(W, U, A, B, imm): computes
 * _mm256_shuffle_f64x2(A, B, imm) and hands it, together with mask U and
 * fallback vector W (both vectors as __v4df), to
 * __builtin_ia32_selectpd_256; the result is cast to __m256d.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6612#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
6613 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6614 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
6615 (__v4df)(__m256d)(W)))
/* _mm256_maskz_shuffle_f64x2(U, A, B, imm): computes
 * _mm256_shuffle_f64x2(A, B, imm) and passes it with mask U to
 * __builtin_ia32_selectpd_256, using _mm256_setzero_pd() as the other
 * select operand; the result is cast to __m256d.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6617#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
6618 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6619 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
6620 (__v4df)_mm256_setzero_pd()))
/* _mm256_shuffle_i32x4(A, B, imm): forwards A and B as __v8si and imm as
 * int to __builtin_ia32_shuf_i32x4_256; the result is cast to __m256i.
 * NOTE(review): imm presumably has to be a compile-time constant -- confirm
 * against the builtin's declaration. */
6622#define _mm256_shuffle_i32x4(A, B, imm) \
6623 ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
6624 (__v8si)(__m256i)(B), (int)(imm)))
/* _mm256_mask_shuffle_i32x4(W, U, A, B, imm): computes
 * _mm256_shuffle_i32x4(A, B, imm) and hands it, together with mask U and
 * fallback vector W (both vectors as __v8si), to
 * __builtin_ia32_selectd_256; the result is cast to __m256i.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6626#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
6627 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
6628 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
6629 (__v8si)(__m256i)(W)))
/* _mm256_maskz_shuffle_i32x4(U, A, B, imm): computes
 * _mm256_shuffle_i32x4(A, B, imm) and passes it with mask U to
 * __builtin_ia32_selectd_256, using _mm256_setzero_si256() as the other
 * select operand; the result is cast to __m256i.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6631#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
6632 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
6633 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
6634 (__v8si)_mm256_setzero_si256()))
/* _mm256_shuffle_i64x2(A, B, imm): forwards A and B as __v4di and imm as
 * int to __builtin_ia32_shuf_i64x2_256; the result is cast to __m256i.
 * NOTE(review): imm presumably has to be a compile-time constant -- confirm
 * against the builtin's declaration. */
6636#define _mm256_shuffle_i64x2(A, B, imm) \
6637 ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
6638 (__v4di)(__m256i)(B), (int)(imm)))
/* _mm256_mask_shuffle_i64x2(W, U, A, B, imm): computes
 * _mm256_shuffle_i64x2(A, B, imm) and hands it, together with mask U and
 * fallback vector W (both vectors as __v4di), to
 * __builtin_ia32_selectq_256; the result is cast to __m256i.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6640#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
6641 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
6642 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
6643 (__v4di)(__m256i)(W)))
/* _mm256_maskz_shuffle_i64x2(U, A, B, imm): computes
 * _mm256_shuffle_i64x2(A, B, imm) and passes it with mask U to
 * __builtin_ia32_selectq_256, using _mm256_setzero_si256() as the other
 * select operand; the result is cast to __m256i.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6646#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
6647 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
6648 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
6649 (__v4di)_mm256_setzero_si256()))
/* _mm_mask_shuffle_pd(W, U, A, B, M): computes _mm_shuffle_pd(A, B, M) and
 * hands it, together with mask U and fallback vector W (both vectors as
 * __v2df), to __builtin_ia32_selectpd_128; the result is cast to __m128d.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6651#define _mm_mask_shuffle_pd(W, U, A, B, M) \
6652 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6653 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
6654 (__v2df)(__m128d)(W)))
/* _mm_maskz_shuffle_pd(U, A, B, M): computes _mm_shuffle_pd(A, B, M) and
 * passes it with mask U to __builtin_ia32_selectpd_128, using
 * _mm_setzero_pd() as the other select operand; the result is cast to
 * __m128d.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6656#define _mm_maskz_shuffle_pd(U, A, B, M) \
6657 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6658 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
6659 (__v2df)_mm_setzero_pd()))
/* _mm256_mask_shuffle_pd(W, U, A, B, M): computes _mm256_shuffle_pd(A, B, M)
 * and hands it, together with mask U and fallback vector W (both vectors as
 * __v4df), to __builtin_ia32_selectpd_256; the result is cast to __m256d.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6661#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
6662 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6663 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
6664 (__v4df)(__m256d)(W)))
/* _mm256_maskz_shuffle_pd(U, A, B, M): computes _mm256_shuffle_pd(A, B, M)
 * and passes it with mask U to __builtin_ia32_selectpd_256, using
 * _mm256_setzero_pd() as the other select operand; the result is cast to
 * __m256d.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6666#define _mm256_maskz_shuffle_pd(U, A, B, M) \
6667 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6668 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
6669 (__v4df)_mm256_setzero_pd()))
/* _mm_mask_shuffle_ps(W, U, A, B, M): computes _mm_shuffle_ps(A, B, M) and
 * hands it, together with mask U and fallback vector W (both vectors as
 * __v4sf), to __builtin_ia32_selectps_128; the result is cast to __m128.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6671#define _mm_mask_shuffle_ps(W, U, A, B, M) \
6672 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6673 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
6674 (__v4sf)(__m128)(W)))
/* _mm_maskz_shuffle_ps(U, A, B, M): computes _mm_shuffle_ps(A, B, M) and
 * passes it with mask U to __builtin_ia32_selectps_128, using
 * _mm_setzero_ps() as the other select operand; the result is cast to
 * __m128.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6676#define _mm_maskz_shuffle_ps(U, A, B, M) \
6677 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6678 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
6679 (__v4sf)_mm_setzero_ps()))
/* _mm256_mask_shuffle_ps(W, U, A, B, M): computes _mm256_shuffle_ps(A, B, M)
 * and hands it, together with mask U and fallback vector W (both vectors as
 * __v8sf), to __builtin_ia32_selectps_256; the result is cast to __m256.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
6681#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
6682 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6683 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
6684 (__v8sf)(__m256)(W)))
/* _mm256_maskz_shuffle_ps(U, A, B, M): computes _mm256_shuffle_ps(A, B, M)
 * and passes it with mask U to __builtin_ia32_selectps_256, using
 * _mm256_setzero_ps() as the other select operand; the result is cast to
 * __m256.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
6686#define _mm256_maskz_shuffle_ps(U, A, B, M) \
6687 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6688 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
6689 (__v8sf)_mm256_setzero_ps()))
6694 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6703 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6711 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6720 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6729 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6737 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6746 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6755 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6763 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6772 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6781 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6789 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6798 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6799 0, 1, 2, 3, 0, 1, 2, 3);
6805 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__M,
6813 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__M,
6821 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6822 0, 1, 2, 3, 0, 1, 2, 3);
6828 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
6836 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
6844 return (__m256d)__builtin_ia32_selectpd_256(__M,
6852 return (__m256d)__builtin_ia32_selectpd_256(__M,
6860 return (__m128)__builtin_ia32_selectps_128(__M,
6868 return (__m128)__builtin_ia32_selectps_128(__M,
6876 return (__m256)__builtin_ia32_selectps_256(__M,
6884 return (__m256)__builtin_ia32_selectps_256(__M,
6892 return (__m128i)__builtin_ia32_selectd_128(__M,
6900 return (__m128i)__builtin_ia32_selectd_128(__M,
6908 return (__m256i)__builtin_ia32_selectd_256(__M,
6916 return (__m256i)__builtin_ia32_selectd_256(__M,
6924 return (__m128i)__builtin_ia32_selectq_128(__M,
6932 return (__m128i)__builtin_ia32_selectq_128(__M,
6940 return (__m256i)__builtin_ia32_selectq_256(__M,
6948 return (__m256i)__builtin_ia32_selectq_256(__M,
6956 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6964 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6965 (__v16qi) __O, __M);
6971 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6979 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *)
__P, (__v4si) __A, __M);
6985 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6993 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6994 (__v16qi) __O, __M);
7000 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7008 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *)
__P, (__v8si) __A, __M);
7014 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7022 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7030 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7038 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *)
__P, (__v4si) __A, __M);
7044 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7052 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7059 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7067 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *)
__P, (__v8si) __A, __M);
7073 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7081 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7082 (__v16qi) __O, __M);
7088 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7096 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *)
__P, (__v2di) __A, __M);
7102 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7110 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7111 (__v16qi) __O, __M);
7117 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7125 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *)
__P, (__v4di) __A, __M);
7131 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7139 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7146 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7154 __builtin_ia32_pmovsqd128mem_mask ((__v4si *)
__P, (__v2di) __A, __M);
7160 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7168 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7176 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7184 __builtin_ia32_pmovsqd256mem_mask ((__v4si *)
__P, (__v4di) __A, __M);
7190 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7198 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7205 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7213 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *)
__P, (__v2di) __A, __M);
7219 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7227 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7234 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7242 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *)
__P, (__v4di) __A, __M);
7248 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7256 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7264 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7272 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *)
__P, (__v4si) __A, __M);
7278 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7286 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7294 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7302 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*)
__P, (__v8si) __A, __M);
7308 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7316 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7323 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7331 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *)
__P, (__v4si) __A, __M);
7337 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7345 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7352 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7360 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *)
__P, (__v8si) __A, __M);
7366 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7374 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7382 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7390 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *)
__P, (__v2di) __A, __M);
7396 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7404 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7412 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7420 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *)
__P, (__v4di) __A, __M);
7426 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7434 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7441 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7449 __builtin_ia32_pmovusqd128mem_mask ((__v4si *)
__P, (__v2di) __A, __M);
7455 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7463 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7470 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7478 __builtin_ia32_pmovusqd256mem_mask ((__v4si *)
__P, (__v4di) __A, __M);
7484 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7492 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7499 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7507 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *)
__P, (__v2di) __A, __M);
7513 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7521 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7528 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7536 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *)
__P, (__v4di) __A, __M);
7542 return (__m128i)__builtin_shufflevector(
7543 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7544 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7550 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7551 (__v16qi) __O, __M);
7557 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7566 __builtin_ia32_pmovdb128mem_mask ((__v16qi *)
__P, (__v4si) __A, __M);
7572 return (__m128i)__builtin_shufflevector(
7573 __builtin_convertvector((__v8si)__A, __v8qi),
7574 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7581 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7582 (__v16qi) __O, __M);
7588 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7596 __builtin_ia32_pmovdb256mem_mask ((__v16qi *)
__P, (__v8si) __A, __M);
7602 return (__m128i)__builtin_shufflevector(
7603 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7610 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7617 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7625 __builtin_ia32_pmovdw128mem_mask ((__v8hi *)
__P, (__v4si) __A, __M);
7631 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7637 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7644 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7652 __builtin_ia32_pmovdw256mem_mask ((__v8hi *)
__P, (__v8si) __A, __M);
7658 return (__m128i)__builtin_shufflevector(
7659 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7660 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7666 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7667 (__v16qi) __O, __M);
7673 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7681 __builtin_ia32_pmovqb128mem_mask ((__v16qi *)
__P, (__v2di) __A, __M);
7687 return (__m128i)__builtin_shufflevector(
7688 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7689 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7695 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7696 (__v16qi) __O, __M);
7702 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7710 __builtin_ia32_pmovqb256mem_mask ((__v16qi *)
__P, (__v4di) __A, __M);
7716 return (__m128i)__builtin_shufflevector(
7717 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7723 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7730 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7738 __builtin_ia32_pmovqd128mem_mask ((__v4si *)
__P, (__v2di) __A, __M);
7744 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7750 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
7758 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
7766 __builtin_ia32_pmovqd256mem_mask ((__v4si *)
__P, (__v4di) __A, __M);
7772 return (__m128i)__builtin_shufflevector(
7773 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7780 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7788 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7796 __builtin_ia32_pmovqw128mem_mask ((__v8hi *)
__P, (__v2di) __A, __M);
7802 return (__m128i)__builtin_shufflevector(
7803 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7810 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7817 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7825 __builtin_ia32_pmovqw256mem_mask ((__v8hi *)
__P, (__v4di) __A, __M);
7828#define _mm256_extractf32x4_ps(A, imm) \
7829 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7831 (__v4sf)_mm_undefined_ps(), \
7834#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
7835 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7837 (__v4sf)(__m128)(W), \
7840#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
7841 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7843 (__v4sf)_mm_setzero_ps(), \
7846#define _mm256_extracti32x4_epi32(A, imm) \
7847 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7849 (__v4si)_mm_undefined_si128(), \
7852#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
7853 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7855 (__v4si)(__m128i)(W), \
7858#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
7859 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7861 (__v4si)_mm_setzero_si128(), \
/* _mm256_insertf32x4(A, B, imm): forwards A as __v8sf, B as __v4sf and imm
 * as int to __builtin_ia32_insertf32x4_256; the result is cast to __m256.
 * NOTE(review): imm presumably selects which 128-bit half of A is replaced
 * by B and must be a compile-time constant -- confirm against the builtin. */
7864#define _mm256_insertf32x4(A, B, imm) \
7865 ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
7866 (__v4sf)(__m128)(B), (int)(imm)))
/* _mm256_mask_insertf32x4(W, U, A, B, imm): computes
 * _mm256_insertf32x4(A, B, imm) and hands it, together with mask U and
 * fallback vector W (both vectors as __v8sf), to
 * __builtin_ia32_selectps_256; the result is cast to __m256.
 * NOTE(review): presumably lanes whose bit in U is clear take their value
 * from W -- confirm against the select builtin. */
7868#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
7869 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7870 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
7871 (__v8sf)(__m256)(W)))
/* _mm256_maskz_insertf32x4(U, A, B, imm): computes
 * _mm256_insertf32x4(A, B, imm) and passes it with mask U to
 * __builtin_ia32_selectps_256, using _mm256_setzero_ps() as the other
 * select operand; the result is cast to __m256.
 * NOTE(review): presumably lanes whose bit in U is clear come out as zero --
 * confirm against the select builtin. */
7873#define _mm256_maskz_insertf32x4(U, A, B, imm) \
7874 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7875 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
7876 (__v8sf)_mm256_setzero_ps()))
/* _mm256_inserti32x4(A, B, imm): integer counterpart of the f32x4 insert.
 * Expands to __builtin_ia32_inserti32x4_256 with A viewed as __v8si, B viewed
 * as __v4si and imm cast to int; the result is cast to __m256i. */
7878#define _mm256_inserti32x4(A, B, imm) \
7879 ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
7880 (__v4si)(__m128i)(B), (int)(imm)))
/* Merge-masked form: __builtin_ia32_selectd_256 is given U (as __mmask8),
 * the _mm256_inserti32x4(A, B, imm) result and W (both as __v8si).
 * Presumably set mask bits pick the insert result and clear bits pick W --
 * confirm against the select builtin. */
7882#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
7883 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7884 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
7885 (__v8si)(__m256i)(W)))
/* Zero-masked form: same select, with _mm256_setzero_si256() as the
 * fallback operand. */
7887#define _mm256_maskz_inserti32x4(U, A, B, imm) \
7888 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7889 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
7890 (__v8si)_mm256_setzero_si256()))
7892#define _mm_getmant_pd(A, B, C) \
7893 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7894 (int)(((C)<<2) | (B)), \
7895 (__v2df)_mm_setzero_pd(), \
7898#define _mm_mask_getmant_pd(W, U, A, B, C) \
7899 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7900 (int)(((C)<<2) | (B)), \
7901 (__v2df)(__m128d)(W), \
7904#define _mm_maskz_getmant_pd(U, A, B, C) \
7905 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7906 (int)(((C)<<2) | (B)), \
7907 (__v2df)_mm_setzero_pd(), \
7910#define _mm256_getmant_pd(A, B, C) \
7911 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7912 (int)(((C)<<2) | (B)), \
7913 (__v4df)_mm256_setzero_pd(), \
7916#define _mm256_mask_getmant_pd(W, U, A, B, C) \
7917 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7918 (int)(((C)<<2) | (B)), \
7919 (__v4df)(__m256d)(W), \
7922#define _mm256_maskz_getmant_pd(U, A, B, C) \
7923 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7924 (int)(((C)<<2) | (B)), \
7925 (__v4df)_mm256_setzero_pd(), \
7928#define _mm_getmant_ps(A, B, C) \
7929 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7930 (int)(((C)<<2) | (B)), \
7931 (__v4sf)_mm_setzero_ps(), \
7934#define _mm_mask_getmant_ps(W, U, A, B, C) \
7935 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7936 (int)(((C)<<2) | (B)), \
7937 (__v4sf)(__m128)(W), \
7940#define _mm_maskz_getmant_ps(U, A, B, C) \
7941 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7942 (int)(((C)<<2) | (B)), \
7943 (__v4sf)_mm_setzero_ps(), \
7946#define _mm256_getmant_ps(A, B, C) \
7947 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7948 (int)(((C)<<2) | (B)), \
7949 (__v8sf)_mm256_setzero_ps(), \
7952#define _mm256_mask_getmant_ps(W, U, A, B, C) \
7953 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7954 (int)(((C)<<2) | (B)), \
7955 (__v8sf)(__m256)(W), \
7958#define _mm256_maskz_getmant_ps(U, A, B, C) \
7959 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7960 (int)(((C)<<2) | (B)), \
7961 (__v8sf)_mm256_setzero_ps(), \
/* Gather wrappers taking 64-bit-element index vectors and producing
 * 64-bit-element results. Every macro in this group forwards, in order:
 * v1_old (cast to the result vector type), addr (as void const *), index,
 * mask (as __mmask8) and scale (as int) to a __builtin_ia32_gather3div*
 * builtin. Presumably v1_old supplies the lanes whose mask bit is clear and
 * scale is the byte multiplier applied to each index -- confirm in the Intel
 * Intrinsics Guide. */

/* 128-bit: v1_old as __v2df, index as __v2di; result cast to __m128d. */
7964#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
7965 ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
7966 (void const *)(addr), \
7967 (__v2di)(__m128i)(index), \
7968 (__mmask8)(mask), (int)(scale)))
/* 128-bit: v1_old as __v2di, index as __v2di; result cast to __m128i. */
7970#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7971 ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
7972 (void const *)(addr), \
7973 (__v2di)(__m128i)(index), \
7974 (__mmask8)(mask), (int)(scale)))
/* 256-bit: v1_old as __v4df, index as __v4di; result cast to __m256d. */
7976#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
7977 ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
7978 (void const *)(addr), \
7979 (__v4di)(__m256i)(index), \
7980 (__mmask8)(mask), (int)(scale)))
/* 256-bit: v1_old as __v4di, index as __v4di; result cast to __m256i. */
7982#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7983 ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
7984 (void const *)(addr), \
7985 (__v4di)(__m256i)(index), \
7986 (__mmask8)(mask), (int)(scale)))
/* Gather wrappers taking 64-bit-element index vectors and producing
 * 32-bit-element results. Argument forwarding is v1_old, addr (as
 * void const *), index, mask (as __mmask8), scale (as int). Note that the
 * result and v1_old are always 128-bit here (__v4sf / __v4si), even in the
 * _mm256_ variants, where only the index vector is 256-bit. */

/* 128-bit index (__v2di); v1_old as __v4sf; result cast to __m128. */
7988#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
7989 ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
7990 (void const *)(addr), \
7991 (__v2di)(__m128i)(index), \
7992 (__mmask8)(mask), (int)(scale)))
/* 128-bit index (__v2di); v1_old as __v4si; result cast to __m128i. */
7994#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
7995 ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
7996 (void const *)(addr), \
7997 (__v2di)(__m128i)(index), \
7998 (__mmask8)(mask), (int)(scale)))
/* 256-bit index (__v4di); v1_old as __v4sf; result cast to __m128. */
8000#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
8001 ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
8002 (void const *)(addr), \
8003 (__v4di)(__m256i)(index), \
8004 (__mmask8)(mask), (int)(scale)))
/* 256-bit index (__v4di); v1_old as __v4si; result cast to __m128i. */
8006#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
8007 ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
8008 (void const *)(addr), \
8009 (__v4di)(__m256i)(index), \
8010 (__mmask8)(mask), (int)(scale)))
/* Gather wrappers taking 32-bit-element index vectors and producing
 * 64-bit-element results. Argument forwarding is v1_old, addr (as
 * void const *), index, mask (as __mmask8), scale (as int). The index is
 * always a 128-bit __v4si here, including in the _mm256_ variants whose
 * result is 256-bit. */

/* v1_old as __v2df; result cast to __m128d. */
8012#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
8013 ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
8014 (void const *)(addr), \
8015 (__v4si)(__m128i)(index), \
8016 (__mmask8)(mask), (int)(scale)))
/* v1_old as __v2di; result cast to __m128i. */
8018#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
8019 ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
8020 (void const *)(addr), \
8021 (__v4si)(__m128i)(index), \
8022 (__mmask8)(mask), (int)(scale)))
/* v1_old as __v4df; result cast to __m256d. */
8024#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
8025 ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
8026 (void const *)(addr), \
8027 (__v4si)(__m128i)(index), \
8028 (__mmask8)(mask), (int)(scale)))
/* v1_old as __v4di; result cast to __m256i. */
8030#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
8031 ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
8032 (void const *)(addr), \
8033 (__v4si)(__m128i)(index), \
8034 (__mmask8)(mask), (int)(scale)))
/* Gather wrappers taking 32-bit-element index vectors and producing
 * 32-bit-element results; index and result have the same width. Argument
 * forwarding is v1_old, addr (as void const *), index, mask (as __mmask8),
 * scale (as int). */

/* 128-bit: v1_old as __v4sf, index as __v4si; result cast to __m128. */
8036#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
8037 ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
8038 (void const *)(addr), \
8039 (__v4si)(__m128i)(index), \
8040 (__mmask8)(mask), (int)(scale)))
/* 128-bit: v1_old as __v4si, index as __v4si; result cast to __m128i. */
8042#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
8043 ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
8044 (void const *)(addr), \
8045 (__v4si)(__m128i)(index), \
8046 (__mmask8)(mask), (int)(scale)))
/* 256-bit: v1_old as __v8sf, index as __v8si; result cast to __m256. */
8048#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
8049 ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
8050 (void const *)(addr), \
8051 (__v8si)(__m256i)(index), \
8052 (__mmask8)(mask), (int)(scale)))
/* 256-bit: v1_old as __v8si, index as __v8si; result cast to __m256i. */
8054#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
8055 ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
8056 (void const *)(addr), \
8057 (__v8si)(__m256i)(index), \
8058 (__mmask8)(mask), (int)(scale)))
/* _mm256_permutex_pd(X, C): expands to __builtin_ia32_permdf256 with X viewed
 * as __v4df and the control C cast to int; the result is cast to __m256d. */
8060#define _mm256_permutex_pd(X, C) \
8061 ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))
/* Merge-masked form: __builtin_ia32_selectpd_256 receives U (as __mmask8),
 * the _mm256_permutex_pd(X, C) result and W (both as __v4df). Presumably set
 * mask bits pick the permuted lane and clear bits pick W -- confirm against
 * the select builtin. */
8063#define _mm256_mask_permutex_pd(W, U, X, C) \
8064 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
8065 (__v4df)_mm256_permutex_pd((X), (C)), \
8066 (__v4df)(__m256d)(W)))
/* Zero-masked form: same select, with _mm256_setzero_pd() as the fallback
 * operand. */
8068#define _mm256_maskz_permutex_pd(U, X, C) \
8069 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
8070 (__v4df)_mm256_permutex_pd((X), (C)), \
8071 (__v4df)_mm256_setzero_pd()))
/* _mm256_permutex_epi64(X, C): expands to __builtin_ia32_permdi256 with X
 * viewed as __v4di and the control C cast to int; the result is cast to
 * __m256i. */
8073#define _mm256_permutex_epi64(X, C) \
8074 ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))
/* Merge-masked form: __builtin_ia32_selectq_256 receives U (as __mmask8),
 * the _mm256_permutex_epi64(X, C) result and W (both as __v4di). Presumably
 * set mask bits pick the permuted lane and clear bits pick W -- confirm
 * against the select builtin. */
8076#define _mm256_mask_permutex_epi64(W, U, X, C) \
8077 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8078 (__v4di)_mm256_permutex_epi64((X), (C)), \
8079 (__v4di)(__m256i)(W)))
/* Zero-masked form: same select, with _mm256_setzero_si256() as the
 * fallback operand. */
8081#define _mm256_maskz_permutex_epi64(U, X, C) \
8082 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8083 (__v4di)_mm256_permutex_epi64((X), (C)), \
8084 (__v4di)_mm256_setzero_si256()))
8089 return (__m256d)__builtin_ia32_permvardf256((__v4df)
__Y, (__v4di)__X);
8096 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
8104 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
8112 return (__m256i)__builtin_ia32_permvardi256((__v4di)
__Y, (__v4di) __X);
8118 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
8127 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped: the
 * first argument here (A) becomes the second argument of the callee, and B
 * becomes the first. */
8132#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
8137 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8145 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
/* Alias for _mm256_permutevar8x32_epi32 with the two arguments swapped: the
 * first argument here (A) becomes the second argument of the callee, and B
 * becomes the first. */
8150#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
8156 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
8164 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
/* _mm_alignr_epi32(A, B, imm): expands to __builtin_ia32_alignd128 with A and
 * B viewed as __v4si and imm cast to int; the result is cast to __m128i. */
8169#define _mm_alignr_epi32(A, B, imm) \
8170 ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
8171 (__v4si)(__m128i)(B), (int)(imm)))
/* Merge-masked form: __builtin_ia32_selectd_128 receives U (as __mmask8),
 * the _mm_alignr_epi32(A, B, imm) result and W (both as __v4si). Presumably
 * set mask bits pick the aligned lane and clear bits pick W -- confirm
 * against the select builtin. */
8173#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
8174 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8175 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
8176 (__v4si)(__m128i)(W)))
/* Zero-masked form: same select, with _mm_setzero_si128() as the fallback
 * operand. */
8178#define _mm_maskz_alignr_epi32(U, A, B, imm) \
8179 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8180 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
8181 (__v4si)_mm_setzero_si128()))
/* _mm256_alignr_epi32(A, B, imm): expands to __builtin_ia32_alignd256 with A
 * and B viewed as __v8si and imm cast to int; the result is cast to
 * __m256i. */
8183#define _mm256_alignr_epi32(A, B, imm) \
8184 ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
8185 (__v8si)(__m256i)(B), (int)(imm)))
/* Merge-masked form: __builtin_ia32_selectd_256 receives U (as __mmask8),
 * the _mm256_alignr_epi32(A, B, imm) result and W (both as __v8si).
 * Presumably set mask bits pick the aligned lane and clear bits pick W --
 * confirm against the select builtin. */
8187#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
8188 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8189 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
8190 (__v8si)(__m256i)(W)))
/* Zero-masked form: same select, with _mm256_setzero_si256() as the
 * fallback operand. */
8192#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
8193 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8194 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
8195 (__v8si)_mm256_setzero_si256()))
/* _mm_alignr_epi64(A, B, imm): expands to __builtin_ia32_alignq128 with A and
 * B viewed as __v2di and imm cast to int; the result is cast to __m128i. */
8197#define _mm_alignr_epi64(A, B, imm) \
8198 ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
8199 (__v2di)(__m128i)(B), (int)(imm)))
/* Merge-masked form: __builtin_ia32_selectq_128 receives U (as __mmask8),
 * the _mm_alignr_epi64(A, B, imm) result and W (both as __v2di). Presumably
 * set mask bits pick the aligned lane and clear bits pick W -- confirm
 * against the select builtin. */
8201#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
8202 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
8203 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
8204 (__v2di)(__m128i)(W)))
/* Zero-masked form: same select, with _mm_setzero_si128() as the fallback
 * operand. */
8206#define _mm_maskz_alignr_epi64(U, A, B, imm) \
8207 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
8208 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
8209 (__v2di)_mm_setzero_si128()))
/* _mm256_alignr_epi64(A, B, imm): expands to __builtin_ia32_alignq256 with A
 * and B viewed as __v4di and imm cast to int; the result is cast to
 * __m256i. */
8211#define _mm256_alignr_epi64(A, B, imm) \
8212 ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
8213 (__v4di)(__m256i)(B), (int)(imm)))
/* Merge-masked form: __builtin_ia32_selectq_256 receives U (as __mmask8),
 * the _mm256_alignr_epi64(A, B, imm) result and W (both as __v4di).
 * Presumably set mask bits pick the aligned lane and clear bits pick W --
 * confirm against the select builtin. */
8215#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
8216 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8217 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
8218 (__v4di)(__m256i)(W)))
/* Zero-masked form: same select, with _mm256_setzero_si256() as the
 * fallback operand. */
8220#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
8221 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8222 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
8223 (__v4di)_mm256_setzero_si256()))
8228 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8236 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8244 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8252 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8260 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8268 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8276 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8284 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
/* Merge-masked wrapper around _mm256_shuffle_epi32(A, I), which is not
 * defined in this part of the file. __builtin_ia32_selectd_256 receives U
 * (as __mmask8), the shuffle result and W (both as __v8si). Presumably set
 * mask bits pick the shuffled lane and clear bits pick W -- confirm against
 * the select builtin. */
8289#define _mm256_mask_shuffle_epi32(W, U, A, I) \
8290 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8291 (__v8si)_mm256_shuffle_epi32((A), (I)), \
8292 (__v8si)(__m256i)(W)))
/* Zero-masked form: same select, with _mm256_setzero_si256() as the
 * fallback operand. */
8294#define _mm256_maskz_shuffle_epi32(U, A, I) \
8295 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8296 (__v8si)_mm256_shuffle_epi32((A), (I)), \
8297 (__v8si)_mm256_setzero_si256()))
/* Merge-masked wrapper around _mm_shuffle_epi32(A, I), which is not defined
 * in this part of the file. __builtin_ia32_selectd_128 receives U (as
 * __mmask8), the shuffle result and W (both as __v4si). Presumably set mask
 * bits pick the shuffled lane and clear bits pick W -- confirm against the
 * select builtin. */
8299#define _mm_mask_shuffle_epi32(W, U, A, I) \
8300 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8301 (__v4si)_mm_shuffle_epi32((A), (I)), \
8302 (__v4si)(__m128i)(W)))
/* Zero-masked form: same select, with _mm_setzero_si128() as the fallback
 * operand. */
8304#define _mm_maskz_shuffle_epi32(U, A, I) \
8305 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8306 (__v4si)_mm_shuffle_epi32((A), (I)), \
8307 (__v4si)_mm_setzero_si128()))
8312 return (__m128d) __builtin_ia32_selectpd_128 ((
__mmask8) __U,
8320 return (__m128d) __builtin_ia32_selectpd_128 ((
__mmask8) __U,
8328 return (__m256d) __builtin_ia32_selectpd_256 ((
__mmask8) __U,
8336 return (__m256d) __builtin_ia32_selectpd_256 ((
__mmask8) __U,
8344 return (__m128) __builtin_ia32_selectps_128 ((
__mmask8) __U,
8352 return (__m128) __builtin_ia32_selectps_128 ((
__mmask8) __U,
8360 return (__m256) __builtin_ia32_selectps_256 ((
__mmask8) __U,
8368 return (__m256) __builtin_ia32_selectps_256 ((
__mmask8) __U,
8376 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8384 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8393 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8401 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8407#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
8408 ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8409 (__v8hi)(__m128i)(W), \
8412#define _mm_maskz_cvt_roundps_ph(U, A, I) \
8413 ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8414 (__v8hi)_mm_setzero_si128(), \
/* Object-like aliases: the cvtps_ph names expand to the corresponding
 * cvt_roundps_ph macros, so they take the same argument lists, (W, U, A, I)
 * and (U, A, I) respectively. */
8417#define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph
8418#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
8420#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
8421 ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8422 (__v8hi)(__m128i)(W), \
8425#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
8426 ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8427 (__v8hi)_mm_setzero_si128(), \
/* Object-like aliases: the 256-bit cvtps_ph names expand to the
 * corresponding cvt_roundps_ph macros, so they take the same argument lists,
 * (W, U, A, I) and (U, A, I) respectively. */
8430#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph
8431#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
/* Remove the attribute helper macros defined near the top of this header so
 * they are no longer defined after this point. */
8434#undef __DEFAULT_FN_ATTRS128
8435#undef __DEFAULT_FN_ATTRS256
static __inline__ vector float vector float __b
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi64(__m256i __a, int __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi32(__m128i __V)
Zero-extends bytes from the lower half of the 128-bit integer vector in __V and returns the 32-bit va...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b)
Computes the bitwise AND of the 256-bit integer vectors in __a and __b.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi32(__m128i __V)
Sign-extends 16-bit elements from the 128-bit vector of [8 x i16] in __V and returns the 32-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a left by the number of bits given...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors of [4 x i64] in __a and __b...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastd_epi32(__m128i __X)
Broadcasts the low element from the 128-bit vector of [4 x i32] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a left by the number of bits given...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi32(__m128i __V)
Sign-extends bytes from the lower half of the 128-bit integer vector in __V and returns the 32-bit va...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a left by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epi32(__m256i __a, __m256i __b)
Multiplies signed 32-bit integers from even-numbered elements of two 256-bit vectors of [8 x i32] and...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi32(__m256i __a, __m256i __b)
Multiplies signed 32-bit integer elements of two 256-bit vectors of [8 x i32], and returns the lower ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a, __m256i __b)
Subtracts 64-bit integers from corresponding elements of two 256-bit vectors of [4 x i64].
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi64(__m256i __X, __m256i __Y)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __X left by the number of bits given...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors of [8 x i32] in __a and __b...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b)
Compares the corresponding unsigned 32-bit integers in the two 256-bit vectors of [8 x i32] in __a an...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X right by the number of bits give...
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcastsd_pd(__m128d __X)
Broadcasts the 64-bit floating-point value from the low element of the 128-bit vector of [2 x double]...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X right by the number of bits give...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi64(__m128i __X, __m128i __Y)
Shifts each 64-bit element of the 128-bit vector of [2 x i64] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi64(__m256i __a, __m256i __b)
Adds 64-bit integers from corresponding elements of two 256-bit vectors of [4 x i64] and returns the ...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X left by the number of bits given...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors of [8 x i32] in __a and __b...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b)
Compares the corresponding signed 32-bit integers in the two 256-bit vectors of [8 x i32] in __a and ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X left by the number of bits given...
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_broadcastss_ps(__m128 __X)
Broadcasts the 32-bit floating-point value from the low element of the 128-bit vector of [4 x float] ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi64(__m256i __X, __m256i __Y)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi32(__m128i __V)
Zero-extends 16-bit elements from the 128-bit vector of [8 x i16] in __V and returns the 32-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by the number of bits give...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi32(__m128i __X, __m128i __Y)
Shifts each 32-bit element of the 128-bit vector of [4 x i32] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b)
Compares the corresponding signed 32-bit integers in the two 256-bit vectors of [8 x i32] in __a and ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi64(__m128i __V)
Zero-extends the first four bytes from the 128-bit integer vector in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi32(__m256i __X, __m256i __Y)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __X right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b)
Compares the corresponding unsigned 32-bit integers in the two 256-bit vectors of [8 x i32] in __a an...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a, __m256i __b)
Subtracts 32-bit integers from corresponding elements of two 256-bit vectors of [8 x i32].
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi64(__m128i __V)
Zero-extends 16-bit elements from the lower half of the 128-bit vector of [8 x i16] in __V and return...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi64(__m128i __V)
Sign-extends 32-bit elements from the 128-bit vector of [4 x i32] in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors of [4 x i64] in __a and __b...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastq_epi64(__m128i __X)
Broadcasts the low element from the 128-bit vector of [2 x i64] in __X to both elements of the result...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a right by the number of bits give...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi32(__m256i __a, int __count)
Shifts each 32-bit element of the 256-bit vector of [8 x i32] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi64(__m256i __a, int __count)
Shifts each 64-bit element of the 256-bit vector of [4 x i64] in __a left by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_epi64(__m128i __V)
Zero-extends 32-bit elements from the 128-bit vector of [4 x i32] in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastd_epi32(__m128i __X)
Broadcasts the low element from the 128-bit vector of [4 x i32] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi64(__m128i __V)
Sign-extends 16-bit elements from the lower half of the 128-bit vector of [8 x i16] in __V and return...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a)
Computes the absolute value of each signed 32-bit element in the 256-bit vector of [8 x i32] in __a a...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi64(__m128i __V)
Sign-extends the first four bytes from the 128-bit integer vector in __V and returns the 64-bit value...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epu32(__m256i __a, __m256i __b)
Multiplies unsigned 32-bit integers from even-numbered elements of two 256-bit vectors of [8 x i32] an...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi64(__m128i __X, __m128i __Y)
Shifts each 64-bit element of the 128-bit vector of [2 x i64] in __X left by the number of bits given...
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcastss_ps(__m128 __X)
Broadcasts the 32-bit floating-point value from the low element of the 128-bit vector of [4 x float] ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi32(__m256i __a, __m256i __b)
Adds 32-bit integers from corresponding elements of two 256-bit vectors of [8 x i32] and returns the ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastq_epi64(__m128i __X)
Broadcasts the low element from the 128-bit vector of [2 x i64] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu32(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi32(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
#define _mm_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi64(__m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi32(void *__P, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi32(__m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi64(void *__P, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask(__m256i __A, __m256i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi64(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
#define _mm256_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_compress_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B)
#define _mm_cmpeq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi64(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi64(void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_getexp_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64(__m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi32(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W)
#define _mm256_permutexvar_epi32(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi32(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srai_epi64(__m128i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_pd(__mmask8 __U, __m256d __A)
#define _mm256_cmpeq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_scalef_pd(__m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ps(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expand_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mov_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi64(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi16(__m256i __A)
#define _mm_cmpneq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
#define _mm256_cmpeq_epi32_mask(A, B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutexvar_pd(__m256i __X, __m256d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B)
#define _mm_cmpeq_epi32_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_pd(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ps(__m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu32(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16(__m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
short __v2hi __attribute__((__vector_size__(4)))
#define __DEFAULT_FN_ATTRS256
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi32(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_scalef_ps(__m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16(__m128i __A)
#define _mm256_permutexvar_ps(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
#define __DEFAULT_FN_ATTRS128
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi64(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ps(__m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
#define _mm_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi64(__m256i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi32(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi64(__m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi32(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu32_pd(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi32(__m256i __a, __m256i __b)
#define _mm_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expand_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sra_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
#define _mm_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mov_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi64(__m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi32(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi64(void *__P, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi32(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x4(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ps(__m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_getexp_ps(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu32(__m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_compress_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi32(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi64(void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi64(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_cmpneq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
#define _mm_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps(__m256 __a, __m256 __b)
Adds two 256-bit vectors of [8 x float].
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b)
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a)
Calculates the square roots of the values in a 256-bit vector of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps(__m256i __a)
Converts a vector of [8 x i32] into a vector of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a)
Moves and duplicates even-indexed values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float].
static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps(__m256 __a, __m256i __c)
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand.
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd(__m128 __a)
Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double].
static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_permutevar_ps(__m128 __a, __m128i __c)
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand.
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into four signed truncated (rounded toward zero) 32-bit integers returned in a 128-bit vector of [4 x i32].
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32(__m256 __a)
Converts a vector of [8 x float] into a vector of [8 x i32].
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x(long long __q)
Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements set to the specified 64-bit integral value.
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a)
Converts a vector of [8 x float] into eight signed truncated (rounded toward zero) 32-bit integers returned in a vector of [8 x i32].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps(__m256 __a, __m256 __b)
Subtracts two 256-bit vectors of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a)
Moves and duplicates odd-indexed values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd(__m256d __a, __m256d __b)
Divides two 256-bit vectors of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a)
Calculates the square roots of the values in a 256-bit vector of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd(__m256d __a, __m256d __b)
Multiplies two 256-bit vectors of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b)
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd(__m256d __a, __m256d __b)
Subtracts two 256-bit vectors of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a)
Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to ...
static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_permutevar_pd(__m128d __a, __m128i __c)
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector oper...
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps(__m256 __a, __m256 __b)
Divides two 256-bit vectors of [8 x float].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] ...
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps(__m256 __a, __m256 __b)
Multiplies two 256-bit vectors of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] ...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd(__m256d __a, __m256i __c)
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector oper...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements se...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd(__m256d __a, __m256d __b)
Adds two 256-bit vectors of [4 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ void int __a
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into four signed truncated (rounded toward zero) 32-bit integers,...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
struct __storeu_i16 *__P __v
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double]...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored i...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-b...
__inline unsigned int unsigned int unsigned int * __P
__inline unsigned int unsigned int __Y
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V)
Zero-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding elements of two 128-bit vectors of [4 x i32] and returns the lower 32 bits of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V)
Sign-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V)
Zero-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V)
Zero-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V)
Sign-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V)
Sign-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V)
Sign-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V)
Zero-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V)
Zero-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding even-indexed elements of two 128-bit vectors of [4 x i32] and returns a 128-...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V)
Sign-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...