11 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
14 #ifndef __AVX512VLINTRIN_H
15 #define __AVX512VLINTRIN_H
17 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
18 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
/* Convenience wrappers: 128-bit signed 32-bit integer element comparisons,
   expressed via the generic _mm_cmp_epi32_mask predicate macro.  The
   `_mask_` forms additionally AND the result with the caller's mask `k`. */
#define _mm_cmpeq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Convenience wrappers: 256-bit signed 32-bit integer element comparisons,
   expressed via the generic _mm256_cmp_epi32_mask predicate macro. */
#define _mm256_cmpeq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Convenience wrappers: 128-bit unsigned 32-bit integer element comparisons,
   expressed via the generic _mm_cmp_epu32_mask predicate macro. */
#define _mm_cmpeq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Convenience wrappers: 256-bit unsigned 32-bit integer element comparisons,
   expressed via the generic _mm256_cmp_epu32_mask predicate macro. */
#define _mm256_cmpeq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Convenience wrappers: 128-bit signed 64-bit integer element comparisons,
   expressed via the generic _mm_cmp_epi64_mask predicate macro. */
#define _mm_cmpeq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Convenience wrappers: 256-bit signed 64-bit integer element comparisons,
   expressed via the generic _mm256_cmp_epi64_mask predicate macro. */
#define _mm256_cmpeq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Convenience wrappers: 128-bit unsigned 64-bit integer element comparisons,
   expressed via the generic _mm_cmp_epu64_mask predicate macro. */
#define _mm_cmpeq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Convenience wrappers: 256-bit unsigned 64-bit integer element comparisons,
   expressed via the generic _mm256_cmp_epu64_mask predicate macro. */
#define _mm256_cmpeq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
229 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
237 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
245 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
253 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
261 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
269 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
277 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
285 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
293 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
301 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
309 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
317 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
325 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
333 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
341 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
349 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
357 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
365 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
373 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
381 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
389 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
397 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
405 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
413 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
421 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
429 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
437 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
445 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
453 return (__m256i)((__v8su)
__a & (__v8su)
__b);
459 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
473 return (__m128i)((__v4su)
__a & (__v4su)
__b);
479 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
493 return (__m256i)(~(__v8su)__A & (__v8su)__B);
499 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
514 return (__m128i)(~(__v4su)__A & (__v4su)__B);
520 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
534 return (__m256i)((__v8su)
__a | (__v8su)
__b);
540 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
554 return (__m128i)((__v4su)
__a | (__v4su)
__b);
560 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
574 return (__m256i)((__v8su)
__a ^ (__v8su)
__b);
580 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
594 return (__m128i)((__v4su)
__a ^ (__v4su)
__b);
600 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
614 return (__m256i)((__v4du)
__a & (__v4du)
__b);
620 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
634 return (__m128i)((__v2du)
__a & (__v2du)
__b);
640 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
654 return (__m256i)(~(__v4du)__A & (__v4du)__B);
660 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
675 return (__m128i)(~(__v2du)__A & (__v2du)__B);
681 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
695 return (__m256i)((__v4du)
__a | (__v4du)
__b);
701 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
715 return (__m128i)((__v2du)
__a | (__v2du)
__b);
721 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
735 return (__m256i)((__v4du)
__a ^ (__v4du)
__b);
741 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
755 return (__m128i)((__v2du)
__a ^ (__v2du)
__b);
762 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
/* Generic signed/unsigned integer comparison predicates.  Each macro lowers
   to a VPCMP[U]{D,Q} builtin; `p` selects the predicate (_MM_CMPINT_*) and
   the result is a bitmask with one bit per element.  Unmasked forms pass an
   all-ones write mask; `_mask_` forms AND the result with the caller's mask
   `m`.  NOTE(review): the trailing write-mask line of each macro was missing
   from the damaged original and has been restored per the canonical
   definition — confirm against the upstream header. */
#define _mm_cmp_epi32_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                        (__v4si)(__m128i)(b), (int)(p), \
                                        (__mmask8)-1)
#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                        (__v4si)(__m128i)(b), (int)(p), \
                                        (__mmask8)(m))
#define _mm_cmp_epu32_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm256_cmp_epi32_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                        (__v8si)(__m256i)(b), (int)(p), \
                                        (__mmask8)-1)
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                        (__v8si)(__m256i)(b), (int)(p), \
                                        (__mmask8)(m))
#define _mm256_cmp_epu32_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm_cmp_epi64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                        (__v2di)(__m128i)(b), (int)(p), \
                                        (__mmask8)-1)
#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                        (__v2di)(__m128i)(b), (int)(p), \
                                        (__mmask8)(m))
#define _mm_cmp_epu64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm256_cmp_epi64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                        (__v4di)(__m256i)(b), (int)(p), \
                                        (__mmask8)-1)
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                        (__v4di)(__m256i)(b), (int)(p), \
                                        (__mmask8)(m))
#define _mm256_cmp_epu64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m))
/* Generic floating-point comparison predicates.  Each macro lowers to a
   VCMP{PS,PD} builtin; `p` selects one of the 32 FP predicates and the
   result is a bitmask with one bit per element.  Unmasked forms pass an
   all-ones write mask; `_mask_` forms AND the result with mask `m`.
   NOTE(review): the trailing write-mask line of each macro was missing from
   the damaged original and has been restored per the canonical definition —
   confirm against the upstream header. */
#define _mm256_cmp_ps_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm256_cmp_pd_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm_cmp_ps_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm_cmp_pd_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)(m))
896 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
897 __builtin_ia32_vfmaddpd ((__v2df) __A,
906 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
907 __builtin_ia32_vfmaddpd ((__v2df) __A,
916 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
917 __builtin_ia32_vfmaddpd ((__v2df) __A,
926 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
927 __builtin_ia32_vfmaddpd ((__v2df) __A,
936 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
937 __builtin_ia32_vfmaddpd ((__v2df) __A,
946 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
947 __builtin_ia32_vfmaddpd (-(__v2df) __A,
956 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
957 __builtin_ia32_vfmaddpd (-(__v2df) __A,
966 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
967 __builtin_ia32_vfmaddpd (-(__v2df) __A,
976 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
977 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
986 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
987 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
996 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
997 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1006 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1007 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1016 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1017 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1026 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1027 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1036 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1037 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1046 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1047 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1056 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1057 __builtin_ia32_vfmaddps ((__v4sf) __A,
1066 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1067 __builtin_ia32_vfmaddps ((__v4sf) __A,
1076 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1077 __builtin_ia32_vfmaddps ((__v4sf) __A,
1086 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1087 __builtin_ia32_vfmaddps ((__v4sf) __A,
1096 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1097 __builtin_ia32_vfmaddps ((__v4sf) __A,
1106 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1107 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1116 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1117 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1126 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1127 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1136 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1137 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1146 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1147 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1156 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1157 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1166 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1167 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1176 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1177 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1186 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1187 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1196 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1197 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1206 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1207 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1216 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1217 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1226 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1227 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1236 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1237 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1246 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1247 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1256 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1257 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1266 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1267 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1276 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1277 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1286 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1287 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1296 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1297 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1306 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1307 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1316 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1317 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1326 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1327 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1336 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1337 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1346 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1347 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1356 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1357 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1367 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1368 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1377 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1378 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1387 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1388 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1397 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1398 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1407 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1408 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1417 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1418 __builtin_ia32_vfmaddpd ((__v2df) __A,
1427 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1428 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1437 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1438 __builtin_ia32_vfmaddps ((__v4sf) __A,
1447 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1448 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1457 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1458 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1467 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1468 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1477 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1478 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1487 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1488 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1497 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1498 __builtin_ia32_vfmaddpd ((__v2df) __A,
1507 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1508 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1517 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1518 __builtin_ia32_vfmaddps ((__v4sf) __A,
1527 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1528 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1537 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1538 __builtin_ia32_vfmaddpd ((__v2df) __A,
1547 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1548 __builtin_ia32_vfmaddpd ((__v2df) __A,
1557 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1558 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1567 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1568 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1577 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1578 __builtin_ia32_vfmaddps ((__v4sf) __A,
1587 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1588 __builtin_ia32_vfmaddps ((__v4sf) __A,
1597 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1598 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1607 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1608 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1616 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
1623 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
1630 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
1637 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
1644 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1651 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1658 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1665 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1672 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
1679 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
1686 return (__m128d) __builtin_ia32_selectpd_128 ((
__mmask8) __U,
1693 return (__m256d) __builtin_ia32_selectpd_256 ((
__mmask8) __U,
1700 return (__m128) __builtin_ia32_selectps_128 ((
__mmask8) __U,
1707 return (__m256) __builtin_ia32_selectps_256 ((
__mmask8) __U,
1714 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
1721 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
1728 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1735 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1743 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1750 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1758 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1765 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1773 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1780 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1788 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1795 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1803 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1810 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1818 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1825 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1833 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1840 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1848 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1855 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1862 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1869 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1876 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1883 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1890 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1897 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1904 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
1911 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
1918 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
1925 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
1932 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1939 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1946 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1953 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1960 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1967 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1975 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1982 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1989 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1996 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2004 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2011 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2018 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2026 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2033 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2041 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2049 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2056 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2064 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2071 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2078 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2085 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2092 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2099 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2106 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2113 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2120 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2128 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2135 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2143 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2151 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2158 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2166 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2173 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2181 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2188 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2195 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2203 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2210 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2218 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2226 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2233 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2241 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2248 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2255 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2262 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2269 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2277 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2284 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2292 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2300 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2307 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2315 return (__m128d) __builtin_convertvector(
2316 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2321 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
2328 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
2335 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2340 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
2347 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
2354 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2359 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2366 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2373 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2378 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2385 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2392 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2399 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2406 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2413 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2420 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2427 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2434 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2441 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2448 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2455 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2463 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2470 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2478 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2485 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2493 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2500 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2508 return (__m128d) __builtin_ia32_expandloaddf128_mask ((
const __v2df *) __P,
2516 return (__m128d) __builtin_ia32_expandloaddf128_mask ((
const __v2df *) __P,
2525 return (__m256d) __builtin_ia32_expandloaddf256_mask ((
const __v4df *) __P,
2533 return (__m256d) __builtin_ia32_expandloaddf256_mask ((
const __v4df *) __P,
2542 return (__m128i) __builtin_ia32_expandloaddi128_mask ((
const __v2di *) __P,
2550 return (__m128i) __builtin_ia32_expandloaddi128_mask ((
const __v2di *) __P,
2560 return (__m256i) __builtin_ia32_expandloaddi256_mask ((
const __v4di *) __P,
2568 return (__m256i) __builtin_ia32_expandloaddi256_mask ((
const __v4di *) __P,
2577 return (__m128) __builtin_ia32_expandloadsf128_mask ((
const __v4sf *) __P,
2584 return (__m128) __builtin_ia32_expandloadsf128_mask ((
const __v4sf *) __P,
2593 return (__m256) __builtin_ia32_expandloadsf256_mask ((
const __v8sf *) __P,
2600 return (__m256) __builtin_ia32_expandloadsf256_mask ((
const __v8sf *) __P,
2609 return (__m128i) __builtin_ia32_expandloadsi128_mask ((
const __v4si *) __P,
2617 return (__m128i) __builtin_ia32_expandloadsi128_mask ((
const __v4si *) __P,
2626 return (__m256i) __builtin_ia32_expandloadsi256_mask ((
const __v8si *) __P,
2634 return (__m256i) __builtin_ia32_expandloadsi256_mask ((
const __v8si *) __P,
2643 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2650 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2658 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2665 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2673 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2680 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2688 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2695 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2703 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2718 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2726 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2734 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2741 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2749 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2757 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2764 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2772 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2780 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2787 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2795 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2802 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2809 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2816 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2823 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2830 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2837 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2844 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2851 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2858 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2865 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2872 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2879 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2886 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2893 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2900 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2907 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2914 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2921 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2928 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2935 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2942 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2949 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2956 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2963 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2970 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2977 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2984 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2991 return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
2996 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
3003 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
3010 return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
3015 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
3022 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
3029 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3036 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3043 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3050 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3057 return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
3062 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3069 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3076 return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
3081 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3088 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3095 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3102 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3109 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3116 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3123 return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
3128 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3135 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3142 return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
3147 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3154 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3161 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3168 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3175 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3182 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3189 return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
3194 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3201 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3208 return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
3213 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3220 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3227 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3234 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3241 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3248 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3255 return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
3260 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3267 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3274 return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
3279 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3286 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3291 #define _mm_roundscale_pd(A, imm) \
3292 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3294 (__v2df)_mm_setzero_pd(), \
3298 #define _mm_mask_roundscale_pd(W, U, A, imm) \
3299 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3301 (__v2df)(__m128d)(W), \
3305 #define _mm_maskz_roundscale_pd(U, A, imm) \
3306 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3308 (__v2df)_mm_setzero_pd(), \
3312 #define _mm256_roundscale_pd(A, imm) \
3313 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3315 (__v4df)_mm256_setzero_pd(), \
3319 #define _mm256_mask_roundscale_pd(W, U, A, imm) \
3320 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3322 (__v4df)(__m256d)(W), \
3326 #define _mm256_maskz_roundscale_pd(U, A, imm) \
3327 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3329 (__v4df)_mm256_setzero_pd(), \
3332 #define _mm_roundscale_ps(A, imm) \
3333 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3334 (__v4sf)_mm_setzero_ps(), \
3338 #define _mm_mask_roundscale_ps(W, U, A, imm) \
3339 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3340 (__v4sf)(__m128)(W), \
3344 #define _mm_maskz_roundscale_ps(U, A, imm) \
3345 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3346 (__v4sf)_mm_setzero_ps(), \
3349 #define _mm256_roundscale_ps(A, imm) \
3350 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3351 (__v8sf)_mm256_setzero_ps(), \
3354 #define _mm256_mask_roundscale_ps(W, U, A, imm) \
3355 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3356 (__v8sf)(__m256)(W), \
3360 #define _mm256_maskz_roundscale_ps(U, A, imm) \
3361 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3362 (__v8sf)_mm256_setzero_ps(), \
3367 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3377 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3385 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3394 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3404 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3412 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3421 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3430 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3438 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3447 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3457 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3465 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
/* 64-bit-index scatter stores (AVX-512VL, VSCATTERQPD / VPSCATTERQQ).
 * Each macro stores the elements of v1 to memory at
 * (char *)addr + index[i] * scale. The unmasked forms use an all-ones
 * write mask; the _mask_ forms store only elements whose mask bit is set.
 * 'scale' must be an immediate (1, 2, 4, or 8). */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
3512 #define _mm_i64scatter_ps(addr, index, v1, scale) \
3513 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
3514 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
3517 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
3518 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
3519 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Scatter 32-bit integer elements using two 64-bit indices
 * (VPSCATTERQD, 128-bit form). The index vector supplies two 64-bit
 * indices; v1 is passed as a full __v4si per the builtin's signature. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
3532 #define _mm256_i64scatter_ps(addr, index, v1, scale) \
3533 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
3534 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
3537 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
3538 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
3539 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
/* Remaining 64-bit-index epi32 scatters and the 32-bit-index
 * (VSCATTERDPD / VPSCATTERDQ family) scatter stores. Unmasked forms use
 * an all-ones write mask; _mask_ forms store only elements whose mask
 * bit is set. 'scale' must be an immediate (1, 2, 4, or 8). */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
3592 #define _mm_i32scatter_ps(addr, index, v1, scale) \
3593 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
3594 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
3597 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3598 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
3599 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Scatter four 32-bit integers using four 32-bit indices
 * (VPSCATTERDD, 128-bit form). */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
3612 #define _mm256_i32scatter_ps(addr, index, v1, scale) \
3613 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
3614 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
3617 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3618 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
3619 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
/* Scatter eight 32-bit integers using eight 32-bit indices
 * (VPSCATTERDD, 256-bit form). */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale))
3634 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3641 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3648 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3655 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3662 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3669 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3676 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3683 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3690 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3697 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3704 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3711 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3718 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3725 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3732 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3739 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3746 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3753 return (__m128i)__builtin_ia32_selectd_128(__U,
3761 return (__m128i)__builtin_ia32_selectd_128(__U,
3769 return (__m128i)__builtin_ia32_selectd_128(__U,
3776 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3783 return (__m256i)__builtin_ia32_selectd_256(__U,
3791 return (__m256i)__builtin_ia32_selectd_256(__U,
3799 return (__m256i)__builtin_ia32_selectd_256(__U,
3806 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3812 return (__m128d)__builtin_ia32_selectpd_128(__U,
3819 return (__m128d)__builtin_ia32_selectpd_128(__U,
3821 (__v2df)(__m128d)__I);
3826 return (__m128d)__builtin_ia32_selectpd_128(__U,
3833 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3840 return (__m256d)__builtin_ia32_selectpd_256(__U,
3848 return (__m256d)__builtin_ia32_selectpd_256(__U,
3850 (__v4df)(__m256d)__I);
3856 return (__m256d)__builtin_ia32_selectpd_256(__U,
3863 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3869 return (__m128)__builtin_ia32_selectps_128(__U,
3876 return (__m128)__builtin_ia32_selectps_128(__U,
3878 (__v4sf)(__m128)__I);
3883 return (__m128)__builtin_ia32_selectps_128(__U,
3890 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3896 return (__m256)__builtin_ia32_selectps_256(__U,
3904 return (__m256)__builtin_ia32_selectps_256(__U,
3906 (__v8sf)(__m256)__I);
3912 return (__m256)__builtin_ia32_selectps_256(__U,
3919 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3926 return (__m128i)__builtin_ia32_selectq_128(__U,
3934 return (__m128i)__builtin_ia32_selectq_128(__U,
3942 return (__m128i)__builtin_ia32_selectq_128(__U,
3950 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3957 return (__m256i)__builtin_ia32_selectq_256(__U,
3965 return (__m256i)__builtin_ia32_selectq_256(__U,
3973 return (__m256i)__builtin_ia32_selectq_256(__U,
3981 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
3989 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
3997 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4005 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4013 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4021 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4029 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4037 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4045 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4053 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4061 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4069 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4077 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4085 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4093 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4101 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4109 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4117 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4125 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4133 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4142 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4150 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4158 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4166 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4174 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4182 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4190 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4198 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4206 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4214 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4222 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4230 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4238 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4246 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4254 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4262 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4270 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4278 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4286 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4294 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
/* Rotate-left by immediate (VPROLD / VPROLQ). The base macros rotate each
 * 32- or 64-bit element of 'a' left by the immediate 'b'. The _mask_
 * variants blend the result with 'w' under write mask 'u' via the
 * select builtins; the _maskz_ variants zero the unselected elements. */
#define _mm_rol_epi32(a, b) \
  (__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b))

#define _mm_mask_rol_epi32(w, u, a, b) \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                      (__v4si)_mm_rol_epi32((a), (b)), \
                                      (__v4si)(__m128i)(w))

#define _mm_maskz_rol_epi32(u, a, b) \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                      (__v4si)_mm_rol_epi32((a), (b)), \
                                      (__v4si)_mm_setzero_si128())

#define _mm256_rol_epi32(a, b) \
  (__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b))

#define _mm256_mask_rol_epi32(w, u, a, b) \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                      (__v8si)_mm256_rol_epi32((a), (b)), \
                                      (__v8si)(__m256i)(w))

#define _mm256_maskz_rol_epi32(u, a, b) \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                      (__v8si)_mm256_rol_epi32((a), (b)), \
                                      (__v8si)_mm256_setzero_si256())

#define _mm_rol_epi64(a, b) \
  (__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b))

#define _mm_mask_rol_epi64(w, u, a, b) \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                      (__v2di)_mm_rol_epi64((a), (b)), \
                                      (__v2di)(__m128i)(w))

#define _mm_maskz_rol_epi64(u, a, b) \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                      (__v2di)_mm_rol_epi64((a), (b)), \
                                      (__v2di)_mm_setzero_si128())

#define _mm256_rol_epi64(a, b) \
  (__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b))

#define _mm256_mask_rol_epi64(w, u, a, b) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                      (__v4di)_mm256_rol_epi64((a), (b)), \
                                      (__v4di)(__m256i)(w))

#define _mm256_maskz_rol_epi64(u, a, b) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                      (__v4di)_mm256_rol_epi64((a), (b)), \
                                      (__v4di)_mm256_setzero_si256())
4355 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4361 return (__m128i)__builtin_ia32_selectd_128(__U,
4369 return (__m128i)__builtin_ia32_selectd_128(__U,
4377 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4383 return (__m256i)__builtin_ia32_selectd_256(__U,
4391 return (__m256i)__builtin_ia32_selectd_256(__U,
4399 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4405 return (__m128i)__builtin_ia32_selectq_128(__U,
4413 return (__m128i)__builtin_ia32_selectq_128(__U,
4421 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4427 return (__m256i)__builtin_ia32_selectq_256(__U,
4435 return (__m256i)__builtin_ia32_selectq_256(__U,
/* Rotate-right by immediate (VPRORD / VPRORQ). Structure mirrors the
 * rotate-left macros: base form rotates each 32- or 64-bit element of
 * 'a' right by immediate 'b'; _mask_ merges with 'w' under mask 'u';
 * _maskz_ zeroes unselected elements. */
#define _mm_ror_epi32(a, b) \
  (__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b))

#define _mm_mask_ror_epi32(w, u, a, b) \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                      (__v4si)_mm_ror_epi32((a), (b)), \
                                      (__v4si)(__m128i)(w))

#define _mm_maskz_ror_epi32(u, a, b) \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                      (__v4si)_mm_ror_epi32((a), (b)), \
                                      (__v4si)_mm_setzero_si128())

#define _mm256_ror_epi32(a, b) \
  (__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b))

#define _mm256_mask_ror_epi32(w, u, a, b) \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                      (__v8si)_mm256_ror_epi32((a), (b)), \
                                      (__v8si)(__m256i)(w))

#define _mm256_maskz_ror_epi32(u, a, b) \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                      (__v8si)_mm256_ror_epi32((a), (b)), \
                                      (__v8si)_mm256_setzero_si256())

#define _mm_ror_epi64(a, b) \
  (__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b))

#define _mm_mask_ror_epi64(w, u, a, b) \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                      (__v2di)_mm_ror_epi64((a), (b)), \
                                      (__v2di)(__m128i)(w))

#define _mm_maskz_ror_epi64(u, a, b) \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                      (__v2di)_mm_ror_epi64((a), (b)), \
                                      (__v2di)_mm_setzero_si128())

#define _mm256_ror_epi64(a, b) \
  (__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b))

#define _mm256_mask_ror_epi64(w, u, a, b) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                      (__v4di)_mm256_ror_epi64((a), (b)), \
                                      (__v4di)(__m256i)(w))

#define _mm256_maskz_ror_epi64(u, a, b) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                      (__v4di)_mm256_ror_epi64((a), (b)), \
                                      (__v4di)_mm256_setzero_si256())
4495 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4503 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4511 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4519 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4527 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4535 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4543 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4551 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4559 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4567 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4575 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4583 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4591 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4599 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4607 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4615 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4623 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4629 return (__m128i)__builtin_ia32_selectd_128(__U,
4637 return (__m128i)__builtin_ia32_selectd_128(__U,
4645 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4651 return (__m256i)__builtin_ia32_selectd_256(__U,
4659 return (__m256i)__builtin_ia32_selectd_256(__U,
4667 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4673 return (__m128i)__builtin_ia32_selectq_128(__U,
4681 return (__m128i)__builtin_ia32_selectq_128(__U,
4689 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4695 return (__m256i)__builtin_ia32_selectq_256(__U,
4703 return (__m256i)__builtin_ia32_selectq_256(__U,
4711 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4719 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4727 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4735 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4743 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4751 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4759 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4767 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4775 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4783 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4791 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4799 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4807 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4815 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4823 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4831 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4839 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4847 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4855 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4863 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4871 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4879 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4887 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4895 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4903 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4911 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4919 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4927 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4935 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4943 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4951 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4959 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4967 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4975 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4983 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4991 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4999 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5005 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5013 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5021 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5027 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5035 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5043 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
5051 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
5060 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
5068 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
5076 return *(
const __m128i *) __P;
5082 return (__m128i) __builtin_ia32_movdqa32load128_mask ((
const __v4si *) __P,
5091 return (__m128i) __builtin_ia32_movdqa32load128_mask ((
const __v4si *) __P,
5101 return *(
const __m256i *) __P;
5107 return (__m256i) __builtin_ia32_movdqa32load256_mask ((
const __v8si *) __P,
5116 return (__m256i) __builtin_ia32_movdqa32load256_mask ((
const __v8si *) __P,
5126 *(__m128i *) __P = __A;
5132 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5140 *(__m256i *) __P = __A;
5146 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5154 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
5162 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
5170 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
5178 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
5186 return *(
const __m128i *) __P;
5192 return (__m128i) __builtin_ia32_movdqa64load128_mask ((
const __v2di *) __P,
5201 return (__m128i) __builtin_ia32_movdqa64load128_mask ((
const __v2di *) __P,
5211 return *(
const __m256i *) __P;
5217 return (__m256i) __builtin_ia32_movdqa64load256_mask ((
const __v4di *) __P,
5226 return (__m256i) __builtin_ia32_movdqa64load256_mask ((
const __v4di *) __P,
5236 *(__m128i *) __P = __A;
5242 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5250 *(__m256i *) __P = __A;
5256 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5264 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5272 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5280 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5288 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5296 return (__m128i)__builtin_ia32_selectd_128(__M,
5304 return (__m128i)__builtin_ia32_selectd_128(__M,
5312 return (__m256i)__builtin_ia32_selectd_256(__M,
5320 return (__m256i)__builtin_ia32_selectd_256(__M,
5329 return (__m128i) __builtin_ia32_selectq_128(__M,
5337 return (__m128i) __builtin_ia32_selectq_128(__M,
5345 return (__m256i) __builtin_ia32_selectq_256(__M,
5353 return (__m256i) __builtin_ia32_selectq_256(__M,
/* Fix up special values in each double lane of A using B and the per-lane
   control table C; imm selects fault suppression.  Unmasked form: all lanes
   active ((__mmask8)-1).  NOTE(review): tail reconstructed from the mask/maskz
   siblings below -- semantics per VFIXUPIMMPD, confirm against the Intrinsics
   Guide. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)-1))
/* Merge-masked fixupimm on 128-bit doubles: lanes with the U bit clear are
   presumably left as A's value -- confirm against VFIXUPIMMPD docs.  Tail
   reconstructed from the maskz sibling's explicit (__mmask8)(U) pattern. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
/* Zero-masked fixupimm on 128-bit doubles: calls the _maskz builtin, so lanes
   with the U bit clear are zeroed rather than merged.  Expansion wrapped in
   outer parens so the macro binds safely inside larger expressions. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2di)(__m128i)(C), \
                                               (int)(imm), (__mmask8)(U)))
/* 256-bit unmasked fixupimm on doubles; all four lanes active ((__mmask8)-1).
   NOTE(review): tail reconstructed from the mask/maskz siblings. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)-1))
/* 256-bit merge-masked fixupimm on doubles; mask operand is U.  Tail
   reconstructed from the maskz sibling's explicit (__mmask8)(U) pattern. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
/* 256-bit zero-masked fixupimm on doubles: the _maskz builtin zeroes lanes
   whose U bit is clear.  Expansion wrapped in outer parens so the macro binds
   safely inside larger expressions. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                               (__v4df)(__m256d)(B), \
                                               (__v4di)(__m256i)(C), \
                                               (int)(imm), (__mmask8)(U)))
/* 128-bit unmasked fixupimm on floats; all four lanes active ((__mmask8)-1).
   NOTE(review): tail reconstructed from the mask/maskz siblings. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
/* 128-bit merge-masked fixupimm on floats; mask operand is U.  Tail
   reconstructed from the sibling macros' explicit (__mmask8)(U) pattern. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
/* 128-bit zero-masked fixupimm on floats: the _maskz builtin zeroes lanes
   whose U bit is clear.  Tail reconstructed from the complete _pd maskz
   sibling. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4si)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
/* 256-bit unmasked fixupimm on floats; all eight lanes active ((__mmask8)-1).
   NOTE(review): tail reconstructed from the mask/maskz siblings. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
/* 256-bit merge-masked fixupimm on floats; mask operand is U.  Tail
   reconstructed from the sibling macros' explicit (__mmask8)(U) pattern. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
/* 256-bit zero-masked fixupimm on floats: the _maskz builtin zeroes lanes
   whose U bit is clear.  Tail reconstructed from the complete _pd maskz
   sibling. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
                                              (__v8sf)(__m256)(B), \
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5433 return (__m128d) __builtin_ia32_loadapd128_mask ((
const __v2df *) __P,