/* Extracted from clang 8.0.0svn, avx512fintrin.h. */
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23 #ifndef __IMMINTRIN_H
24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25 #endif
26 
27 #ifndef __AVX512FINTRIN_H
28 #define __AVX512FINTRIN_H
29 
30 typedef char __v64qi __attribute__((__vector_size__(64)));
31 typedef short __v32hi __attribute__((__vector_size__(64)));
32 typedef double __v8df __attribute__((__vector_size__(64)));
33 typedef float __v16sf __attribute__((__vector_size__(64)));
34 typedef long long __v8di __attribute__((__vector_size__(64)));
35 typedef int __v16si __attribute__((__vector_size__(64)));
36 
37 /* Unsigned types */
38 typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
39 typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
40 typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
41 typedef unsigned int __v16su __attribute__((__vector_size__(64)));
42 
43 typedef float __m512 __attribute__((__vector_size__(64)));
44 typedef double __m512d __attribute__((__vector_size__(64)));
45 typedef long long __m512i __attribute__((__vector_size__(64)));
46 
47 typedef unsigned char __mmask8;
48 typedef unsigned short __mmask16;
49 
50 /* Rounding mode macros. */
51 #define _MM_FROUND_TO_NEAREST_INT 0x00
52 #define _MM_FROUND_TO_NEG_INF 0x01
53 #define _MM_FROUND_TO_POS_INF 0x02
54 #define _MM_FROUND_TO_ZERO 0x03
55 #define _MM_FROUND_CUR_DIRECTION 0x04
56 
/* Constants for integer comparison predicates.
   The enumerator values are the VPCMP immediate encodings; the gap at
   value 3 (_MM_CMPINT_UNUSED) is required so that _MM_CMPINT_NE == 4. */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* Reserved encoding slot -- keeps NE at 4 */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
69 
/* 4x2-bit shuffle selectors for _mm512_shuffle_epi32 and friends.
   Each letter A..D selects a source element (A=0 .. D=3); the value is the
   base-4 encoding of the four selectors, e.g. _MM_PERM_ABCD == 0x1B. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
159 
/* Normalization intervals for the getmant (extract mantissa) intrinsics. */
typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Sign control for the getmant (extract mantissa) intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,      /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,     /* sign = 0             */
  _MM_MANT_SIGN_nan       /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
/* Define the default attributes for the functions in this file.
   __min_vector_width__ keeps the backend from narrowing 512-bit ops. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
179 
180 /* Create vectors with repeated elements */
181 
182 static __inline __m512i __DEFAULT_FN_ATTRS512
184 {
185  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
186 }
187 
188 #define _mm512_setzero_epi32 _mm512_setzero_si512
189 
190 static __inline__ __m512d __DEFAULT_FN_ATTRS512
192 {
193  return (__m512d)__builtin_ia32_undef512();
194 }
195 
196 static __inline__ __m512 __DEFAULT_FN_ATTRS512
198 {
199  return (__m512)__builtin_ia32_undef512();
200 }
201 
202 static __inline__ __m512 __DEFAULT_FN_ATTRS512
204 {
205  return (__m512)__builtin_ia32_undef512();
206 }
207 
208 static __inline__ __m512i __DEFAULT_FN_ATTRS512
210 {
211  return (__m512i)__builtin_ia32_undef512();
212 }
213 
214 static __inline__ __m512i __DEFAULT_FN_ATTRS512
216 {
217  return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
218  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
219 }
220 
221 static __inline__ __m512i __DEFAULT_FN_ATTRS512
222 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
223 {
224  return (__m512i)__builtin_ia32_selectd_512(__M,
225  (__v16si) _mm512_broadcastd_epi32(__A),
226  (__v16si) __O);
227 }
228 
229 static __inline__ __m512i __DEFAULT_FN_ATTRS512
230 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
231 {
232  return (__m512i)__builtin_ia32_selectd_512(__M,
233  (__v16si) _mm512_broadcastd_epi32(__A),
234  (__v16si) _mm512_setzero_si512());
235 }
236 
237 static __inline__ __m512i __DEFAULT_FN_ATTRS512
239 {
240  return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
241  0, 0, 0, 0, 0, 0, 0, 0);
242 }
243 
244 static __inline__ __m512i __DEFAULT_FN_ATTRS512
245 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
246 {
247  return (__m512i)__builtin_ia32_selectq_512(__M,
248  (__v8di) _mm512_broadcastq_epi64(__A),
249  (__v8di) __O);
250 
251 }
252 
253 static __inline__ __m512i __DEFAULT_FN_ATTRS512
254 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
255 {
256  return (__m512i)__builtin_ia32_selectq_512(__M,
257  (__v8di) _mm512_broadcastq_epi64(__A),
258  (__v8di) _mm512_setzero_si512());
259 }
260 
261 
262 static __inline __m512 __DEFAULT_FN_ATTRS512
264 {
265  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
266  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
267 }
268 
269 #define _mm512_setzero _mm512_setzero_ps
270 
271 static __inline __m512d __DEFAULT_FN_ATTRS512
273 {
274  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
275 }
276 
277 static __inline __m512 __DEFAULT_FN_ATTRS512
278 _mm512_set1_ps(float __w)
279 {
280  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
281  __w, __w, __w, __w, __w, __w, __w, __w };
282 }
283 
284 static __inline __m512d __DEFAULT_FN_ATTRS512
285 _mm512_set1_pd(double __w)
286 {
287  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
288 }
289 
290 static __inline __m512i __DEFAULT_FN_ATTRS512
292 {
293  return __extension__ (__m512i)(__v64qi){
294  __w, __w, __w, __w, __w, __w, __w, __w,
295  __w, __w, __w, __w, __w, __w, __w, __w,
296  __w, __w, __w, __w, __w, __w, __w, __w,
297  __w, __w, __w, __w, __w, __w, __w, __w,
298  __w, __w, __w, __w, __w, __w, __w, __w,
299  __w, __w, __w, __w, __w, __w, __w, __w,
300  __w, __w, __w, __w, __w, __w, __w, __w,
301  __w, __w, __w, __w, __w, __w, __w, __w };
302 }
303 
304 static __inline __m512i __DEFAULT_FN_ATTRS512
306 {
307  return __extension__ (__m512i)(__v32hi){
308  __w, __w, __w, __w, __w, __w, __w, __w,
309  __w, __w, __w, __w, __w, __w, __w, __w,
310  __w, __w, __w, __w, __w, __w, __w, __w,
311  __w, __w, __w, __w, __w, __w, __w, __w };
312 }
313 
314 static __inline __m512i __DEFAULT_FN_ATTRS512
316 {
317  return __extension__ (__m512i)(__v16si){
318  __s, __s, __s, __s, __s, __s, __s, __s,
319  __s, __s, __s, __s, __s, __s, __s, __s };
320 }
321 
322 static __inline __m512i __DEFAULT_FN_ATTRS512
323 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
324 {
325  return (__m512i)__builtin_ia32_selectd_512(__M,
326  (__v16si)_mm512_set1_epi32(__A),
327  (__v16si)_mm512_setzero_si512());
328 }
329 
330 static __inline __m512i __DEFAULT_FN_ATTRS512
331 _mm512_set1_epi64(long long __d)
332 {
333  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
334 }
335 
336 static __inline __m512i __DEFAULT_FN_ATTRS512
337 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
338 {
339  return (__m512i)__builtin_ia32_selectq_512(__M,
340  (__v8di)_mm512_set1_epi64(__A),
341  (__v8di)_mm512_setzero_si512());
342 }
343 
344 static __inline__ __m512 __DEFAULT_FN_ATTRS512
346 {
347  return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
348  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
349 }
350 
351 static __inline __m512i __DEFAULT_FN_ATTRS512
352 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
353 {
354  return __extension__ (__m512i)(__v16si)
355  { __D, __C, __B, __A, __D, __C, __B, __A,
356  __D, __C, __B, __A, __D, __C, __B, __A };
357 }
358 
359 static __inline __m512i __DEFAULT_FN_ATTRS512
360 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
361  long long __D)
362 {
363  return __extension__ (__m512i) (__v8di)
364  { __D, __C, __B, __A, __D, __C, __B, __A };
365 }
366 
367 static __inline __m512d __DEFAULT_FN_ATTRS512
368 _mm512_set4_pd (double __A, double __B, double __C, double __D)
369 {
370  return __extension__ (__m512d)
371  { __D, __C, __B, __A, __D, __C, __B, __A };
372 }
373 
374 static __inline __m512 __DEFAULT_FN_ATTRS512
375 _mm512_set4_ps (float __A, float __B, float __C, float __D)
376 {
377  return __extension__ (__m512)
378  { __D, __C, __B, __A, __D, __C, __B, __A,
379  __D, __C, __B, __A, __D, __C, __B, __A };
380 }
381 
382 #define _mm512_setr4_epi32(e0,e1,e2,e3) \
383  _mm512_set4_epi32((e3),(e2),(e1),(e0))
384 
385 #define _mm512_setr4_epi64(e0,e1,e2,e3) \
386  _mm512_set4_epi64((e3),(e2),(e1),(e0))
387 
388 #define _mm512_setr4_pd(e0,e1,e2,e3) \
389  _mm512_set4_pd((e3),(e2),(e1),(e0))
390 
391 #define _mm512_setr4_ps(e0,e1,e2,e3) \
392  _mm512_set4_ps((e3),(e2),(e1),(e0))
393 
394 static __inline__ __m512d __DEFAULT_FN_ATTRS512
396 {
397  return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
398  0, 0, 0, 0, 0, 0, 0, 0);
399 }
400 
401 /* Cast between vector types */
402 
403 static __inline __m512d __DEFAULT_FN_ATTRS512
405 {
406  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
407 }
408 
409 static __inline __m512 __DEFAULT_FN_ATTRS512
411 {
412  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
413  -1, -1, -1, -1, -1, -1, -1, -1);
414 }
415 
416 static __inline __m128d __DEFAULT_FN_ATTRS512
418 {
419  return __builtin_shufflevector(__a, __a, 0, 1);
420 }
421 
422 static __inline __m256d __DEFAULT_FN_ATTRS512
424 {
425  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
426 }
427 
428 static __inline __m128 __DEFAULT_FN_ATTRS512
430 {
431  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
432 }
433 
434 static __inline __m256 __DEFAULT_FN_ATTRS512
436 {
437  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
438 }
439 
440 static __inline __m512 __DEFAULT_FN_ATTRS512
441 _mm512_castpd_ps (__m512d __A)
442 {
443  return (__m512) (__A);
444 }
445 
446 static __inline __m512i __DEFAULT_FN_ATTRS512
447 _mm512_castpd_si512 (__m512d __A)
448 {
449  return (__m512i) (__A);
450 }
451 
452 static __inline__ __m512d __DEFAULT_FN_ATTRS512
454 {
455  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
456 }
457 
458 static __inline __m512d __DEFAULT_FN_ATTRS512
459 _mm512_castps_pd (__m512 __A)
460 {
461  return (__m512d) (__A);
462 }
463 
464 static __inline __m512i __DEFAULT_FN_ATTRS512
466 {
467  return (__m512i) (__A);
468 }
469 
470 static __inline__ __m512 __DEFAULT_FN_ATTRS512
472 {
473  return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
474 }
475 
476 static __inline__ __m512i __DEFAULT_FN_ATTRS512
478 {
479  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
480 }
481 
482 static __inline__ __m512i __DEFAULT_FN_ATTRS512
484 {
485  return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
486 }
487 
488 static __inline __m512 __DEFAULT_FN_ATTRS512
489 _mm512_castsi512_ps (__m512i __A)
490 {
491  return (__m512) (__A);
492 }
493 
494 static __inline __m512d __DEFAULT_FN_ATTRS512
495 _mm512_castsi512_pd (__m512i __A)
496 {
497  return (__m512d) (__A);
498 }
499 
500 static __inline __m128i __DEFAULT_FN_ATTRS512
502 {
503  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
504 }
505 
506 static __inline __m256i __DEFAULT_FN_ATTRS512
508 {
509  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
510 }
511 
512 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
514 {
515  return (__mmask16)__a;
516 }
517 
518 static __inline__ int __DEFAULT_FN_ATTRS
520 {
521  return (int)__a;
522 }
523 
524 /// Constructs a 512-bit floating-point vector of [8 x double] from a
525 /// 128-bit floating-point vector of [2 x double]. The lower 128 bits
526 /// contain the value of the source vector. The upper 384 bits are set
527 /// to zero.
528 ///
529 /// \headerfile <x86intrin.h>
530 ///
531 /// This intrinsic has no corresponding instruction.
532 ///
533 /// \param __a
534 /// A 128-bit vector of [2 x double].
535 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
536 /// contain the value of the parameter. The upper 384 bits are set to zero.
537 static __inline __m512d __DEFAULT_FN_ATTRS512
539 {
540  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
541 }
542 
543 /// Constructs a 512-bit floating-point vector of [8 x double] from a
544 /// 256-bit floating-point vector of [4 x double]. The lower 256 bits
545 /// contain the value of the source vector. The upper 256 bits are set
546 /// to zero.
547 ///
548 /// \headerfile <x86intrin.h>
549 ///
550 /// This intrinsic has no corresponding instruction.
551 ///
552 /// \param __a
553 /// A 256-bit vector of [4 x double].
554 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
555 /// contain the value of the parameter. The upper 256 bits are set to zero.
556 static __inline __m512d __DEFAULT_FN_ATTRS512
558 {
559  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
560 }
561 
562 /// Constructs a 512-bit floating-point vector of [16 x float] from a
563 /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
564 /// the value of the source vector. The upper 384 bits are set to zero.
565 ///
566 /// \headerfile <x86intrin.h>
567 ///
568 /// This intrinsic has no corresponding instruction.
569 ///
570 /// \param __a
571 /// A 128-bit vector of [4 x float].
572 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
573 /// contain the value of the parameter. The upper 384 bits are set to zero.
574 static __inline __m512 __DEFAULT_FN_ATTRS512
576 {
577  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
578 }
579 
580 /// Constructs a 512-bit floating-point vector of [16 x float] from a
581 /// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
582 /// the value of the source vector. The upper 256 bits are set to zero.
583 ///
584 /// \headerfile <x86intrin.h>
585 ///
586 /// This intrinsic has no corresponding instruction.
587 ///
588 /// \param __a
589 /// A 256-bit vector of [8 x float].
590 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
591 /// contain the value of the parameter. The upper 256 bits are set to zero.
592 static __inline __m512 __DEFAULT_FN_ATTRS512
594 {
595  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
596 }
597 
598 /// Constructs a 512-bit integer vector from a 128-bit integer vector.
599 /// The lower 128 bits contain the value of the source vector. The upper
600 /// 384 bits are set to zero.
601 ///
602 /// \headerfile <x86intrin.h>
603 ///
604 /// This intrinsic has no corresponding instruction.
605 ///
606 /// \param __a
607 /// A 128-bit integer vector.
608 /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
609 /// the parameter. The upper 384 bits are set to zero.
610 static __inline __m512i __DEFAULT_FN_ATTRS512
612 {
613  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
614 }
615 
616 /// Constructs a 512-bit integer vector from a 256-bit integer vector.
617 /// The lower 256 bits contain the value of the source vector. The upper
618 /// 256 bits are set to zero.
619 ///
620 /// \headerfile <x86intrin.h>
621 ///
622 /// This intrinsic has no corresponding instruction.
623 ///
624 /// \param __a
625 /// A 256-bit integer vector.
626 /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
627 /// the parameter. The upper 256 bits are set to zero.
628 static __inline __m512i __DEFAULT_FN_ATTRS512
630 {
631  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
632 }
633 
634 /* Bitwise operators */
635 static __inline__ __m512i __DEFAULT_FN_ATTRS512
636 _mm512_and_epi32(__m512i __a, __m512i __b)
637 {
638  return (__m512i)((__v16su)__a & (__v16su)__b);
639 }
640 
641 static __inline__ __m512i __DEFAULT_FN_ATTRS512
642 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
643 {
644  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
645  (__v16si) _mm512_and_epi32(__a, __b),
646  (__v16si) __src);
647 }
648 
649 static __inline__ __m512i __DEFAULT_FN_ATTRS512
650 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
651 {
652  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
653  __k, __a, __b);
654 }
655 
656 static __inline__ __m512i __DEFAULT_FN_ATTRS512
657 _mm512_and_epi64(__m512i __a, __m512i __b)
658 {
659  return (__m512i)((__v8du)__a & (__v8du)__b);
660 }
661 
662 static __inline__ __m512i __DEFAULT_FN_ATTRS512
663 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
664 {
665  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
666  (__v8di) _mm512_and_epi64(__a, __b),
667  (__v8di) __src);
668 }
669 
670 static __inline__ __m512i __DEFAULT_FN_ATTRS512
671 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
672 {
673  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
674  __k, __a, __b);
675 }
676 
677 static __inline__ __m512i __DEFAULT_FN_ATTRS512
678 _mm512_andnot_si512 (__m512i __A, __m512i __B)
679 {
680  return (__m512i)(~(__v8du)__A & (__v8du)__B);
681 }
682 
683 static __inline__ __m512i __DEFAULT_FN_ATTRS512
684 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
685 {
686  return (__m512i)(~(__v16su)__A & (__v16su)__B);
687 }
688 
689 static __inline__ __m512i __DEFAULT_FN_ATTRS512
690 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
691 {
692  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
693  (__v16si)_mm512_andnot_epi32(__A, __B),
694  (__v16si)__W);
695 }
696 
697 static __inline__ __m512i __DEFAULT_FN_ATTRS512
698 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
699 {
701  __U, __A, __B);
702 }
703 
704 static __inline__ __m512i __DEFAULT_FN_ATTRS512
705 _mm512_andnot_epi64(__m512i __A, __m512i __B)
706 {
707  return (__m512i)(~(__v8du)__A & (__v8du)__B);
708 }
709 
710 static __inline__ __m512i __DEFAULT_FN_ATTRS512
711 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
712 {
713  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
714  (__v8di)_mm512_andnot_epi64(__A, __B),
715  (__v8di)__W);
716 }
717 
718 static __inline__ __m512i __DEFAULT_FN_ATTRS512
719 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
720 {
722  __U, __A, __B);
723 }
724 
725 static __inline__ __m512i __DEFAULT_FN_ATTRS512
726 _mm512_or_epi32(__m512i __a, __m512i __b)
727 {
728  return (__m512i)((__v16su)__a | (__v16su)__b);
729 }
730 
731 static __inline__ __m512i __DEFAULT_FN_ATTRS512
732 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
733 {
734  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
735  (__v16si)_mm512_or_epi32(__a, __b),
736  (__v16si)__src);
737 }
738 
739 static __inline__ __m512i __DEFAULT_FN_ATTRS512
740 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
741 {
742  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
743 }
744 
745 static __inline__ __m512i __DEFAULT_FN_ATTRS512
746 _mm512_or_epi64(__m512i __a, __m512i __b)
747 {
748  return (__m512i)((__v8du)__a | (__v8du)__b);
749 }
750 
751 static __inline__ __m512i __DEFAULT_FN_ATTRS512
752 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
753 {
754  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
755  (__v8di)_mm512_or_epi64(__a, __b),
756  (__v8di)__src);
757 }
758 
759 static __inline__ __m512i __DEFAULT_FN_ATTRS512
760 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
761 {
762  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
763 }
764 
765 static __inline__ __m512i __DEFAULT_FN_ATTRS512
766 _mm512_xor_epi32(__m512i __a, __m512i __b)
767 {
768  return (__m512i)((__v16su)__a ^ (__v16su)__b);
769 }
770 
771 static __inline__ __m512i __DEFAULT_FN_ATTRS512
772 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
773 {
774  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
775  (__v16si)_mm512_xor_epi32(__a, __b),
776  (__v16si)__src);
777 }
778 
779 static __inline__ __m512i __DEFAULT_FN_ATTRS512
780 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
781 {
782  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
783 }
784 
785 static __inline__ __m512i __DEFAULT_FN_ATTRS512
786 _mm512_xor_epi64(__m512i __a, __m512i __b)
787 {
788  return (__m512i)((__v8du)__a ^ (__v8du)__b);
789 }
790 
791 static __inline__ __m512i __DEFAULT_FN_ATTRS512
792 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
793 {
794  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
795  (__v8di)_mm512_xor_epi64(__a, __b),
796  (__v8di)__src);
797 }
798 
799 static __inline__ __m512i __DEFAULT_FN_ATTRS512
800 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
801 {
802  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
803 }
804 
805 static __inline__ __m512i __DEFAULT_FN_ATTRS512
806 _mm512_and_si512(__m512i __a, __m512i __b)
807 {
808  return (__m512i)((__v8du)__a & (__v8du)__b);
809 }
810 
811 static __inline__ __m512i __DEFAULT_FN_ATTRS512
812 _mm512_or_si512(__m512i __a, __m512i __b)
813 {
814  return (__m512i)((__v8du)__a | (__v8du)__b);
815 }
816 
817 static __inline__ __m512i __DEFAULT_FN_ATTRS512
818 _mm512_xor_si512(__m512i __a, __m512i __b)
819 {
820  return (__m512i)((__v8du)__a ^ (__v8du)__b);
821 }
822 
823 /* Arithmetic */
824 
825 static __inline __m512d __DEFAULT_FN_ATTRS512
826 _mm512_add_pd(__m512d __a, __m512d __b)
827 {
828  return (__m512d)((__v8df)__a + (__v8df)__b);
829 }
830 
831 static __inline __m512 __DEFAULT_FN_ATTRS512
832 _mm512_add_ps(__m512 __a, __m512 __b)
833 {
834  return (__m512)((__v16sf)__a + (__v16sf)__b);
835 }
836 
837 static __inline __m512d __DEFAULT_FN_ATTRS512
838 _mm512_mul_pd(__m512d __a, __m512d __b)
839 {
840  return (__m512d)((__v8df)__a * (__v8df)__b);
841 }
842 
843 static __inline __m512 __DEFAULT_FN_ATTRS512
844 _mm512_mul_ps(__m512 __a, __m512 __b)
845 {
846  return (__m512)((__v16sf)__a * (__v16sf)__b);
847 }
848 
849 static __inline __m512d __DEFAULT_FN_ATTRS512
850 _mm512_sub_pd(__m512d __a, __m512d __b)
851 {
852  return (__m512d)((__v8df)__a - (__v8df)__b);
853 }
854 
855 static __inline __m512 __DEFAULT_FN_ATTRS512
856 _mm512_sub_ps(__m512 __a, __m512 __b)
857 {
858  return (__m512)((__v16sf)__a - (__v16sf)__b);
859 }
860 
861 static __inline__ __m512i __DEFAULT_FN_ATTRS512
862 _mm512_add_epi64 (__m512i __A, __m512i __B)
863 {
864  return (__m512i) ((__v8du) __A + (__v8du) __B);
865 }
866 
867 static __inline__ __m512i __DEFAULT_FN_ATTRS512
868 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
869 {
870  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
871  (__v8di)_mm512_add_epi64(__A, __B),
872  (__v8di)__W);
873 }
874 
875 static __inline__ __m512i __DEFAULT_FN_ATTRS512
876 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
877 {
878  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
879  (__v8di)_mm512_add_epi64(__A, __B),
880  (__v8di)_mm512_setzero_si512());
881 }
882 
883 static __inline__ __m512i __DEFAULT_FN_ATTRS512
884 _mm512_sub_epi64 (__m512i __A, __m512i __B)
885 {
886  return (__m512i) ((__v8du) __A - (__v8du) __B);
887 }
888 
889 static __inline__ __m512i __DEFAULT_FN_ATTRS512
890 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
891 {
892  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
893  (__v8di)_mm512_sub_epi64(__A, __B),
894  (__v8di)__W);
895 }
896 
897 static __inline__ __m512i __DEFAULT_FN_ATTRS512
898 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
899 {
900  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
901  (__v8di)_mm512_sub_epi64(__A, __B),
902  (__v8di)_mm512_setzero_si512());
903 }
904 
905 static __inline__ __m512i __DEFAULT_FN_ATTRS512
906 _mm512_add_epi32 (__m512i __A, __m512i __B)
907 {
908  return (__m512i) ((__v16su) __A + (__v16su) __B);
909 }
910 
911 static __inline__ __m512i __DEFAULT_FN_ATTRS512
912 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
913 {
914  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
915  (__v16si)_mm512_add_epi32(__A, __B),
916  (__v16si)__W);
917 }
918 
919 static __inline__ __m512i __DEFAULT_FN_ATTRS512
920 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
921 {
922  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
923  (__v16si)_mm512_add_epi32(__A, __B),
924  (__v16si)_mm512_setzero_si512());
925 }
926 
927 static __inline__ __m512i __DEFAULT_FN_ATTRS512
928 _mm512_sub_epi32 (__m512i __A, __m512i __B)
929 {
930  return (__m512i) ((__v16su) __A - (__v16su) __B);
931 }
932 
933 static __inline__ __m512i __DEFAULT_FN_ATTRS512
934 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
935 {
936  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
937  (__v16si)_mm512_sub_epi32(__A, __B),
938  (__v16si)__W);
939 }
940 
941 static __inline__ __m512i __DEFAULT_FN_ATTRS512
942 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
943 {
944  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
945  (__v16si)_mm512_sub_epi32(__A, __B),
946  (__v16si)_mm512_setzero_si512());
947 }
948 
949 #define _mm512_max_round_pd(A, B, R) \
950  (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
951  (__v8df)(__m512d)(B), (int)(R))
952 
953 #define _mm512_mask_max_round_pd(W, U, A, B, R) \
954  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
955  (__v8df)_mm512_max_round_pd((A), (B), (R)), \
956  (__v8df)(W))
957 
958 #define _mm512_maskz_max_round_pd(U, A, B, R) \
959  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
960  (__v8df)_mm512_max_round_pd((A), (B), (R)), \
961  (__v8df)_mm512_setzero_pd())
962 
963 static __inline__ __m512d __DEFAULT_FN_ATTRS512
964 _mm512_max_pd(__m512d __A, __m512d __B)
965 {
966  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
968 }
969 
970 static __inline__ __m512d __DEFAULT_FN_ATTRS512
971 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
972 {
973  return (__m512d)__builtin_ia32_selectpd_512(__U,
974  (__v8df)_mm512_max_pd(__A, __B),
975  (__v8df)__W);
976 }
977 
978 static __inline__ __m512d __DEFAULT_FN_ATTRS512
979 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
980 {
981  return (__m512d)__builtin_ia32_selectpd_512(__U,
982  (__v8df)_mm512_max_pd(__A, __B),
983  (__v8df)_mm512_setzero_pd());
984 }
985 
986 #define _mm512_max_round_ps(A, B, R) \
987  (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
988  (__v16sf)(__m512)(B), (int)(R))
989 
990 #define _mm512_mask_max_round_ps(W, U, A, B, R) \
991  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
992  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
993  (__v16sf)(W))
994 
995 #define _mm512_maskz_max_round_ps(U, A, B, R) \
996  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
997  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
998  (__v16sf)_mm512_setzero_ps())
999 
1000 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1001 _mm512_max_ps(__m512 __A, __m512 __B)
1002 {
1003  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
1005 }
1006 
1007 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1008 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1009 {
1010  return (__m512)__builtin_ia32_selectps_512(__U,
1011  (__v16sf)_mm512_max_ps(__A, __B),
1012  (__v16sf)__W);
1013 }
1014 
1015 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1016 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1017 {
1018  return (__m512)__builtin_ia32_selectps_512(__U,
1019  (__v16sf)_mm512_max_ps(__A, __B),
1020  (__v16sf)_mm512_setzero_ps());
1021 }
1022 
1023 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1024 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1025  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1026  (__v4sf) __B,
1027  (__v4sf) __W,
1028  (__mmask8) __U,
1030 }
1031 
1032 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1033 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1034  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1035  (__v4sf) __B,
1036  (__v4sf) _mm_setzero_ps (),
1037  (__mmask8) __U,
1039 }
1040 
1041 #define _mm_max_round_ss(A, B, R) \
1042  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1043  (__v4sf)(__m128)(B), \
1044  (__v4sf)_mm_setzero_ps(), \
1045  (__mmask8)-1, (int)(R))
1046 
1047 #define _mm_mask_max_round_ss(W, U, A, B, R) \
1048  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1049  (__v4sf)(__m128)(B), \
1050  (__v4sf)(__m128)(W), (__mmask8)(U), \
1051  (int)(R))
1052 
1053 #define _mm_maskz_max_round_ss(U, A, B, R) \
1054  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1055  (__v4sf)(__m128)(B), \
1056  (__v4sf)_mm_setzero_ps(), \
1057  (__mmask8)(U), (int)(R))
1058 
1059 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1060 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1061  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1062  (__v2df) __B,
1063  (__v2df) __W,
1064  (__mmask8) __U,
1066 }
1067 
1068 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1069 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1070  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1071  (__v2df) __B,
1072  (__v2df) _mm_setzero_pd (),
1073  (__mmask8) __U,
1075 }
1076 
1077 #define _mm_max_round_sd(A, B, R) \
1078  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1079  (__v2df)(__m128d)(B), \
1080  (__v2df)_mm_setzero_pd(), \
1081  (__mmask8)-1, (int)(R))
1082 
1083 #define _mm_mask_max_round_sd(W, U, A, B, R) \
1084  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1085  (__v2df)(__m128d)(B), \
1086  (__v2df)(__m128d)(W), \
1087  (__mmask8)(U), (int)(R))
1088 
1089 #define _mm_maskz_max_round_sd(U, A, B, R) \
1090  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1091  (__v2df)(__m128d)(B), \
1092  (__v2df)_mm_setzero_pd(), \
1093  (__mmask8)(U), (int)(R))
1094 
1095 static __inline __m512i
1097 _mm512_max_epi32(__m512i __A, __m512i __B)
1098 {
1099  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
1100 }
1101 
1102 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1103 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1104 {
1105  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1106  (__v16si)_mm512_max_epi32(__A, __B),
1107  (__v16si)__W);
1108 }
1109 
1110 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1111 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1112 {
1113  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1114  (__v16si)_mm512_max_epi32(__A, __B),
1115  (__v16si)_mm512_setzero_si512());
1116 }
1117 
1118 static __inline __m512i __DEFAULT_FN_ATTRS512
1119 _mm512_max_epu32(__m512i __A, __m512i __B)
1120 {
1121  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
1122 }
1123 
1124 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1125 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1126 {
1127  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1128  (__v16si)_mm512_max_epu32(__A, __B),
1129  (__v16si)__W);
1130 }
1131 
1132 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1133 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1134 {
1135  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1136  (__v16si)_mm512_max_epu32(__A, __B),
1137  (__v16si)_mm512_setzero_si512());
1138 }
1139 
1140 static __inline __m512i __DEFAULT_FN_ATTRS512
1141 _mm512_max_epi64(__m512i __A, __m512i __B)
1142 {
1143  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
1144 }
1145 
1146 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1147 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1148 {
1149  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1150  (__v8di)_mm512_max_epi64(__A, __B),
1151  (__v8di)__W);
1152 }
1153 
1154 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1155 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1156 {
1157  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1158  (__v8di)_mm512_max_epi64(__A, __B),
1159  (__v8di)_mm512_setzero_si512());
1160 }
1161 
1162 static __inline __m512i __DEFAULT_FN_ATTRS512
1163 _mm512_max_epu64(__m512i __A, __m512i __B)
1164 {
1165  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
1166 }
1167 
1168 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1169 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1170 {
1171  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1172  (__v8di)_mm512_max_epu64(__A, __B),
1173  (__v8di)__W);
1174 }
1175 
1176 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1177 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1178 {
1179  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1180  (__v8di)_mm512_max_epu64(__A, __B),
1181  (__v8di)_mm512_setzero_si512());
1182 }
1183 
1184 #define _mm512_min_round_pd(A, B, R) \
1185  (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1186  (__v8df)(__m512d)(B), (int)(R))
1187 
1188 #define _mm512_mask_min_round_pd(W, U, A, B, R) \
1189  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1190  (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1191  (__v8df)(W))
1192 
1193 #define _mm512_maskz_min_round_pd(U, A, B, R) \
1194  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1195  (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1196  (__v8df)_mm512_setzero_pd())
1197 
1198 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1199 _mm512_min_pd(__m512d __A, __m512d __B)
1200 {
1201  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1203 }
1204 
1205 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1206 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1207 {
1208  return (__m512d)__builtin_ia32_selectpd_512(__U,
1209  (__v8df)_mm512_min_pd(__A, __B),
1210  (__v8df)__W);
1211 }
1212 
1213 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1214 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1215 {
1216  return (__m512d)__builtin_ia32_selectpd_512(__U,
1217  (__v8df)_mm512_min_pd(__A, __B),
1218  (__v8df)_mm512_setzero_pd());
1219 }
1220 
1221 #define _mm512_min_round_ps(A, B, R) \
1222  (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1223  (__v16sf)(__m512)(B), (int)(R))
1224 
1225 #define _mm512_mask_min_round_ps(W, U, A, B, R) \
1226  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1227  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1228  (__v16sf)(W))
1229 
1230 #define _mm512_maskz_min_round_ps(U, A, B, R) \
1231  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1232  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1233  (__v16sf)_mm512_setzero_ps())
1234 
1235 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1236 _mm512_min_ps(__m512 __A, __m512 __B)
1237 {
1238  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1240 }
1241 
1242 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1243 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1244 {
1245  return (__m512)__builtin_ia32_selectps_512(__U,
1246  (__v16sf)_mm512_min_ps(__A, __B),
1247  (__v16sf)__W);
1248 }
1249 
1250 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1251 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1252 {
1253  return (__m512)__builtin_ia32_selectps_512(__U,
1254  (__v16sf)_mm512_min_ps(__A, __B),
1255  (__v16sf)_mm512_setzero_ps());
1256 }
1257 
1258 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1259 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1260  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1261  (__v4sf) __B,
1262  (__v4sf) __W,
1263  (__mmask8) __U,
1265 }
1266 
1267 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1268 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1269  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1270  (__v4sf) __B,
1271  (__v4sf) _mm_setzero_ps (),
1272  (__mmask8) __U,
1274 }
1275 
1276 #define _mm_min_round_ss(A, B, R) \
1277  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1278  (__v4sf)(__m128)(B), \
1279  (__v4sf)_mm_setzero_ps(), \
1280  (__mmask8)-1, (int)(R))
1281 
1282 #define _mm_mask_min_round_ss(W, U, A, B, R) \
1283  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1284  (__v4sf)(__m128)(B), \
1285  (__v4sf)(__m128)(W), (__mmask8)(U), \
1286  (int)(R))
1287 
1288 #define _mm_maskz_min_round_ss(U, A, B, R) \
1289  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1290  (__v4sf)(__m128)(B), \
1291  (__v4sf)_mm_setzero_ps(), \
1292  (__mmask8)(U), (int)(R))
1293 
1294 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1295 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1296  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1297  (__v2df) __B,
1298  (__v2df) __W,
1299  (__mmask8) __U,
1301 }
1302 
1303 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1304 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1305  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1306  (__v2df) __B,
1307  (__v2df) _mm_setzero_pd (),
1308  (__mmask8) __U,
1310 }
1311 
1312 #define _mm_min_round_sd(A, B, R) \
1313  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1314  (__v2df)(__m128d)(B), \
1315  (__v2df)_mm_setzero_pd(), \
1316  (__mmask8)-1, (int)(R))
1317 
1318 #define _mm_mask_min_round_sd(W, U, A, B, R) \
1319  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1320  (__v2df)(__m128d)(B), \
1321  (__v2df)(__m128d)(W), \
1322  (__mmask8)(U), (int)(R))
1323 
1324 #define _mm_maskz_min_round_sd(U, A, B, R) \
1325  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1326  (__v2df)(__m128d)(B), \
1327  (__v2df)_mm_setzero_pd(), \
1328  (__mmask8)(U), (int)(R))
1329 
1330 static __inline __m512i
1332 _mm512_min_epi32(__m512i __A, __m512i __B)
1333 {
1334  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
1335 }
1336 
1337 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1338 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1339 {
1340  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1341  (__v16si)_mm512_min_epi32(__A, __B),
1342  (__v16si)__W);
1343 }
1344 
1345 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1346 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1347 {
1348  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1349  (__v16si)_mm512_min_epi32(__A, __B),
1350  (__v16si)_mm512_setzero_si512());
1351 }
1352 
1353 static __inline __m512i __DEFAULT_FN_ATTRS512
1354 _mm512_min_epu32(__m512i __A, __m512i __B)
1355 {
1356  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
1357 }
1358 
1359 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1360 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1361 {
1362  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1363  (__v16si)_mm512_min_epu32(__A, __B),
1364  (__v16si)__W);
1365 }
1366 
1367 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1368 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1369 {
1370  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1371  (__v16si)_mm512_min_epu32(__A, __B),
1372  (__v16si)_mm512_setzero_si512());
1373 }
1374 
1375 static __inline __m512i __DEFAULT_FN_ATTRS512
1376 _mm512_min_epi64(__m512i __A, __m512i __B)
1377 {
1378  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
1379 }
1380 
1381 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1382 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1383 {
1384  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1385  (__v8di)_mm512_min_epi64(__A, __B),
1386  (__v8di)__W);
1387 }
1388 
1389 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1390 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1391 {
1392  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1393  (__v8di)_mm512_min_epi64(__A, __B),
1394  (__v8di)_mm512_setzero_si512());
1395 }
1396 
1397 static __inline __m512i __DEFAULT_FN_ATTRS512
1398 _mm512_min_epu64(__m512i __A, __m512i __B)
1399 {
1400  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
1401 }
1402 
1403 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1404 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1405 {
1406  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1407  (__v8di)_mm512_min_epu64(__A, __B),
1408  (__v8di)__W);
1409 }
1410 
1411 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1412 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1413 {
1414  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1415  (__v8di)_mm512_min_epu64(__A, __B),
1416  (__v8di)_mm512_setzero_si512());
1417 }
1418 
1419 static __inline __m512i __DEFAULT_FN_ATTRS512
1420 _mm512_mul_epi32(__m512i __X, __m512i __Y)
1421 {
1422  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1423 }
1424 
1425 static __inline __m512i __DEFAULT_FN_ATTRS512
1426 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1427 {
1428  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1429  (__v8di)_mm512_mul_epi32(__X, __Y),
1430  (__v8di)__W);
1431 }
1432 
1433 static __inline __m512i __DEFAULT_FN_ATTRS512
1434 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1435 {
1436  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1437  (__v8di)_mm512_mul_epi32(__X, __Y),
1438  (__v8di)_mm512_setzero_si512 ());
1439 }
1440 
1441 static __inline __m512i __DEFAULT_FN_ATTRS512
1442 _mm512_mul_epu32(__m512i __X, __m512i __Y)
1443 {
1444  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1445 }
1446 
1447 static __inline __m512i __DEFAULT_FN_ATTRS512
1448 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1449 {
1450  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1451  (__v8di)_mm512_mul_epu32(__X, __Y),
1452  (__v8di)__W);
1453 }
1454 
1455 static __inline __m512i __DEFAULT_FN_ATTRS512
1456 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1457 {
1458  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1459  (__v8di)_mm512_mul_epu32(__X, __Y),
1460  (__v8di)_mm512_setzero_si512 ());
1461 }
1462 
1463 static __inline __m512i __DEFAULT_FN_ATTRS512
1464 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
1465 {
1466  return (__m512i) ((__v16su) __A * (__v16su) __B);
1467 }
1468 
1469 static __inline __m512i __DEFAULT_FN_ATTRS512
1470 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1471 {
1472  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1473  (__v16si)_mm512_mullo_epi32(__A, __B),
1474  (__v16si)_mm512_setzero_si512());
1475 }
1476 
1477 static __inline __m512i __DEFAULT_FN_ATTRS512
1478 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1479 {
1480  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1481  (__v16si)_mm512_mullo_epi32(__A, __B),
1482  (__v16si)__W);
1483 }
1484 
1485 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1486 _mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1487  return (__m512i) ((__v8du) __A * (__v8du) __B);
1488 }
1489 
1490 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1491 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1492  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1493  (__v8di)_mm512_mullox_epi64(__A, __B),
1494  (__v8di)__W);
1495 }
1496 
1497 #define _mm512_sqrt_round_pd(A, R) \
1498  (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))
1499 
1500 #define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1501  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1502  (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1503  (__v8df)(__m512d)(W))
1504 
1505 #define _mm512_maskz_sqrt_round_pd(U, A, R) \
1506  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1507  (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1508  (__v8df)_mm512_setzero_pd())
1509 
1510 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1511 _mm512_sqrt_pd(__m512d __A)
1512 {
1513  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1515 }
1516 
1517 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1518 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1519 {
1520  return (__m512d)__builtin_ia32_selectpd_512(__U,
1521  (__v8df)_mm512_sqrt_pd(__A),
1522  (__v8df)__W);
1523 }
1524 
1525 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1526 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1527 {
1528  return (__m512d)__builtin_ia32_selectpd_512(__U,
1529  (__v8df)_mm512_sqrt_pd(__A),
1530  (__v8df)_mm512_setzero_pd());
1531 }
1532 
1533 #define _mm512_sqrt_round_ps(A, R) \
1534  (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))
1535 
1536 #define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1537  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1538  (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1539  (__v16sf)(__m512)(W))
1540 
1541 #define _mm512_maskz_sqrt_round_ps(U, A, R) \
1542  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1543  (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1544  (__v16sf)_mm512_setzero_ps())
1545 
1546 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1547 _mm512_sqrt_ps(__m512 __A)
1548 {
1549  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1551 }
1552 
1553 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1554 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1555 {
1556  return (__m512)__builtin_ia32_selectps_512(__U,
1557  (__v16sf)_mm512_sqrt_ps(__A),
1558  (__v16sf)__W);
1559 }
1560 
1561 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1562 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1563 {
1564  return (__m512)__builtin_ia32_selectps_512(__U,
1565  (__v16sf)_mm512_sqrt_ps(__A),
1566  (__v16sf)_mm512_setzero_ps());
1567 }
1568 
1569 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1570 _mm512_rsqrt14_pd(__m512d __A)
1571 {
1572  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1573  (__v8df)
1574  _mm512_setzero_pd (),
1575  (__mmask8) -1);}
1576 
1577 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1578 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1579 {
1580  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1581  (__v8df) __W,
1582  (__mmask8) __U);
1583 }
1584 
1585 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1586 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1587 {
1588  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1589  (__v8df)
1590  _mm512_setzero_pd (),
1591  (__mmask8) __U);
1592 }
1593 
1594 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1596 {
1597  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1598  (__v16sf)
1599  _mm512_setzero_ps (),
1600  (__mmask16) -1);
1601 }
1602 
1603 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1604 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1605 {
1606  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1607  (__v16sf) __W,
1608  (__mmask16) __U);
1609 }
1610 
1611 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1612 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1613 {
1614  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1615  (__v16sf)
1616  _mm512_setzero_ps (),
1617  (__mmask16) __U);
1618 }
1619 
1620 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1621 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
1622 {
1623  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1624  (__v4sf) __B,
1625  (__v4sf)
1626  _mm_setzero_ps (),
1627  (__mmask8) -1);
1628 }
1629 
1630 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1631 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1632 {
1633  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1634  (__v4sf) __B,
1635  (__v4sf) __W,
1636  (__mmask8) __U);
1637 }
1638 
1639 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1640 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1641 {
1642  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1643  (__v4sf) __B,
1644  (__v4sf) _mm_setzero_ps (),
1645  (__mmask8) __U);
1646 }
1647 
1648 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1649 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
1650 {
1651  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1652  (__v2df) __B,
1653  (__v2df)
1654  _mm_setzero_pd (),
1655  (__mmask8) -1);
1656 }
1657 
1658 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1659 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1660 {
1661  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1662  (__v2df) __B,
1663  (__v2df) __W,
1664  (__mmask8) __U);
1665 }
1666 
1667 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1668 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1669 {
1670  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1671  (__v2df) __B,
1672  (__v2df) _mm_setzero_pd (),
1673  (__mmask8) __U);
1674 }
1675 
1676 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1677 _mm512_rcp14_pd(__m512d __A)
1678 {
1679  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1680  (__v8df)
1681  _mm512_setzero_pd (),
1682  (__mmask8) -1);
1683 }
1684 
1685 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1686 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1687 {
1688  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1689  (__v8df) __W,
1690  (__mmask8) __U);
1691 }
1692 
1693 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1694 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1695 {
1696  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1697  (__v8df)
1698  _mm512_setzero_pd (),
1699  (__mmask8) __U);
1700 }
1701 
1702 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1703 _mm512_rcp14_ps(__m512 __A)
1704 {
1705  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1706  (__v16sf)
1707  _mm512_setzero_ps (),
1708  (__mmask16) -1);
1709 }
1710 
1711 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1712 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1713 {
1714  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1715  (__v16sf) __W,
1716  (__mmask16) __U);
1717 }
1718 
1719 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1720 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1721 {
1722  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1723  (__v16sf)
1724  _mm512_setzero_ps (),
1725  (__mmask16) __U);
1726 }
1727 
1728 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1729 _mm_rcp14_ss(__m128 __A, __m128 __B)
1730 {
1731  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1732  (__v4sf) __B,
1733  (__v4sf)
1734  _mm_setzero_ps (),
1735  (__mmask8) -1);
1736 }
1737 
1738 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1739 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1740 {
1741  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1742  (__v4sf) __B,
1743  (__v4sf) __W,
1744  (__mmask8) __U);
1745 }
1746 
1747 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1748 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1749 {
1750  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1751  (__v4sf) __B,
1752  (__v4sf) _mm_setzero_ps (),
1753  (__mmask8) __U);
1754 }
1755 
1756 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1757 _mm_rcp14_sd(__m128d __A, __m128d __B)
1758 {
1759  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1760  (__v2df) __B,
1761  (__v2df)
1762  _mm_setzero_pd (),
1763  (__mmask8) -1);
1764 }
1765 
1766 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1767 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1768 {
1769  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1770  (__v2df) __B,
1771  (__v2df) __W,
1772  (__mmask8) __U);
1773 }
1774 
1775 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1776 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1777 {
1778  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1779  (__v2df) __B,
1780  (__v2df) _mm_setzero_pd (),
1781  (__mmask8) __U);
1782 }
1783 
1784 static __inline __m512 __DEFAULT_FN_ATTRS512
1785 _mm512_floor_ps(__m512 __A)
1786 {
1787  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1789  (__v16sf) __A, -1,
1791 }
1792 
1793 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1794 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1795 {
1796  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1798  (__v16sf) __W, __U,
1800 }
1801 
1802 static __inline __m512d __DEFAULT_FN_ATTRS512
1803 _mm512_floor_pd(__m512d __A)
1804 {
1805  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1807  (__v8df) __A, -1,
1809 }
1810 
1811 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1812 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1813 {
1814  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1816  (__v8df) __W, __U,
1818 }
1819 
1820 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1821 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1822 {
1823  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1825  (__v16sf) __W, __U,
1827 }
1828 
1829 static __inline __m512 __DEFAULT_FN_ATTRS512
1830 _mm512_ceil_ps(__m512 __A)
1831 {
1832  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1834  (__v16sf) __A, -1,
1836 }
1837 
1838 static __inline __m512d __DEFAULT_FN_ATTRS512
1839 _mm512_ceil_pd(__m512d __A)
1840 {
1841  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1843  (__v8df) __A, -1,
1845 }
1846 
1847 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1848 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1849 {
1850  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1852  (__v8df) __W, __U,
1854 }
1855 
1856 static __inline __m512i __DEFAULT_FN_ATTRS512
1857 _mm512_abs_epi64(__m512i __A)
1858 {
1859  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1860 }
1861 
1862 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1863 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1864 {
1865  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1866  (__v8di)_mm512_abs_epi64(__A),
1867  (__v8di)__W);
1868 }
1869 
1870 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1871 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1872 {
1873  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1874  (__v8di)_mm512_abs_epi64(__A),
1875  (__v8di)_mm512_setzero_si512());
1876 }
1877 
1878 static __inline __m512i __DEFAULT_FN_ATTRS512
1879 _mm512_abs_epi32(__m512i __A)
1880 {
1881  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
1882 }
1883 
1884 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1885 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1886 {
1887  return (__m512i)__builtin_ia32_selectd_512(__U,
1888  (__v16si)_mm512_abs_epi32(__A),
1889  (__v16si)__W);
1890 }
1891 
1892 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1893 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1894 {
1895  return (__m512i)__builtin_ia32_selectd_512(__U,
1896  (__v16si)_mm512_abs_epi32(__A),
1897  (__v16si)_mm512_setzero_si512());
1898 }
1899 
/* Scalar single-precision add with merge masking: the low element is
   __A[0] + __B[0] when bit 0 of __U is set, else __W[0]; the upper
   elements are carried over from __A (per _mm_add_ss). */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_add_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

/* Zero-masked variant: low element is zero when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_add_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
1911 
/* Rounding-control variants of the scalar SS add.  R is the rounding
   mode passed through as the builtin's last operand; the third operand
   is the merge source and the fourth the write mask ((__mmask8)-1 for
   the unmasked form). */
#define _mm_add_round_ss(A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_add_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_add_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
1929 
/* Scalar double-precision add with merge masking: low element is
   __A[0] + __B[0] when bit 0 of __U is set, else __W[0]; the upper
   element comes from __A (per _mm_add_sd). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_add_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

/* Zero-masked variant: low element is zero when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_add_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
/* Rounding-control variants of the scalar SD add; same operand layout
   as the SS forms above (src2, merge source, mask, rounding mode). */
#define _mm_add_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_add_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_add_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
1958 
/* Merge-masked 512-bit FP add: lanes with the mask bit set take the
   sum, others keep the corresponding lane of __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked 512-bit FP add: lanes with a clear mask bit are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Single-precision counterparts (16 lanes, __mmask16). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
1986 
/* 512-bit FP add with explicit rounding mode R.  The mask/maskz forms
   wrap the unmasked macro in a per-lane select.
   FIX(review): removed the stray trailing semicolons from the masked
   macros' replacement lists -- they made the macros unusable in
   expression context (e.g. as a function argument), unlike every other
   expression macro in this file. */
#define _mm512_add_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_add_round_ps(A, B, R) \
  (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2014 
/* Scalar single-precision subtract with merge masking: low element is
   __A[0] - __B[0] when bit 0 of __U is set, else __W[0]; upper
   elements come from __A (per _mm_sub_ss). */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_sub_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

/* Zero-masked variant: low element is zero when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_sub_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
/* Rounding-control variants of the scalar SS subtract; operand layout
   matches the add_round_ss macros above. */
#define _mm_sub_round_ss(A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_sub_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
2043 
/* Scalar double-precision subtract with merge masking: low element is
   __A[0] - __B[0] when bit 0 of __U is set, else __W[0]. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_sub_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

/* Zero-masked variant: low element is zero when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_sub_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
2055 
/* Rounding-control variants of the scalar SD subtract; operand layout
   matches the add_round_sd macros above. */
#define _mm_sub_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_sub_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
2073 
/* Merge-masked 512-bit FP subtract: lanes with the mask bit set take
   the difference, others keep the corresponding lane of __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked variant: lanes with a clear mask bit are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Single-precision counterparts (16 lanes, __mmask16). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2101 
/* 512-bit FP subtract with explicit rounding mode R.
   FIX(review): removed the stray trailing semicolons from the masked
   macros' replacement lists (see _mm512_mask_add_round_pd). */
#define _mm512_sub_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_sub_round_ps(A, B, R) \
  (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2129 
/* Scalar single-precision multiply with merge masking: low element is
   __A[0] * __B[0] when bit 0 of __U is set, else __W[0]. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

/* Zero-masked variant: low element is zero when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
/* Rounding-control variants of the scalar SS multiply; operand layout
   matches the add_round_ss macros above. */
#define _mm_mul_round_ss(A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
2158 
/* Scalar double-precision multiply with merge masking: low element is
   __A[0] * __B[0] when bit 0 of __U is set, else __W[0]. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

/* Zero-masked variant: low element is zero when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
2170 
/* Rounding-control variants of the scalar SD multiply; operand layout
   matches the add_round_sd macros above. */
#define _mm_mul_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
2188 
/* Merge-masked 512-bit FP multiply: lanes with the mask bit set take
   the product, others keep the corresponding lane of __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked variant: lanes with a clear mask bit are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Single-precision counterparts (16 lanes, __mmask16). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2216 
/* 512-bit FP multiply with explicit rounding mode R.
   FIX(review): removed the stray trailing semicolons from the masked
   macros' replacement lists (see _mm512_mask_add_round_pd). */
#define _mm512_mul_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_mul_round_ps(A, B, R) \
  (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2244 
/* Scalar single-precision divide with merge masking: low element is
   __A[0] / __B[0] when bit 0 of __U is set, else __W[0]. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_div_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

/* Zero-masked variant: low element is zero when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_div_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
2256 
/* Rounding-control variants of the scalar SS divide; operand layout
   matches the add_round_ss macros above. */
#define _mm_div_round_ss(A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
2274 
/* Scalar double-precision divide with merge masking: low element is
   __A[0] / __B[0] when bit 0 of __U is set, else __W[0]. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_div_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

/* Zero-masked variant: low element is zero when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_div_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
2286 
/* Rounding-control variants of the scalar SD divide; operand layout
   matches the add_round_sd macros above. */
#define _mm_div_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
2304 
2305 static __inline __m512d __DEFAULT_FN_ATTRS512
2306 _mm512_div_pd(__m512d __a, __m512d __b)
2307 {
2308  return (__m512d)((__v8df)__a/(__v8df)__b);
2309 }
2310 
/* Merge-masked 512-bit FP divide: lanes with the mask bit set take the
   quotient, others keep the corresponding lane of __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked variant: lanes with a clear mask bit are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
2324 
2325 static __inline __m512 __DEFAULT_FN_ATTRS512
2326 _mm512_div_ps(__m512 __a, __m512 __b)
2327 {
2328  return (__m512)((__v16sf)__a/(__v16sf)__b);
2329 }
2330 
/* Merge-masked single-precision divide (16 lanes, __mmask16). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked variant: lanes with a clear mask bit are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2344 
/* 512-bit FP divide with explicit rounding mode R.
   FIX(review): removed the stray trailing semicolons from the masked
   macros' replacement lists (see _mm512_mask_add_round_pd). */
#define _mm512_div_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_div_round_ps(A, B, R) \
  (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2372 
/* Round-to-scaled-integer family.  The immediate selects the rounding
   behavior and fixed-point scale.  Note the masked forms take the
   passthru/mask operands FIRST in the intrinsic signature even though
   the builtin receives the source vector first.  The non-"round"
   forms pass _MM_FROUND_CUR_DIRECTION as the SAE/rounding argument. */
#define _mm512_roundscale_ps(A, B) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R))

#define _mm512_roundscale_round_ps(A, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R))

/* Double-precision counterparts (8 lanes, __mmask8). */
#define _mm512_roundscale_pd(A, B) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R))

#define _mm512_roundscale_round_pd(A, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R))
2436 
/* Fused multiply-add family with explicit rounding mode, double
   precision.  All variants are built from the same vfmaddpd builtins:
   fmsub negates C, fnmadd negates A, fnmsub negates both.  The _mask
   form merges from A, _mask3 merges from C, _maskz zeroes. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_fmsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_fnmadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_fnmsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))
2520 
2521 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2522 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2523 {
2524  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2525  (__v8df) __B,
2526  (__v8df) __C,
2527  (__mmask8) -1,
2529 }
2530 
2531 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2532 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2533 {
2534  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2535  (__v8df) __B,
2536  (__v8df) __C,
2537  (__mmask8) __U,
2539 }
2540 
2541 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2542 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2543 {
2544  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2545  (__v8df) __B,
2546  (__v8df) __C,
2547  (__mmask8) __U,
2549 }
2550 
2551 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2552 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2553 {
2554  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2555  (__v8df) __B,
2556  (__v8df) __C,
2557  (__mmask8) __U,
2559 }
2560 
2561 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2562 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2563 {
2564  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2565  (__v8df) __B,
2566  -(__v8df) __C,
2567  (__mmask8) -1,
2569 }
2570 
2571 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2572 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2573 {
2574  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2575  (__v8df) __B,
2576  -(__v8df) __C,
2577  (__mmask8) __U,
2579 }
2580 
2581 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2582 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2583 {
2584  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2585  (__v8df) __B,
2586  -(__v8df) __C,
2587  (__mmask8) __U,
2589 }
2590 
2591 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2592 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2593 {
2594  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2595  -(__v8df) __B,
2596  (__v8df) __C,
2597  (__mmask8) -1,
2599 }
2600 
2601 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2602 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2603 {
2604  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2605  (__v8df) __B,
2606  (__v8df) __C,
2607  (__mmask8) __U,
2609 }
2610 
2611 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2612 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2613 {
2614  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2615  (__v8df) __B,
2616  (__v8df) __C,
2617  (__mmask8) __U,
2619 }
2620 
2621 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2622 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2623 {
2624  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2625  -(__v8df) __B,
2626  -(__v8df) __C,
2627  (__mmask8) -1,
2629 }
2630 
2631 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2632 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2633 {
2634  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2635  (__v8df) __B,
2636  -(__v8df) __C,
2637  (__mmask8) __U,
2639 }
2640 
/* Fused multiply-add family with explicit rounding mode, single
   precision.  Same negation/masking scheme as the _pd macros above,
   with 16 lanes and __mmask16. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_fnmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))
2724 
2725 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2726 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2727 {
2728  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2729  (__v16sf) __B,
2730  (__v16sf) __C,
2731  (__mmask16) -1,
2733 }
2734 
2735 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2736 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2737 {
2738  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2739  (__v16sf) __B,
2740  (__v16sf) __C,
2741  (__mmask16) __U,
2743 }
2744 
2745 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2746 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2747 {
2748  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2749  (__v16sf) __B,
2750  (__v16sf) __C,
2751  (__mmask16) __U,
2753 }
2754 
2755 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2756 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2757 {
2758  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2759  (__v16sf) __B,
2760  (__v16sf) __C,
2761  (__mmask16) __U,
2763 }
2764 
2765 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2766 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2767 {
2768  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2769  (__v16sf) __B,
2770  -(__v16sf) __C,
2771  (__mmask16) -1,
2773 }
2774 
2775 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2776 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2777 {
2778  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2779  (__v16sf) __B,
2780  -(__v16sf) __C,
2781  (__mmask16) __U,
2783 }
2784 
2785 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2786 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2787 {
2788  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2789  (__v16sf) __B,
2790  -(__v16sf) __C,
2791  (__mmask16) __U,
2793 }
2794 
2795 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2796 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2797 {
2798  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2799  -(__v16sf) __B,
2800  (__v16sf) __C,
2801  (__mmask16) -1,
2803 }
2804 
2805 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2806 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2807 {
2808  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2809  (__v16sf) __B,
2810  (__v16sf) __C,
2811  (__mmask16) __U,
2813 }
2814 
2815 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2816 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2817 {
2818  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2819  (__v16sf) __B,
2820  (__v16sf) __C,
2821  (__mmask16) __U,
2823 }
2824 
2825 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2826 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2827 {
2828  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2829  -(__v16sf) __B,
2830  -(__v16sf) __C,
2831  (__mmask16) -1,
2833 }
2834 
2835 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2836 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2837 {
2838  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2839  (__v16sf) __B,
2840  -(__v16sf) __C,
2841  (__mmask16) __U,
2843 }
2844 
/* fmaddsub/fmsubadd on packed double with an explicit rounding control R
 * (one of the _MM_FROUND_* values).  fmaddsub subtracts __C in even lanes
 * and adds it in odd lanes; fmsubadd (built by negating C) does the
 * opposite.  -1 mask = unmasked; _mask keeps A, _mask3 keeps C, _maskz
 * zeroes unselected lanes. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R))


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R))


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))


#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R))


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R))


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))
2893 
2894 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2895 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2896 {
2897  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2898  (__v8df) __B,
2899  (__v8df) __C,
2900  (__mmask8) -1,
2902 }
2903 
2904 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2905 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2906 {
2907  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2908  (__v8df) __B,
2909  (__v8df) __C,
2910  (__mmask8) __U,
2912 }
2913 
2914 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2915 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2916 {
2917  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2918  (__v8df) __B,
2919  (__v8df) __C,
2920  (__mmask8) __U,
2922 }
2923 
2924 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2925 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2926 {
2927  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2928  (__v8df) __B,
2929  (__v8df) __C,
2930  (__mmask8) __U,
2932 }
2933 
2934 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2935 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2936 {
2937  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2938  (__v8df) __B,
2939  -(__v8df) __C,
2940  (__mmask8) -1,
2942 }
2943 
2944 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2945 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2946 {
2947  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2948  (__v8df) __B,
2949  -(__v8df) __C,
2950  (__mmask8) __U,
2952 }
2953 
2954 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2955 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2956 {
2957  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2958  (__v8df) __B,
2959  -(__v8df) __C,
2960  (__mmask8) __U,
2962 }
2963 
/* Single-precision counterparts of the fmaddsub/fmsubadd rounding macros
 * above; R selects the _MM_FROUND_* rounding control. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R))


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R))


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))


#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R))


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R))


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))
3012 
3013 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3014 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3015 {
3016  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3017  (__v16sf) __B,
3018  (__v16sf) __C,
3019  (__mmask16) -1,
3021 }
3022 
3023 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3024 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3025 {
3026  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3027  (__v16sf) __B,
3028  (__v16sf) __C,
3029  (__mmask16) __U,
3031 }
3032 
3033 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3034 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3035 {
3036  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3037  (__v16sf) __B,
3038  (__v16sf) __C,
3039  (__mmask16) __U,
3041 }
3042 
3043 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3044 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3045 {
3046  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3047  (__v16sf) __B,
3048  (__v16sf) __C,
3049  (__mmask16) __U,
3051 }
3052 
3053 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3054 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3055 {
3056  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3057  (__v16sf) __B,
3058  -(__v16sf) __C,
3059  (__mmask16) -1,
3061 }
3062 
3063 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3064 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3065 {
3066  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3067  (__v16sf) __B,
3068  -(__v16sf) __C,
3069  (__mmask16) __U,
3071 }
3072 
3073 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3074 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3075 {
3076  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3077  (__v16sf) __B,
3078  -(__v16sf) __C,
3079  (__mmask16) __U,
3081 }
3082 
/* (A*B)-C with explicit rounding R; lanes clear in U keep C. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))
3089 
3090 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3091 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3092 {
3093  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3094  (__v8df) __B,
3095  (__v8df) __C,
3096  (__mmask8) __U,
3098 }
3099 
/* (A*B)-C with explicit rounding R; lanes clear in U keep C. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))
3105 
3106 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3107 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3108 {
3109  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3110  (__v16sf) __B,
3111  (__v16sf) __C,
3112  (__mmask16) __U,
3114 }
3115 
/* fmsubadd with explicit rounding R; lanes clear in U keep C. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))
3122 
3123 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3124 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3125 {
3126  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3127  (__v8df) __B,
3128  (__v8df) __C,
3129  (__mmask8) __U,
3131 }
3132 
/* fmsubadd with explicit rounding R; lanes clear in U keep C. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))
3139 
3140 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3141 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3142 {
3143  return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3144  (__v16sf) __B,
3145  (__v16sf) __C,
3146  (__mmask16) __U,
3148 }
3149 
/* -(A*B)+C with explicit rounding R; lanes clear in U keep A. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           -(__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))
3156 
3157 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3158 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3159 {
3160  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3161  -(__v8df) __B,
3162  (__v8df) __C,
3163  (__mmask8) __U,
3165 }
3166 
/* -(A*B)+C with explicit rounding R; lanes clear in U keep A. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))
3173 
3174 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3175 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3176 {
3177  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3178  -(__v16sf) __B,
3179  (__v16sf) __C,
3180  (__mmask16) __U,
3182 }
3183 
/* -(A*B)-C with explicit rounding R.  _mask keeps A in unselected lanes;
 * _mask3 (via negated A into vfmsub) keeps C. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           -(__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))


#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))
3197 
3198 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3199 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3200 {
3201  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3202  -(__v8df) __B,
3203  -(__v8df) __C,
3204  (__mmask8) __U,
3206 }
3207 
3208 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3209 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3210 {
3211  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3212  (__v8df) __B,
3213  (__v8df) __C,
3214  (__mmask8) __U,
3216 }
3217 
/* -(A*B)-C with explicit rounding R.  _mask keeps A in unselected lanes;
 * _mask3 (via negated A into vfmsub) keeps C. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))


#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))
3231 
3232 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3233 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3234 {
3235  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3236  -(__v16sf) __B,
3237  -(__v16sf) __C,
3238  (__mmask16) __U,
3240 }
3241 
3242 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3243 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3244 {
3245  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3246  (__v16sf) __B,
3247  (__v16sf) __C,
3248  (__mmask16) __U,
3250 }
3251 
3252 
3253 
3254 /* Vector permutations */
3255 
/* Two-source permutes: each result lane is picked from the concatenation
 * of __A and __B by the matching index lane of __I (vpermi2d/vpermi2q).
 * Masked variants blend through the generic select builtins: _mask keeps
 * __A, _mask2 keeps __I, _maskz zeroes unselected lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
                                                (__v16si) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
                               __m512i __B)
{
  /* Lanes clear in __U keep __A. */
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
                                __m512i __B)
{
  /* Lanes clear in __U keep the index vector __I. */
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)__I);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
                                __m512i __B)
{
  /* Lanes clear in __U are zeroed. */
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
                                                (__v8di) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
                               __m512i __B)
{
  /* Lanes clear in __U keep __A. */
  return (__m512i)__builtin_ia32_selectq_512(__U,
                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                               (__v8di)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
                                __m512i __B)
{
  /* Lanes clear in __U keep the index vector __I. */
  return (__m512i)__builtin_ia32_selectq_512(__U,
                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                               (__v8di)__I);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
                                __m512i __B)
{
  /* Lanes clear in __U are zeroed. */
  return (__m512i)__builtin_ia32_selectq_512(__U,
                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                               (__v8di)_mm512_setzero_si512());
}
3323 
/* Concatenate B:A and shift right by I elements (valignq/valignd); the
 * masked forms blend with W or zero via the select builtins. */
#define _mm512_alignr_epi64(A, B, I) \
  (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                    (__v8di)(__m512i)(B), (int)(I))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512())

#define _mm512_alignr_epi32(A, B, I) \
  (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                    (__v16si)(__m512i)(B), (int)(I))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512())
3351 /* Vector Extract */
3352 
/* Extract a 256-bit (4 x double) or 128-bit (4 x float) slice selected by
 * the immediate I/imm; unmasked forms pass an undefined passthrough and an
 * all-ones mask. */
#define _mm512_extractf64x4_pd(A, I) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                            (__v4df)_mm256_undefined_pd(), \
                                            (__mmask8)-1)

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U))

#define _mm512_extractf32x4_ps(A, I) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                           (__v4sf)_mm_undefined_ps(), \
                                           (__mmask8)-1)

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U))
3382 
3383 /* Vector Blend */
3384 
/* Mask blends: result lane = __W where the mask bit is set, else __A.
 * Note the operand order to the select builtin: __W is the "true" arm. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
                                                (__v8df) __W,
                                                (__v8df) __A);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
                                               (__v16sf) __W,
                                               (__v16sf) __A);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                                               (__v8di) __W,
                                               (__v8di) __A);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                                               (__v16si) __W,
                                               (__v16si) __A);
}
3416 
3417 /* Compare */
3418 
/* Packed-float comparisons producing a 16-bit lane mask.  P is a _CMP_*
 * predicate; the _round_ forms take an explicit SAE/rounding control R,
 * the plain forms use _MM_FROUND_CUR_DIRECTION.  The named shorthands
 * below fix the predicate (EQ_OQ, LT_OS, ...). */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3473 
/* Packed-double comparisons producing an 8-bit lane mask; mirrors the
 * _ps family above. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3528 
3529 /* Conversion */
3530 
/* Truncating float -> unsigned 32-bit conversion with explicit SAE
 * control R (vcvttps2udq). */
#define _mm512_cvtt_roundps_epu32(A, R) \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_undefined_epi32(), \
                                             (__mmask16)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R))
3545 
3546 
3547 static __inline __m512i __DEFAULT_FN_ATTRS512
3549 {
3550  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3551  (__v16si)
3553  (__mmask16) -1,
3555 }
3556 
3557 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3558 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3559 {
3560  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3561  (__v16si) __W,
3562  (__mmask16) __U,
3564 }
3565 
3566 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3567 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3568 {
3569  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3570  (__v16si) _mm512_setzero_si512 (),
3571  (__mmask16) __U,
3573 }
3574 
/* Signed / unsigned 32-bit integer -> float conversions with explicit
 * rounding R (vcvtdq2ps / vcvtudq2ps). */
#define _mm512_cvt_roundepi32_ps(A, R) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_cvt_roundepu32_ps(A, R) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))
3604 
/* Unsigned 32-bit -> float via a generic vector conversion; masked forms
 * blend with __W or zero. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_cvtepu32_ps (__m512i __A)
{
  return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_cvtepu32_ps(__A),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_cvtepu32_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
3626 
3627 static __inline __m512d __DEFAULT_FN_ATTRS512
3629 {
3630  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3631 }
3632 
3633 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3634 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3635 {
3636  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3637  (__v8df)_mm512_cvtepi32_pd(__A),
3638  (__v8df)__W);
3639 }
3640 
3641 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3642 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3643 {
3644  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3645  (__v8df)_mm512_cvtepi32_pd(__A),
3646  (__v8df)_mm512_setzero_pd());
3647 }
3648 
3649 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3651 {
3652  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3653 }
3654 
3655 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3656 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3657 {
3658  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3659 }
3660 
/* Signed 32-bit -> float via a generic vector conversion; masked forms
 * blend with __W or zero. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_cvtepi32_ps (__m512i __A)
{
  return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_cvtepi32_ps(__A),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_cvtepi32_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
3682 
3683 static __inline __m512d __DEFAULT_FN_ATTRS512
3685 {
3686  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3687 }
3688 
3689 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3690 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3691 {
3692  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3693  (__v8df)_mm512_cvtepu32_pd(__A),
3694  (__v8df)__W);
3695 }
3696 
3697 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3698 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3699 {
3700  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3701  (__v8df)_mm512_cvtepu32_pd(__A),
3702  (__v8df)_mm512_setzero_pd());
3703 }
3704 
3705 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3707 {
3708  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3709 }
3710 
3711 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3712 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3713 {
3714  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3715 }
3716 
/* Double -> float narrowing conversion with explicit rounding R
 * (vcvtpd2ps). */
#define _mm512_cvt_roundpd_ps(A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
3731 
3732 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3733 _mm512_cvtpd_ps (__m512d __A)
3734 {
3735  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3736  (__v8sf) _mm256_undefined_ps (),
3737  (__mmask8) -1,
3739 }
3740 
3741 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3742 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3743 {
3744  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3745  (__v8sf) __W,
3746  (__mmask8) __U,
3748 }
3749 
3750 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3751 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3752 {
3753  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3754  (__v8sf) _mm256_setzero_ps (),
3755  (__mmask8) __U,
3757 }
3758 
3759 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3760 _mm512_cvtpd_pslo (__m512d __A)
3761 {
3762  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3763  (__v8sf) _mm256_setzero_ps (),
3764  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3765 }
3766 
3767 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3768 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3769 {
3770  return (__m512) __builtin_shufflevector (
3772  __U, __A),
3773  (__v8sf) _mm256_setzero_ps (),
3774  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3775 }
3776 
/* float <-> half (FP16 storage) conversions.  vcvtps2ph takes an
 * immediate I encoding the rounding mode; vcvtph2ps takes an explicit
 * SAE/rounding control R in the _round_ forms. */
#define _mm512_cvt_roundps_ph(A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1)

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W))

#define _mm512_cvtps_ph(A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1)

#define _mm512_mask_cvtps_ph(U, W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W))

#define _mm512_maskz_cvtps_ph(W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W))

#define _mm512_cvt_roundph_ps(A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))
3821 
3822 
3823 static __inline __m512 __DEFAULT_FN_ATTRS512
3824 _mm512_cvtph_ps(__m256i __A)
3825 {
3826  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3827  (__v16sf)
3828  _mm512_setzero_ps (),
3829  (__mmask16) -1,
3831 }
3832 
3833 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3834 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3835 {
3836  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3837  (__v16sf) __W,
3838  (__mmask16) __U,
3840 }
3841 
3842 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3843 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3844 {
3845  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3846  (__v16sf) _mm512_setzero_ps (),
3847  (__mmask16) __U,
3849 }
3850 
/* Truncating double -> int32 conversion with explicit SAE/rounding R.  */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R))
3865 
3866 static __inline __m256i __DEFAULT_FN_ATTRS512
3868 {
3869  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3870  (__v8si)_mm256_setzero_si256(),
3871  (__mmask8) -1,
3873 }
3874 
3875 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3876 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3877 {
3878  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3879  (__v8si) __W,
3880  (__mmask8) __U,
3882 }
3883 
3884 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3885 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3886 {
3887  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3888  (__v8si) _mm256_setzero_si256 (),
3889  (__mmask8) __U,
3891 }
3892 
/* Truncating float -> int32 conversion with explicit SAE/rounding R.  */
#define _mm512_cvtt_roundps_epi32(A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R))
3907 
3908 static __inline __m512i __DEFAULT_FN_ATTRS512
3910 {
3911  return (__m512i)
3912  __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3913  (__v16si) _mm512_setzero_si512 (),
3914  (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3915 }
3916 
3917 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3918 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3919 {
3920  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3921  (__v16si) __W,
3922  (__mmask16) __U,
3924 }
3925 
3926 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3927 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3928 {
3929  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3930  (__v16si) _mm512_setzero_si512 (),
3931  (__mmask16) __U,
3933 }
3934 
/* float -> int32 conversion with explicit rounding mode R.  */
#define _mm512_cvt_roundps_epi32(A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U), (int)(R))
3949 
3950 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3952 {
3953  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3954  (__v16si) _mm512_undefined_epi32 (),
3955  (__mmask16) -1,
3957 }
3958 
3959 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3960 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3961 {
3962  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3963  (__v16si) __W,
3964  (__mmask16) __U,
3966 }
3967 
3968 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3969 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3970 {
3971  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3972  (__v16si)
3974  (__mmask16) __U,
3976 }
3977 
/* double -> int32 conversion with explicit rounding mode R.  */
#define _mm512_cvt_roundpd_epi32(A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)(__m256i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)(U), (int)(R))
3992 
3993 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3994 _mm512_cvtpd_epi32 (__m512d __A)
3995 {
3996  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3997  (__v8si)
3999  (__mmask8) -1,
4001 }
4002 
4003 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4004 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4005 {
4006  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4007  (__v8si) __W,
4008  (__mmask8) __U,
4010 }
4011 
4012 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4013 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4014 {
4015  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4016  (__v8si)
4018  (__mmask8) __U,
4020 }
4021 
/* float -> uint32 conversion with explicit rounding mode R.  */
#define _mm512_cvt_roundps_epu32(A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R))
4036 
4037 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4038 _mm512_cvtps_epu32 ( __m512 __A)
4039 {
4040  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4041  (__v16si)\
4043  (__mmask16) -1,\
4045 }
4046 
4047 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4048 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4049 {
4050  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4051  (__v16si) __W,
4052  (__mmask16) __U,
4054 }
4055 
4056 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4057 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4058 {
4059  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4060  (__v16si)
4062  (__mmask16) __U ,
4064 }
4065 
/* double -> uint32 conversion with explicit rounding mode R.  */
#define _mm512_cvt_roundpd_epu32(A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R))
4080 
4081 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4082 _mm512_cvtpd_epu32 (__m512d __A)
4083 {
4084  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4085  (__v8si)
4087  (__mmask8) -1,
4089 }
4090 
4091 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4092 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4093 {
4094  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4095  (__v8si) __W,
4096  (__mmask8) __U,
4098 }
4099 
4100 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4101 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4102 {
4103  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4104  (__v8si)
4106  (__mmask8) __U,
4108 }
4109 
4110 static __inline__ double __DEFAULT_FN_ATTRS512
4112 {
4113  return __a[0];
4114 }
4115 
4116 static __inline__ float __DEFAULT_FN_ATTRS512
4118 {
4119  return __a[0];
4120 }
4121 
4122 /* Unpack and Interleave */
4123 
4124 static __inline __m512d __DEFAULT_FN_ATTRS512
4125 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
4126 {
4127  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4128  1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4129 }
4130 
4131 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4132 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4133 {
4134  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4135  (__v8df)_mm512_unpackhi_pd(__A, __B),
4136  (__v8df)__W);
4137 }
4138 
4139 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4140 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4141 {
4142  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4143  (__v8df)_mm512_unpackhi_pd(__A, __B),
4144  (__v8df)_mm512_setzero_pd());
4145 }
4146 
4147 static __inline __m512d __DEFAULT_FN_ATTRS512
4148 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
4149 {
4150  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4151  0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4152 }
4153 
4154 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4155 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4156 {
4157  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4158  (__v8df)_mm512_unpacklo_pd(__A, __B),
4159  (__v8df)__W);
4160 }
4161 
4162 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4163 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4164 {
4165  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4166  (__v8df)_mm512_unpacklo_pd(__A, __B),
4167  (__v8df)_mm512_setzero_pd());
4168 }
4169 
4170 static __inline __m512 __DEFAULT_FN_ATTRS512
4171 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
4172 {
4173  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4174  2, 18, 3, 19,
4175  2+4, 18+4, 3+4, 19+4,
4176  2+8, 18+8, 3+8, 19+8,
4177  2+12, 18+12, 3+12, 19+12);
4178 }
4179 
4180 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4181 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4182 {
4183  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4184  (__v16sf)_mm512_unpackhi_ps(__A, __B),
4185  (__v16sf)__W);
4186 }
4187 
4188 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4189 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4190 {
4191  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4192  (__v16sf)_mm512_unpackhi_ps(__A, __B),
4193  (__v16sf)_mm512_setzero_ps());
4194 }
4195 
4196 static __inline __m512 __DEFAULT_FN_ATTRS512
4197 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
4198 {
4199  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4200  0, 16, 1, 17,
4201  0+4, 16+4, 1+4, 17+4,
4202  0+8, 16+8, 1+8, 17+8,
4203  0+12, 16+12, 1+12, 17+12);
4204 }
4205 
4206 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4207 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4208 {
4209  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4210  (__v16sf)_mm512_unpacklo_ps(__A, __B),
4211  (__v16sf)__W);
4212 }
4213 
4214 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4215 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4216 {
4217  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4218  (__v16sf)_mm512_unpacklo_ps(__A, __B),
4219  (__v16sf)_mm512_setzero_ps());
4220 }
4221 
4222 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4223 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4224 {
4225  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4226  2, 18, 3, 19,
4227  2+4, 18+4, 3+4, 19+4,
4228  2+8, 18+8, 3+8, 19+8,
4229  2+12, 18+12, 3+12, 19+12);
4230 }
4231 
4232 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4233 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4234 {
4235  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4236  (__v16si)_mm512_unpackhi_epi32(__A, __B),
4237  (__v16si)__W);
4238 }
4239 
4240 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4241 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4242 {
4243  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4244  (__v16si)_mm512_unpackhi_epi32(__A, __B),
4245  (__v16si)_mm512_setzero_si512());
4246 }
4247 
4248 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4249 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4250 {
4251  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4252  0, 16, 1, 17,
4253  0+4, 16+4, 1+4, 17+4,
4254  0+8, 16+8, 1+8, 17+8,
4255  0+12, 16+12, 1+12, 17+12);
4256 }
4257 
4258 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4259 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4260 {
4261  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4262  (__v16si)_mm512_unpacklo_epi32(__A, __B),
4263  (__v16si)__W);
4264 }
4265 
4266 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4267 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4268 {
4269  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4270  (__v16si)_mm512_unpacklo_epi32(__A, __B),
4271  (__v16si)_mm512_setzero_si512());
4272 }
4273 
4274 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4275 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4276 {
4277  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4278  1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4279 }
4280 
4281 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4282 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4283 {
4284  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4285  (__v8di)_mm512_unpackhi_epi64(__A, __B),
4286  (__v8di)__W);
4287 }
4288 
4289 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4290 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4291 {
4292  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4293  (__v8di)_mm512_unpackhi_epi64(__A, __B),
4294  (__v8di)_mm512_setzero_si512());
4295 }
4296 
4297 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4298 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4299 {
4300  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4301  0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4302 }
4303 
4304 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4305 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4306 {
4307  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4308  (__v8di)_mm512_unpacklo_epi64(__A, __B),
4309  (__v8di)__W);
4310 }
4311 
4312 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4313 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4314 {
4315  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4316  (__v8di)_mm512_unpacklo_epi64(__A, __B),
4317  (__v8di)_mm512_setzero_si512());
4318 }
4319 
4320 
4321 /* SIMD load ops */
4322 
4323 static __inline __m512i __DEFAULT_FN_ATTRS512
4324 _mm512_loadu_si512 (void const *__P)
4325 {
4326  struct __loadu_si512 {
4327  __m512i __v;
4328  } __attribute__((__packed__, __may_alias__));
4329  return ((struct __loadu_si512*)__P)->__v;
4330 }
4331 
4332 static __inline __m512i __DEFAULT_FN_ATTRS512
4333 _mm512_loadu_epi32 (void const *__P)
4334 {
4335  struct __loadu_epi32 {
4336  __m512i __v;
4337  } __attribute__((__packed__, __may_alias__));
4338  return ((struct __loadu_epi32*)__P)->__v;
4339 }
4340 
4341 static __inline __m512i __DEFAULT_FN_ATTRS512
4342 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4343 {
4344  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4345  (__v16si) __W,
4346  (__mmask16) __U);
4347 }
4348 
4349 
4350 static __inline __m512i __DEFAULT_FN_ATTRS512
4351 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4352 {
4353  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4354  (__v16si)
4356  (__mmask16) __U);
4357 }
4358 
4359 static __inline __m512i __DEFAULT_FN_ATTRS512
4360 _mm512_loadu_epi64 (void const *__P)
4361 {
4362  struct __loadu_epi64 {
4363  __m512i __v;
4364  } __attribute__((__packed__, __may_alias__));
4365  return ((struct __loadu_epi64*)__P)->__v;
4366 }
4367 
4368 static __inline __m512i __DEFAULT_FN_ATTRS512
4369 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4370 {
4371  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4372  (__v8di) __W,
4373  (__mmask8) __U);
4374 }
4375 
4376 static __inline __m512i __DEFAULT_FN_ATTRS512
4377 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4378 {
4379  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4380  (__v8di)
4382  (__mmask8) __U);
4383 }
4384 
4385 static __inline __m512 __DEFAULT_FN_ATTRS512
4386 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4387 {
4388  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4389  (__v16sf) __W,
4390  (__mmask16) __U);
4391 }
4392 
4393 static __inline __m512 __DEFAULT_FN_ATTRS512
4394 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4395 {
4396  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4397  (__v16sf)
4398  _mm512_setzero_ps (),
4399  (__mmask16) __U);
4400 }
4401 
4402 static __inline __m512d __DEFAULT_FN_ATTRS512
4403 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4404 {
4405  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4406  (__v8df) __W,
4407  (__mmask8) __U);
4408 }
4409 
4410 static __inline __m512d __DEFAULT_FN_ATTRS512
4411 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4412 {
4413  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4414  (__v8df)
4415  _mm512_setzero_pd (),
4416  (__mmask8) __U);
4417 }
4418 
4419 static __inline __m512d __DEFAULT_FN_ATTRS512
4420 _mm512_loadu_pd(void const *__p)
4421 {
4422  struct __loadu_pd {
4423  __m512d __v;
4424  } __attribute__((__packed__, __may_alias__));
4425  return ((struct __loadu_pd*)__p)->__v;
4426 }
4427 
4428 static __inline __m512 __DEFAULT_FN_ATTRS512
4429 _mm512_loadu_ps(void const *__p)
4430 {
4431  struct __loadu_ps {
4432  __m512 __v;
4433  } __attribute__((__packed__, __may_alias__));
4434  return ((struct __loadu_ps*)__p)->__v;
4435 }
4436 
4437 static __inline __m512 __DEFAULT_FN_ATTRS512
4438 _mm512_load_ps(void const *__p)
4439 {
4440  return *(__m512*)__p;
4441 }
4442 
4443 static __inline __m512 __DEFAULT_FN_ATTRS512
4444 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4445 {
4446  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4447  (__v16sf) __W,
4448  (__mmask16) __U);
4449 }
4450 
4451 static __inline __m512 __DEFAULT_FN_ATTRS512
4452 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4453 {
4454  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4455  (__v16sf)
4456  _mm512_setzero_ps (),
4457  (__mmask16) __U);
4458 }
4459 
4460 static __inline __m512d __DEFAULT_FN_ATTRS512
4461 _mm512_load_pd(void const *__p)
4462 {
4463  return *(__m512d*)__p;
4464 }
4465 
4466 static __inline __m512d __DEFAULT_FN_ATTRS512
4467 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4468 {
4469  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4470  (__v8df) __W,
4471  (__mmask8) __U);
4472 }
4473 
4474 static __inline __m512d __DEFAULT_FN_ATTRS512
4475 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4476 {
4477  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4478  (__v8df)
4479  _mm512_setzero_pd (),
4480  (__mmask8) __U);
4481 }
4482 
4483 static __inline __m512i __DEFAULT_FN_ATTRS512
4484 _mm512_load_si512 (void const *__P)
4485 {
4486  return *(__m512i *) __P;
4487 }
4488 
4489 static __inline __m512i __DEFAULT_FN_ATTRS512
4490 _mm512_load_epi32 (void const *__P)
4491 {
4492  return *(__m512i *) __P;
4493 }
4494 
4495 static __inline __m512i __DEFAULT_FN_ATTRS512
4496 _mm512_load_epi64 (void const *__P)
4497 {
4498  return *(__m512i *) __P;
4499 }
4500 
4501 /* SIMD store ops */
4502 
4503 static __inline void __DEFAULT_FN_ATTRS512
4504 _mm512_storeu_epi64 (void *__P, __m512i __A)
4505 {
4506  struct __storeu_epi64 {
4507  __m512i __v;
4508  } __attribute__((__packed__, __may_alias__));
4509  ((struct __storeu_epi64*)__P)->__v = __A;
4510 }
4511 
4512 static __inline void __DEFAULT_FN_ATTRS512
4513 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4514 {
4515  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4516  (__mmask8) __U);
4517 }
4518 
4519 static __inline void __DEFAULT_FN_ATTRS512
4520 _mm512_storeu_si512 (void *__P, __m512i __A)
4521 {
4522  struct __storeu_si512 {
4523  __m512i __v;
4524  } __attribute__((__packed__, __may_alias__));
4525  ((struct __storeu_si512*)__P)->__v = __A;
4526 }
4527 
4528 static __inline void __DEFAULT_FN_ATTRS512
4529 _mm512_storeu_epi32 (void *__P, __m512i __A)
4530 {
4531  struct __storeu_epi32 {
4532  __m512i __v;
4533  } __attribute__((__packed__, __may_alias__));
4534  ((struct __storeu_epi32*)__P)->__v = __A;
4535 }
4536 
4537 static __inline void __DEFAULT_FN_ATTRS512
4538 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4539 {
4540  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4541  (__mmask16) __U);
4542 }
4543 
4544 static __inline void __DEFAULT_FN_ATTRS512
4545 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4546 {
4547  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4548 }
4549 
4550 static __inline void __DEFAULT_FN_ATTRS512
4551 _mm512_storeu_pd(void *__P, __m512d __A)
4552 {
4553  struct __storeu_pd {
4554  __m512d __v;
4555  } __attribute__((__packed__, __may_alias__));
4556  ((struct __storeu_pd*)__P)->__v = __A;
4557 }
4558 
4559 static __inline void __DEFAULT_FN_ATTRS512
4560 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4561 {
4562  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4563  (__mmask16) __U);
4564 }
4565 
4566 static __inline void __DEFAULT_FN_ATTRS512
4567 _mm512_storeu_ps(void *__P, __m512 __A)
4568 {
4569  struct __storeu_ps {
4570  __m512 __v;
4571  } __attribute__((__packed__, __may_alias__));
4572  ((struct __storeu_ps*)__P)->__v = __A;
4573 }
4574 
4575 static __inline void __DEFAULT_FN_ATTRS512
4576 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4577 {
4578  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4579 }
4580 
4581 static __inline void __DEFAULT_FN_ATTRS512
4582 _mm512_store_pd(void *__P, __m512d __A)
4583 {
4584  *(__m512d*)__P = __A;
4585 }
4586 
4587 static __inline void __DEFAULT_FN_ATTRS512
4588 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4589 {
4590  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4591  (__mmask16) __U);
4592 }
4593 
4594 static __inline void __DEFAULT_FN_ATTRS512
4595 _mm512_store_ps(void *__P, __m512 __A)
4596 {
4597  *(__m512*)__P = __A;
4598 }
4599 
4600 static __inline void __DEFAULT_FN_ATTRS512
4601 _mm512_store_si512 (void *__P, __m512i __A)
4602 {
4603  *(__m512i *) __P = __A;
4604 }
4605 
4606 static __inline void __DEFAULT_FN_ATTRS512
4607 _mm512_store_epi32 (void *__P, __m512i __A)
4608 {
4609  *(__m512i *) __P = __A;
4610 }
4611 
4612 static __inline void __DEFAULT_FN_ATTRS512
4613 _mm512_store_epi64 (void *__P, __m512i __A)
4614 {
4615  *(__m512i *) __P = __A;
4616 }
4617 
4618 /* Mask ops */
4619 
4620 static __inline __mmask16 __DEFAULT_FN_ATTRS
4621 _mm512_knot(__mmask16 __M)
4622 {
4623  return __builtin_ia32_knothi(__M);
4624 }
4625 
4626 /* Integer compare */
4627 
/* Integer comparison shorthands, expressed via the generic _mm512_cmp_*
   intrinsics with the appropriate _MM_CMPINT_* predicate.  */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4727 
4728 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4730 {
4731  /* This function always performs a signed extension, but __v16qi is a char
4732  which may be signed or unsigned, so use __v16qs. */
4733  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4734 }
4735 
4736 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4737 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4738 {
4739  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4740  (__v16si)_mm512_cvtepi8_epi32(__A),
4741  (__v16si)__W);
4742 }
4743 
4744 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4745 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
4746 {
4747  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4748  (__v16si)_mm512_cvtepi8_epi32(__A),
4749  (__v16si)_mm512_setzero_si512());
4750 }
4751 
4752 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4754 {
4755  /* This function always performs a signed extension, but __v16qi is a char
4756  which may be signed or unsigned, so use __v16qs. */
4757  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4758 }
4759 
4760 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4761 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4762 {
4763  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4764  (__v8di)_mm512_cvtepi8_epi64(__A),
4765  (__v8di)__W);
4766 }
4767 
4768 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4769 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4770 {
4771  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4772  (__v8di)_mm512_cvtepi8_epi64(__A),
4773  (__v8di)_mm512_setzero_si512 ());
4774 }
4775 
4776 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4778 {
4779  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4780 }
4781 
4782 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4783 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4784 {
4785  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4786  (__v8di)_mm512_cvtepi32_epi64(__X),
4787  (__v8di)__W);
4788 }
4789 
4790 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4791 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
4792 {
4793  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4794  (__v8di)_mm512_cvtepi32_epi64(__X),
4795  (__v8di)_mm512_setzero_si512());
4796 }
4797 
4798 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4800 {
4801  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4802 }
4803 
4804 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4805 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4806 {
4807  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4808  (__v16si)_mm512_cvtepi16_epi32(__A),
4809  (__v16si)__W);
4810 }
4811 
4812 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4813 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
4814 {
4815  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4816  (__v16si)_mm512_cvtepi16_epi32(__A),
4817  (__v16si)_mm512_setzero_si512 ());
4818 }
4819 
4820 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4822 {
4823  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4824 }
4825 
4826 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4827 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4828 {
4829  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4830  (__v8di)_mm512_cvtepi16_epi64(__A),
4831  (__v8di)__W);
4832 }
4833 
4834 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4835 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4836 {
4837  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4838  (__v8di)_mm512_cvtepi16_epi64(__A),
4839  (__v8di)_mm512_setzero_si512());
4840 }
4841 
4842 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4844 {
4845  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4846 }
4847 
4848 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4849 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4850 {
4851  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4852  (__v16si)_mm512_cvtepu8_epi32(__A),
4853  (__v16si)__W);
4854 }
4855 
4856 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4857 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
4858 {
4859  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4860  (__v16si)_mm512_cvtepu8_epi32(__A),
4861  (__v16si)_mm512_setzero_si512());
4862 }
4863 
4864 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4866 {
4867  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4868 }
4869 
4870 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4871 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4872 {
4873  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4874  (__v8di)_mm512_cvtepu8_epi64(__A),
4875  (__v8di)__W);
4876 }
4877 
4878 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4879 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4880 {
4881  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4882  (__v8di)_mm512_cvtepu8_epi64(__A),
4883  (__v8di)_mm512_setzero_si512());
4884 }
4885 
4886 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4888 {
4889  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4890 }
4891 
4892 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4893 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4894 {
4895  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4896  (__v8di)_mm512_cvtepu32_epi64(__X),
4897  (__v8di)__W);
4898 }
4899 
4900 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4901 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
4902 {
4903  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4904  (__v8di)_mm512_cvtepu32_epi64(__X),
4905  (__v8di)_mm512_setzero_si512());
4906 }
4907 
4908 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4910 {
4911  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4912 }
4913 
4914 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4915 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4916 {
4917  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4918  (__v16si)_mm512_cvtepu16_epi32(__A),
4919  (__v16si)__W);
4920 }
4921 
4922 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4923 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
4924 {
4925  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4926  (__v16si)_mm512_cvtepu16_epi32(__A),
4927  (__v16si)_mm512_setzero_si512());
4928 }
4929 
4930 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4932 {
4933  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4934 }
4935 
4936 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4937 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4938 {
4939  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4940  (__v8di)_mm512_cvtepu16_epi64(__A),
4941  (__v8di)__W);
4942 }
4943 
4944 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4945 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4946 {
4947  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4948  (__v8di)_mm512_cvtepu16_epi64(__A),
4949  (__v8di)_mm512_setzero_si512());
4950 }
4951 
4952 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4953 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4954 {
4955  return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4956 }
4957 
4958 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4959 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4960 {
4961  return (__m512i)__builtin_ia32_selectd_512(__U,
4962  (__v16si)_mm512_rorv_epi32(__A, __B),
4963  (__v16si)__W);
4964 }
4965 
4966 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4967 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4968 {
4969  return (__m512i)__builtin_ia32_selectd_512(__U,
4970  (__v16si)_mm512_rorv_epi32(__A, __B),
4971  (__v16si)_mm512_setzero_si512());
4972 }
4973 
4974 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4975 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4976 {
4977  return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4978 }
4979 
4980 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4981 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4982 {
4983  return (__m512i)__builtin_ia32_selectq_512(__U,
4984  (__v8di)_mm512_rorv_epi64(__A, __B),
4985  (__v8di)__W);
4986 }
4987 
4988 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4989 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4990 {
4991  return (__m512i)__builtin_ia32_selectq_512(__U,
4992  (__v8di)_mm512_rorv_epi64(__A, __B),
4993  (__v8di)_mm512_setzero_si512());
4994 }
4995 
4996 
4997 
/* Generic integer comparisons: `p` is an _MM_CMPINT_* predicate; each
   macro yields a bitmask with one bit per lane.  The mask_ forms AND the
   result with the incoming mask `m`.  Macros (not functions) because `p`
   must be an immediate. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1)

#define _mm512_cmp_epu32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1)

#define _mm512_cmp_epi64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1)

#define _mm512_cmp_epu64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1)

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m))
5037 
/* Rotate-left by an immediate count `b`; macros because the count must
   be a compile-time constant. */
#define _mm512_rol_epi32(a, b) \
  (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_rol_epi32((a), (b)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_rol_epi32(U, a, b) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_rol_epi32((a), (b)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_rol_epi64(a, b) \
  (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_rol_epi64((a), (b)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_rol_epi64(U, a, b) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_rol_epi64((a), (b)), \
                                      (__v8di)_mm512_setzero_si512())
5063 
5064 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5065 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
5066 {
5067  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5068 }
5069 
5070 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5071 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5072 {
5073  return (__m512i)__builtin_ia32_selectd_512(__U,
5074  (__v16si)_mm512_rolv_epi32(__A, __B),
5075  (__v16si)__W);
5076 }
5077 
5078 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5079 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5080 {
5081  return (__m512i)__builtin_ia32_selectd_512(__U,
5082  (__v16si)_mm512_rolv_epi32(__A, __B),
5083  (__v16si)_mm512_setzero_si512());
5084 }
5085 
5086 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5087 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
5088 {
5089  return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5090 }
5091 
5092 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5093 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5094 {
5095  return (__m512i)__builtin_ia32_selectq_512(__U,
5096  (__v8di)_mm512_rolv_epi64(__A, __B),
5097  (__v8di)__W);
5098 }
5099 
5100 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5101 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5102 {
5103  return (__m512i)__builtin_ia32_selectq_512(__U,
5104  (__v8di)_mm512_rolv_epi64(__A, __B),
5105  (__v8di)_mm512_setzero_si512());
5106 }
5107 
/* Rotate-right by an immediate count `B`; macros because the count must
   be a compile-time constant. */
#define _mm512_ror_epi32(A, B) \
  (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_ror_epi32((A), (B)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_ror_epi32(U, A, B) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_ror_epi32((A), (B)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_ror_epi64(A, B) \
  (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_ror_epi64((A), (B)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_ror_epi64(U, A, B) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_ror_epi64((A), (B)), \
                                      (__v8di)_mm512_setzero_si512())
5133 
5134 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5135 _mm512_slli_epi32(__m512i __A, int __B)
5136 {
5137  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5138 }
5139 
5140 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5141 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5142 {
5143  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5144  (__v16si)_mm512_slli_epi32(__A, __B),
5145  (__v16si)__W);
5146 }
5147 
5148 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5149 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5150  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5151  (__v16si)_mm512_slli_epi32(__A, __B),
5152  (__v16si)_mm512_setzero_si512());
5153 }
5154 
5155 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5156 _mm512_slli_epi64(__m512i __A, int __B)
5157 {
5158  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5159 }
5160 
5161 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5162 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5163 {
5164  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5165  (__v8di)_mm512_slli_epi64(__A, __B),
5166  (__v8di)__W);
5167 }
5168 
5169 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5170 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5171 {
5172  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5173  (__v8di)_mm512_slli_epi64(__A, __B),
5174  (__v8di)_mm512_setzero_si512());
5175 }
5176 
5177 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5178 _mm512_srli_epi32(__m512i __A, int __B)
5179 {
5180  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5181 }
5182 
5183 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5184 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5185 {
5186  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5187  (__v16si)_mm512_srli_epi32(__A, __B),
5188  (__v16si)__W);
5189 }
5190 
5191 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5192 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5193  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5194  (__v16si)_mm512_srli_epi32(__A, __B),
5195  (__v16si)_mm512_setzero_si512());
5196 }
5197 
5198 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5199 _mm512_srli_epi64(__m512i __A, int __B)
5200 {
5201  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5202 }
5203 
5204 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5205 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5206 {
5207  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5208  (__v8di)_mm512_srli_epi64(__A, __B),
5209  (__v8di)__W);
5210 }
5211 
5212 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5213 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5214 {
5215  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5216  (__v8di)_mm512_srli_epi64(__A, __B),
5217  (__v8di)_mm512_setzero_si512());
5218 }
5219 
5220 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5221 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5222 {
5223  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5224  (__v16si) __W,
5225  (__mmask16) __U);
5226 }
5227 
5228 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5229 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5230 {
5231  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5232  (__v16si)
5234  (__mmask16) __U);
5235 }
5236 
5237 static __inline__ void __DEFAULT_FN_ATTRS512
5238 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5239 {
5240  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5241  (__mmask16) __U);
5242 }
5243 
5244 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5245 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5246 {
5247  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5248  (__v16si) __A,
5249  (__v16si) __W);
5250 }
5251 
5252 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5253 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5254 {
5255  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5256  (__v16si) __A,
5257  (__v16si) _mm512_setzero_si512 ());
5258 }
5259 
5260 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5261 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5262 {
5263  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5264  (__v8di) __A,
5265  (__v8di) __W);
5266 }
5267 
5268 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5269 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5270 {
5271  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5272  (__v8di) __A,
5273  (__v8di) _mm512_setzero_si512 ());
5274 }
5275 
5276 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5277 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5278 {
5279  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5280  (__v8di) __W,
5281  (__mmask8) __U);
5282 }
5283 
5284 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5285 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5286 {
5287  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5288  (__v8di)
5290  (__mmask8) __U);
5291 }
5292 
5293 static __inline__ void __DEFAULT_FN_ATTRS512
5294 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5295 {
5296  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5297  (__mmask8) __U);
5298 }
5299 
5300 static __inline__ __m512d __DEFAULT_FN_ATTRS512
5301 _mm512_movedup_pd (__m512d __A)
5302 {
5303  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5304  0, 0, 2, 2, 4, 4, 6, 6);
5305 }
5306 
5307 static __inline__ __m512d __DEFAULT_FN_ATTRS512
5308 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5309 {
5310  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5311  (__v8df)_mm512_movedup_pd(__A),
5312  (__v8df)__W);
5313 }
5314 
5315 static __inline__ __m512d __DEFAULT_FN_ATTRS512
5316 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5317 {
5318  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5319  (__v8df)_mm512_movedup_pd(__A),
5320  (__v8df)_mm512_setzero_pd());
5321 }
5322 
/* fixupimm (double, 512-bit): fix up special values per the table in C
   and immediate `imm`.  _round_ forms take an explicit rounding mode R;
   plain forms use _MM_FROUND_CUR_DIRECTION. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              (int)(R))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION)
5362 
/* fixupimm (float, 512-bit) — single-precision counterparts of the
   _mm512_fixupimm_pd macro family above. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             (int)(R))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION)
5402 
/* fixupimm (scalar double): fix up the low element only; upper element
   is passed through from A. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R))

#define _mm_fixupimm_sd(A, B, C, imm) \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)
5441 
/* fixupimm (scalar float) — single-precision counterparts of the
   _mm_fixupimm_sd macro family above. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, (int)(R))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), (int)(R))

#define _mm_fixupimm_ss(A, B, C, imm) \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)
5480 
/* getexp of the low double of B with explicit rounding R; upper element
   comes from A. */
#define _mm_getexp_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(R))
5486 
5487 
5488 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5489 _mm_getexp_sd (__m128d __A, __m128d __B)
5490 {
5491  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5492  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5493 }
5494 
5495 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5496 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5497 {
5498  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5499  (__v2df) __B,
5500  (__v2df) __W,
5501  (__mmask8) __U,
5503 }
5504 
/* Masked scalar getexp with explicit rounding mode R. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(R))
5510 
5511 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5512 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5513 {
5514  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5515  (__v2df) __B,
5516  (__v2df) _mm_setzero_pd (),
5517  (__mmask8) __U,
5519 }
5520 
/* Zero-masked scalar getexp (double) with explicit rounding mode R. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(R))

/* getexp of the low float of B with explicit rounding R; upper elements
   come from A. */
#define _mm_getexp_round_ss(A, B, R) \
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(R))
5532 
5533 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5534 _mm_getexp_ss (__m128 __A, __m128 __B)
5535 {
5536  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5537  (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5538 }
5539 
5540 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5541 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5542 {
5543  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5544  (__v4sf) __B,
5545  (__v4sf) __W,
5546  (__mmask8) __U,
5548 }
5549 
/* Masked scalar getexp (float) with explicit rounding mode R. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(R))
5555 
5556 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5557 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5558 {
5559  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5560  (__v4sf) __B,
5561  (__v4sf) _mm_setzero_ps (),
5562  (__mmask8) __U,
5564 }
5565 
/* Zero-masked scalar getexp (float) with explicit rounding mode R. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(R))
5571 
/* getmant (scalar double): normalize the mantissa of the low element.
   C selects the normalization interval, D the sign control; they are
   packed into one immediate as (D << 2) | C. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R))

#define _mm_getmant_sd(A, B, C, D) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R))
5616 
/* getmant (scalar float) — single-precision counterparts of the
   _mm_getmant_sd macro family above; immediate is (D << 2) | C. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R))

#define _mm_getmant_ss(A, B, C, D) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R))
5661 
5662 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5663 _mm512_kmov (__mmask16 __A)
5664 {
5665  return __A;
5666 }
5667 
/* Scalar compare with predicate P and rounding/SAE control R; result is
   an int flag. */
#define _mm_comi_round_sd(A, B, P, R) \
  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                              (int)(P), (int)(R))

#define _mm_comi_round_ss(A, B, P, R) \
  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                              (int)(P), (int)(R))

#ifdef __x86_64__
/* Convert the low double to a signed 64-bit integer with rounding R
   (64-bit targets only). */
#define _mm_cvt_roundsd_si64(A, R) \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
#endif
5680 
5681 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5682 _mm512_sll_epi32(__m512i __A, __m128i __B)
5683 {
5684  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5685 }
5686 
5687 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5688 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5689 {
5690  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5691  (__v16si)_mm512_sll_epi32(__A, __B),
5692  (__v16si)__W);
5693 }
5694 
5695 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5696 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5697 {
5698  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5699  (__v16si)_mm512_sll_epi32(__A, __B),
5700  (__v16si)_mm512_setzero_si512());
5701 }
5702 
5703 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5704 _mm512_sll_epi64(__m512i __A, __m128i __B)
5705 {
5706  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5707 }
5708 
5709 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5710 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5711 {
5712  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5713  (__v8di)_mm512_sll_epi64(__A, __B),
5714  (__v8di)__W);
5715 }
5716 
5717 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5718 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5719 {
5720  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5721  (__v8di)_mm512_sll_epi64(__A, __B),
5722  (__v8di)_mm512_setzero_si512());
5723 }
5724 
5725 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5726 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
5727 {
5728  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5729 }
5730 
5731 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5732 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5733 {
5734  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5735  (__v16si)_mm512_sllv_epi32(__X, __Y),
5736  (__v16si)__W);
5737 }
5738 
5739 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5740 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5741 {
5742  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5743  (__v16si)_mm512_sllv_epi32(__X, __Y),
5744  (__v16si)_mm512_setzero_si512());
5745 }
5746 
5747 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5748 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
5749 {
5750  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5751 }
5752 
5753 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5754 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5755 {
5756  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5757  (__v8di)_mm512_sllv_epi64(__X, __Y),
5758  (__v8di)__W);
5759 }
5760 
5761 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5762 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5763 {
5764  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5765  (__v8di)_mm512_sllv_epi64(__X, __Y),
5766  (__v8di)_mm512_setzero_si512());
5767 }
5768 
5769 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5770 _mm512_sra_epi32(__m512i __A, __m128i __B)
5771 {
5772  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5773 }
5774 
5775 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5776 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5777 {
5778  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5779  (__v16si)_mm512_sra_epi32(__A, __B),
5780  (__v16si)__W);
5781 }
5782 
5783 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5784 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5785 {
5786  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,