clang 17.0.0git
avx512fintrin.h
Go to the documentation of this file.
1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
16typedef char __v64qi __attribute__((__vector_size__(64)));
17typedef short __v32hi __attribute__((__vector_size__(64)));
18typedef double __v8df __attribute__((__vector_size__(64)));
19typedef float __v16sf __attribute__((__vector_size__(64)));
20typedef long long __v8di __attribute__((__vector_size__(64)));
21typedef int __v16si __attribute__((__vector_size__(64)));
22
23/* Unsigned types */
24typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
25typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
26typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
27typedef unsigned int __v16su __attribute__((__vector_size__(64)));
28
29/* We need an explicitly signed variant for char. Note that this shouldn't
30 * appear in the interface though. */
31typedef signed char __v64qs __attribute__((__vector_size__(64)));
32
33typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
34typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
35typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
36
37typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
38typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
39typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
40
41typedef unsigned char __mmask8;
42typedef unsigned short __mmask16;
43
44/* Rounding mode macros. */
45#define _MM_FROUND_TO_NEAREST_INT 0x00
46#define _MM_FROUND_TO_NEG_INF 0x01
47#define _MM_FROUND_TO_POS_INF 0x02
48#define _MM_FROUND_TO_ZERO 0x03
49#define _MM_FROUND_CUR_DIRECTION 0x04
50
51/* Constants for integer comparison predicates */
52typedef enum {
53 _MM_CMPINT_EQ, /* Equal */
54 _MM_CMPINT_LT, /* Less than */
55 _MM_CMPINT_LE, /* Less than or Equal */
57 _MM_CMPINT_NE, /* Not Equal */
58 _MM_CMPINT_NLT, /* Not Less than */
59#define _MM_CMPINT_GE _MM_CMPINT_NLT /* Greater than or Equal */
60 _MM_CMPINT_NLE /* Not Less than or Equal */
61#define _MM_CMPINT_GT _MM_CMPINT_NLE /* Greater than */
63
64typedef enum
65{
151 _MM_PERM_DDDD = 0xFF
153
154typedef enum
155{
156 _MM_MANT_NORM_1_2, /* interval [1, 2) */
157 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
158 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
159 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
161
162typedef enum
163{
164 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
165 _MM_MANT_SIGN_zero, /* sign = 0 */
166 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
168
169/* Define the default attributes for the functions in this file. */
170#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
171#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
172#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
173
174/* Create vectors with repeated elements */
175
176static __inline __m512i __DEFAULT_FN_ATTRS512
178{
179 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
180}
181
182#define _mm512_setzero_epi32 _mm512_setzero_si512
183
184static __inline__ __m512d __DEFAULT_FN_ATTRS512
186{
187 return (__m512d)__builtin_ia32_undef512();
188}
189
190static __inline__ __m512 __DEFAULT_FN_ATTRS512
192{
193 return (__m512)__builtin_ia32_undef512();
194}
195
196static __inline__ __m512 __DEFAULT_FN_ATTRS512
198{
199 return (__m512)__builtin_ia32_undef512();
200}
201
202static __inline__ __m512i __DEFAULT_FN_ATTRS512
204{
205 return (__m512i)__builtin_ia32_undef512();
206}
207
208static __inline__ __m512i __DEFAULT_FN_ATTRS512
210{
211 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
212 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
213}
214
215static __inline__ __m512i __DEFAULT_FN_ATTRS512
216_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
217{
218 return (__m512i)__builtin_ia32_selectd_512(__M,
219 (__v16si) _mm512_broadcastd_epi32(__A),
220 (__v16si) __O);
221}
222
223static __inline__ __m512i __DEFAULT_FN_ATTRS512
225{
226 return (__m512i)__builtin_ia32_selectd_512(__M,
227 (__v16si) _mm512_broadcastd_epi32(__A),
228 (__v16si) _mm512_setzero_si512());
229}
230
231static __inline__ __m512i __DEFAULT_FN_ATTRS512
233{
234 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
235 0, 0, 0, 0, 0, 0, 0, 0);
236}
237
238static __inline__ __m512i __DEFAULT_FN_ATTRS512
239_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
240{
241 return (__m512i)__builtin_ia32_selectq_512(__M,
242 (__v8di) _mm512_broadcastq_epi64(__A),
243 (__v8di) __O);
244
245}
246
247static __inline__ __m512i __DEFAULT_FN_ATTRS512
249{
250 return (__m512i)__builtin_ia32_selectq_512(__M,
251 (__v8di) _mm512_broadcastq_epi64(__A),
252 (__v8di) _mm512_setzero_si512());
253}
254
255
256static __inline __m512 __DEFAULT_FN_ATTRS512
258{
259 return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
260 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
261}
262
263#define _mm512_setzero _mm512_setzero_ps
264
265static __inline __m512d __DEFAULT_FN_ATTRS512
267{
268 return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
269}
270
271static __inline __m512 __DEFAULT_FN_ATTRS512
273{
274 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
275 __w, __w, __w, __w, __w, __w, __w, __w };
276}
277
278static __inline __m512d __DEFAULT_FN_ATTRS512
279_mm512_set1_pd(double __w)
280{
281 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
282}
283
284static __inline __m512i __DEFAULT_FN_ATTRS512
286{
287 return __extension__ (__m512i)(__v64qi){
288 __w, __w, __w, __w, __w, __w, __w, __w,
289 __w, __w, __w, __w, __w, __w, __w, __w,
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w,
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w };
296}
297
298static __inline __m512i __DEFAULT_FN_ATTRS512
300{
301 return __extension__ (__m512i)(__v32hi){
302 __w, __w, __w, __w, __w, __w, __w, __w,
303 __w, __w, __w, __w, __w, __w, __w, __w,
304 __w, __w, __w, __w, __w, __w, __w, __w,
305 __w, __w, __w, __w, __w, __w, __w, __w };
306}
307
308static __inline __m512i __DEFAULT_FN_ATTRS512
310{
311 return __extension__ (__m512i)(__v16si){
312 __s, __s, __s, __s, __s, __s, __s, __s,
313 __s, __s, __s, __s, __s, __s, __s, __s };
314}
315
316static __inline __m512i __DEFAULT_FN_ATTRS512
318{
319 return (__m512i)__builtin_ia32_selectd_512(__M,
320 (__v16si)_mm512_set1_epi32(__A),
321 (__v16si)_mm512_setzero_si512());
322}
323
324static __inline __m512i __DEFAULT_FN_ATTRS512
325_mm512_set1_epi64(long long __d)
326{
327 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
328}
329
330static __inline __m512i __DEFAULT_FN_ATTRS512
332{
333 return (__m512i)__builtin_ia32_selectq_512(__M,
334 (__v8di)_mm512_set1_epi64(__A),
335 (__v8di)_mm512_setzero_si512());
336}
337
338static __inline__ __m512 __DEFAULT_FN_ATTRS512
340{
341 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
342 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
343}
344
345static __inline __m512i __DEFAULT_FN_ATTRS512
346_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
347{
348 return __extension__ (__m512i)(__v16si)
349 { __D, __C, __B, __A, __D, __C, __B, __A,
350 __D, __C, __B, __A, __D, __C, __B, __A };
351}
352
353static __inline __m512i __DEFAULT_FN_ATTRS512
354_mm512_set4_epi64 (long long __A, long long __B, long long __C,
355 long long __D)
356{
357 return __extension__ (__m512i) (__v8di)
358 { __D, __C, __B, __A, __D, __C, __B, __A };
359}
360
361static __inline __m512d __DEFAULT_FN_ATTRS512
362_mm512_set4_pd (double __A, double __B, double __C, double __D)
363{
364 return __extension__ (__m512d)
365 { __D, __C, __B, __A, __D, __C, __B, __A };
366}
367
368static __inline __m512 __DEFAULT_FN_ATTRS512
369_mm512_set4_ps (float __A, float __B, float __C, float __D)
370{
371 return __extension__ (__m512)
372 { __D, __C, __B, __A, __D, __C, __B, __A,
373 __D, __C, __B, __A, __D, __C, __B, __A };
374}
375
376#define _mm512_setr4_epi32(e0,e1,e2,e3) \
377 _mm512_set4_epi32((e3),(e2),(e1),(e0))
378
379#define _mm512_setr4_epi64(e0,e1,e2,e3) \
380 _mm512_set4_epi64((e3),(e2),(e1),(e0))
381
382#define _mm512_setr4_pd(e0,e1,e2,e3) \
383 _mm512_set4_pd((e3),(e2),(e1),(e0))
384
385#define _mm512_setr4_ps(e0,e1,e2,e3) \
386 _mm512_set4_ps((e3),(e2),(e1),(e0))
387
388static __inline__ __m512d __DEFAULT_FN_ATTRS512
390{
391 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
392 0, 0, 0, 0, 0, 0, 0, 0);
393}
394
395/* Cast between vector types */
396
397static __inline __m512d __DEFAULT_FN_ATTRS512
399{
400 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
401 1, 2, 3, 4, 5, 6, 7);
402}
403
404static __inline __m512 __DEFAULT_FN_ATTRS512
406{
407 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
408 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
409}
410
411static __inline __m128d __DEFAULT_FN_ATTRS512
413{
414 return __builtin_shufflevector(__a, __a, 0, 1);
415}
416
417static __inline __m256d __DEFAULT_FN_ATTRS512
419{
420 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
421}
422
423static __inline __m128 __DEFAULT_FN_ATTRS512
425{
426 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
427}
428
429static __inline __m256 __DEFAULT_FN_ATTRS512
431{
432 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
433}
434
435static __inline __m512 __DEFAULT_FN_ATTRS512
436_mm512_castpd_ps (__m512d __A)
437{
438 return (__m512) (__A);
439}
440
441static __inline __m512i __DEFAULT_FN_ATTRS512
443{
444 return (__m512i) (__A);
445}
446
447static __inline__ __m512d __DEFAULT_FN_ATTRS512
449{
450 __m256d __B = __builtin_nondeterministic_value(__B);
451 return __builtin_shufflevector(
452 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
453 __B, 0, 1, 2, 3, 4, 5, 6, 7);
454}
455
456static __inline __m512d __DEFAULT_FN_ATTRS512
458{
459 return (__m512d) (__A);
460}
461
462static __inline __m512i __DEFAULT_FN_ATTRS512
464{
465 return (__m512i) (__A);
466}
467
468static __inline__ __m512 __DEFAULT_FN_ATTRS512
470{
471 __m256 __B = __builtin_nondeterministic_value(__B);
472 return __builtin_shufflevector(
473 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
474 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
475}
476
477static __inline__ __m512i __DEFAULT_FN_ATTRS512
479{
480 __m256i __B = __builtin_nondeterministic_value(__B);
481 return __builtin_shufflevector(
482 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
483 __B, 0, 1, 2, 3, 4, 5, 6, 7);
484}
485
486static __inline__ __m512i __DEFAULT_FN_ATTRS512
488{
489 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
490}
491
492static __inline __m512 __DEFAULT_FN_ATTRS512
494{
495 return (__m512) (__A);
496}
497
498static __inline __m512d __DEFAULT_FN_ATTRS512
500{
501 return (__m512d) (__A);
502}
503
504static __inline __m128i __DEFAULT_FN_ATTRS512
506{
507 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
508}
509
510static __inline __m256i __DEFAULT_FN_ATTRS512
512{
513 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
514}
515
516static __inline__ __mmask16 __DEFAULT_FN_ATTRS
518{
519 return (__mmask16)__a;
520}
521
522static __inline__ int __DEFAULT_FN_ATTRS
524{
525 return (int)__a;
526}
527
528/// Constructs a 512-bit floating-point vector of [8 x double] from a
529/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
530/// contain the value of the source vector. The upper 384 bits are set
531/// to zero.
532///
533/// \headerfile <x86intrin.h>
534///
535/// This intrinsic has no corresponding instruction.
536///
537/// \param __a
538/// A 128-bit vector of [2 x double].
539/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
540/// contain the value of the parameter. The upper 384 bits are set to zero.
541static __inline __m512d __DEFAULT_FN_ATTRS512
543{
544 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
545}
546
547/// Constructs a 512-bit floating-point vector of [8 x double] from a
548/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
549/// contain the value of the source vector. The upper 256 bits are set
550/// to zero.
551///
552/// \headerfile <x86intrin.h>
553///
554/// This intrinsic has no corresponding instruction.
555///
556/// \param __a
557/// A 256-bit vector of [4 x double].
558/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
559/// contain the value of the parameter. The upper 256 bits are set to zero.
560static __inline __m512d __DEFAULT_FN_ATTRS512
562{
563 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
564}
565
566/// Constructs a 512-bit floating-point vector of [16 x float] from a
567/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
568/// the value of the source vector. The upper 384 bits are set to zero.
569///
570/// \headerfile <x86intrin.h>
571///
572/// This intrinsic has no corresponding instruction.
573///
574/// \param __a
575/// A 128-bit vector of [4 x float].
576/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
577/// contain the value of the parameter. The upper 384 bits are set to zero.
578static __inline __m512 __DEFAULT_FN_ATTRS512
580{
581 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
582}
583
584/// Constructs a 512-bit floating-point vector of [16 x float] from a
585/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
586/// the value of the source vector. The upper 256 bits are set to zero.
587///
588/// \headerfile <x86intrin.h>
589///
590/// This intrinsic has no corresponding instruction.
591///
592/// \param __a
593/// A 256-bit vector of [8 x float].
594/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
595/// contain the value of the parameter. The upper 256 bits are set to zero.
596static __inline __m512 __DEFAULT_FN_ATTRS512
598{
599 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
600}
601
602/// Constructs a 512-bit integer vector from a 128-bit integer vector.
603/// The lower 128 bits contain the value of the source vector. The upper
604/// 384 bits are set to zero.
605///
606/// \headerfile <x86intrin.h>
607///
608/// This intrinsic has no corresponding instruction.
609///
610/// \param __a
611/// A 128-bit integer vector.
612/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
613/// the parameter. The upper 384 bits are set to zero.
614static __inline __m512i __DEFAULT_FN_ATTRS512
616{
617 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
618}
619
620/// Constructs a 512-bit integer vector from a 256-bit integer vector.
621/// The lower 256 bits contain the value of the source vector. The upper
622/// 256 bits are set to zero.
623///
624/// \headerfile <x86intrin.h>
625///
626/// This intrinsic has no corresponding instruction.
627///
628/// \param __a
629/// A 256-bit integer vector.
630/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
631/// the parameter. The upper 256 bits are set to zero.
632static __inline __m512i __DEFAULT_FN_ATTRS512
634{
635 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
636}
637
638/* Bitwise operators */
639static __inline__ __m512i __DEFAULT_FN_ATTRS512
640_mm512_and_epi32(__m512i __a, __m512i __b)
641{
642 return (__m512i)((__v16su)__a & (__v16su)__b);
643}
644
645static __inline__ __m512i __DEFAULT_FN_ATTRS512
646_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
647{
648 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
649 (__v16si) _mm512_and_epi32(__a, __b),
650 (__v16si) __src);
651}
652
653static __inline__ __m512i __DEFAULT_FN_ATTRS512
655{
657 __k, __a, __b);
658}
659
660static __inline__ __m512i __DEFAULT_FN_ATTRS512
661_mm512_and_epi64(__m512i __a, __m512i __b)
662{
663 return (__m512i)((__v8du)__a & (__v8du)__b);
664}
665
666static __inline__ __m512i __DEFAULT_FN_ATTRS512
667_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
668{
669 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
670 (__v8di) _mm512_and_epi64(__a, __b),
671 (__v8di) __src);
672}
673
674static __inline__ __m512i __DEFAULT_FN_ATTRS512
676{
678 __k, __a, __b);
679}
680
681static __inline__ __m512i __DEFAULT_FN_ATTRS512
682_mm512_andnot_si512 (__m512i __A, __m512i __B)
683{
684 return (__m512i)(~(__v8du)__A & (__v8du)__B);
685}
686
687static __inline__ __m512i __DEFAULT_FN_ATTRS512
688_mm512_andnot_epi32 (__m512i __A, __m512i __B)
689{
690 return (__m512i)(~(__v16su)__A & (__v16su)__B);
691}
692
693static __inline__ __m512i __DEFAULT_FN_ATTRS512
694_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
695{
696 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
697 (__v16si)_mm512_andnot_epi32(__A, __B),
698 (__v16si)__W);
699}
700
701static __inline__ __m512i __DEFAULT_FN_ATTRS512
702_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
703{
705 __U, __A, __B);
706}
707
708static __inline__ __m512i __DEFAULT_FN_ATTRS512
709_mm512_andnot_epi64(__m512i __A, __m512i __B)
710{
711 return (__m512i)(~(__v8du)__A & (__v8du)__B);
712}
713
714static __inline__ __m512i __DEFAULT_FN_ATTRS512
715_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
716{
717 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
718 (__v8di)_mm512_andnot_epi64(__A, __B),
719 (__v8di)__W);
720}
721
722static __inline__ __m512i __DEFAULT_FN_ATTRS512
723_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
724{
726 __U, __A, __B);
727}
728
729static __inline__ __m512i __DEFAULT_FN_ATTRS512
730_mm512_or_epi32(__m512i __a, __m512i __b)
731{
732 return (__m512i)((__v16su)__a | (__v16su)__b);
733}
734
735static __inline__ __m512i __DEFAULT_FN_ATTRS512
736_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
737{
738 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
739 (__v16si)_mm512_or_epi32(__a, __b),
740 (__v16si)__src);
741}
742
743static __inline__ __m512i __DEFAULT_FN_ATTRS512
745{
746 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
747}
748
749static __inline__ __m512i __DEFAULT_FN_ATTRS512
750_mm512_or_epi64(__m512i __a, __m512i __b)
751{
752 return (__m512i)((__v8du)__a | (__v8du)__b);
753}
754
755static __inline__ __m512i __DEFAULT_FN_ATTRS512
756_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
757{
758 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
759 (__v8di)_mm512_or_epi64(__a, __b),
760 (__v8di)__src);
761}
762
763static __inline__ __m512i __DEFAULT_FN_ATTRS512
764_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
765{
766 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
767}
768
769static __inline__ __m512i __DEFAULT_FN_ATTRS512
770_mm512_xor_epi32(__m512i __a, __m512i __b)
771{
772 return (__m512i)((__v16su)__a ^ (__v16su)__b);
773}
774
775static __inline__ __m512i __DEFAULT_FN_ATTRS512
776_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
777{
778 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
779 (__v16si)_mm512_xor_epi32(__a, __b),
780 (__v16si)__src);
781}
782
783static __inline__ __m512i __DEFAULT_FN_ATTRS512
785{
786 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
787}
788
789static __inline__ __m512i __DEFAULT_FN_ATTRS512
790_mm512_xor_epi64(__m512i __a, __m512i __b)
791{
792 return (__m512i)((__v8du)__a ^ (__v8du)__b);
793}
794
795static __inline__ __m512i __DEFAULT_FN_ATTRS512
796_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
797{
798 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
799 (__v8di)_mm512_xor_epi64(__a, __b),
800 (__v8di)__src);
801}
802
803static __inline__ __m512i __DEFAULT_FN_ATTRS512
805{
806 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
807}
808
809static __inline__ __m512i __DEFAULT_FN_ATTRS512
810_mm512_and_si512(__m512i __a, __m512i __b)
811{
812 return (__m512i)((__v8du)__a & (__v8du)__b);
813}
814
815static __inline__ __m512i __DEFAULT_FN_ATTRS512
816_mm512_or_si512(__m512i __a, __m512i __b)
817{
818 return (__m512i)((__v8du)__a | (__v8du)__b);
819}
820
821static __inline__ __m512i __DEFAULT_FN_ATTRS512
822_mm512_xor_si512(__m512i __a, __m512i __b)
823{
824 return (__m512i)((__v8du)__a ^ (__v8du)__b);
825}
826
827/* Arithmetic */
828
829static __inline __m512d __DEFAULT_FN_ATTRS512
830_mm512_add_pd(__m512d __a, __m512d __b)
831{
832 return (__m512d)((__v8df)__a + (__v8df)__b);
833}
834
835static __inline __m512 __DEFAULT_FN_ATTRS512
836_mm512_add_ps(__m512 __a, __m512 __b)
837{
838 return (__m512)((__v16sf)__a + (__v16sf)__b);
839}
840
841static __inline __m512d __DEFAULT_FN_ATTRS512
842_mm512_mul_pd(__m512d __a, __m512d __b)
843{
844 return (__m512d)((__v8df)__a * (__v8df)__b);
845}
846
847static __inline __m512 __DEFAULT_FN_ATTRS512
848_mm512_mul_ps(__m512 __a, __m512 __b)
849{
850 return (__m512)((__v16sf)__a * (__v16sf)__b);
851}
852
853static __inline __m512d __DEFAULT_FN_ATTRS512
854_mm512_sub_pd(__m512d __a, __m512d __b)
855{
856 return (__m512d)((__v8df)__a - (__v8df)__b);
857}
858
859static __inline __m512 __DEFAULT_FN_ATTRS512
860_mm512_sub_ps(__m512 __a, __m512 __b)
861{
862 return (__m512)((__v16sf)__a - (__v16sf)__b);
863}
864
865static __inline__ __m512i __DEFAULT_FN_ATTRS512
866_mm512_add_epi64 (__m512i __A, __m512i __B)
867{
868 return (__m512i) ((__v8du) __A + (__v8du) __B);
869}
870
871static __inline__ __m512i __DEFAULT_FN_ATTRS512
872_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
873{
874 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
875 (__v8di)_mm512_add_epi64(__A, __B),
876 (__v8di)__W);
877}
878
879static __inline__ __m512i __DEFAULT_FN_ATTRS512
880_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
881{
882 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
883 (__v8di)_mm512_add_epi64(__A, __B),
884 (__v8di)_mm512_setzero_si512());
885}
886
887static __inline__ __m512i __DEFAULT_FN_ATTRS512
888_mm512_sub_epi64 (__m512i __A, __m512i __B)
889{
890 return (__m512i) ((__v8du) __A - (__v8du) __B);
891}
892
893static __inline__ __m512i __DEFAULT_FN_ATTRS512
894_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
895{
896 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
897 (__v8di)_mm512_sub_epi64(__A, __B),
898 (__v8di)__W);
899}
900
901static __inline__ __m512i __DEFAULT_FN_ATTRS512
902_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
903{
904 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
905 (__v8di)_mm512_sub_epi64(__A, __B),
906 (__v8di)_mm512_setzero_si512());
907}
908
909static __inline__ __m512i __DEFAULT_FN_ATTRS512
910_mm512_add_epi32 (__m512i __A, __m512i __B)
911{
912 return (__m512i) ((__v16su) __A + (__v16su) __B);
913}
914
915static __inline__ __m512i __DEFAULT_FN_ATTRS512
916_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
917{
918 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
919 (__v16si)_mm512_add_epi32(__A, __B),
920 (__v16si)__W);
921}
922
923static __inline__ __m512i __DEFAULT_FN_ATTRS512
924_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
925{
926 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
927 (__v16si)_mm512_add_epi32(__A, __B),
928 (__v16si)_mm512_setzero_si512());
929}
930
931static __inline__ __m512i __DEFAULT_FN_ATTRS512
932_mm512_sub_epi32 (__m512i __A, __m512i __B)
933{
934 return (__m512i) ((__v16su) __A - (__v16su) __B);
935}
936
937static __inline__ __m512i __DEFAULT_FN_ATTRS512
938_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
939{
940 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
941 (__v16si)_mm512_sub_epi32(__A, __B),
942 (__v16si)__W);
943}
944
945static __inline__ __m512i __DEFAULT_FN_ATTRS512
946_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
947{
948 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
949 (__v16si)_mm512_sub_epi32(__A, __B),
950 (__v16si)_mm512_setzero_si512());
951}
952
953#define _mm512_max_round_pd(A, B, R) \
954 ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
955 (__v8df)(__m512d)(B), (int)(R)))
956
957#define _mm512_mask_max_round_pd(W, U, A, B, R) \
958 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
959 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
960 (__v8df)(W)))
961
962#define _mm512_maskz_max_round_pd(U, A, B, R) \
963 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
964 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
965 (__v8df)_mm512_setzero_pd()))
966
967static __inline__ __m512d __DEFAULT_FN_ATTRS512
968_mm512_max_pd(__m512d __A, __m512d __B)
969{
970 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
972}
973
974static __inline__ __m512d __DEFAULT_FN_ATTRS512
975_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
976{
977 return (__m512d)__builtin_ia32_selectpd_512(__U,
978 (__v8df)_mm512_max_pd(__A, __B),
979 (__v8df)__W);
980}
981
982static __inline__ __m512d __DEFAULT_FN_ATTRS512
983_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
984{
985 return (__m512d)__builtin_ia32_selectpd_512(__U,
986 (__v8df)_mm512_max_pd(__A, __B),
987 (__v8df)_mm512_setzero_pd());
988}
989
990#define _mm512_max_round_ps(A, B, R) \
991 ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
992 (__v16sf)(__m512)(B), (int)(R)))
993
994#define _mm512_mask_max_round_ps(W, U, A, B, R) \
995 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
996 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
997 (__v16sf)(W)))
998
999#define _mm512_maskz_max_round_ps(U, A, B, R) \
1000 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1001 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
1002 (__v16sf)_mm512_setzero_ps()))
1003
1004static __inline__ __m512 __DEFAULT_FN_ATTRS512
1005_mm512_max_ps(__m512 __A, __m512 __B)
1006{
1007 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
1009}
1010
1011static __inline__ __m512 __DEFAULT_FN_ATTRS512
1012_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1013{
1014 return (__m512)__builtin_ia32_selectps_512(__U,
1015 (__v16sf)_mm512_max_ps(__A, __B),
1016 (__v16sf)__W);
1017}
1018
1019static __inline__ __m512 __DEFAULT_FN_ATTRS512
1020_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1021{
1022 return (__m512)__builtin_ia32_selectps_512(__U,
1023 (__v16sf)_mm512_max_ps(__A, __B),
1024 (__v16sf)_mm512_setzero_ps());
1025}
1026
1027static __inline__ __m128 __DEFAULT_FN_ATTRS128
1028_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1029 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1030 (__v4sf) __B,
1031 (__v4sf) __W,
1032 (__mmask8) __U,
1034}
1035
1036static __inline__ __m128 __DEFAULT_FN_ATTRS128
1037_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1038 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1039 (__v4sf) __B,
1040 (__v4sf) _mm_setzero_ps (),
1041 (__mmask8) __U,
1043}
1044
1045#define _mm_max_round_ss(A, B, R) \
1046 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1047 (__v4sf)(__m128)(B), \
1048 (__v4sf)_mm_setzero_ps(), \
1049 (__mmask8)-1, (int)(R)))
1050
1051#define _mm_mask_max_round_ss(W, U, A, B, R) \
1052 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1053 (__v4sf)(__m128)(B), \
1054 (__v4sf)(__m128)(W), (__mmask8)(U), \
1055 (int)(R)))
1056
1057#define _mm_maskz_max_round_ss(U, A, B, R) \
1058 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1059 (__v4sf)(__m128)(B), \
1060 (__v4sf)_mm_setzero_ps(), \
1061 (__mmask8)(U), (int)(R)))
1062
1063static __inline__ __m128d __DEFAULT_FN_ATTRS128
1064_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1065 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1066 (__v2df) __B,
1067 (__v2df) __W,
1068 (__mmask8) __U,
1070}
1071
1072static __inline__ __m128d __DEFAULT_FN_ATTRS128
1073_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1074 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1075 (__v2df) __B,
1076 (__v2df) _mm_setzero_pd (),
1077 (__mmask8) __U,
1079}
1080
1081#define _mm_max_round_sd(A, B, R) \
1082 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1083 (__v2df)(__m128d)(B), \
1084 (__v2df)_mm_setzero_pd(), \
1085 (__mmask8)-1, (int)(R)))
1086
1087#define _mm_mask_max_round_sd(W, U, A, B, R) \
1088 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1089 (__v2df)(__m128d)(B), \
1090 (__v2df)(__m128d)(W), \
1091 (__mmask8)(U), (int)(R)))
1092
1093#define _mm_maskz_max_round_sd(U, A, B, R) \
1094 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1095 (__v2df)(__m128d)(B), \
1096 (__v2df)_mm_setzero_pd(), \
1097 (__mmask8)(U), (int)(R)))
1098
1099static __inline __m512i
1101_mm512_max_epi32(__m512i __A, __m512i __B)
1102{
1103 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1104}
1105
1106static __inline__ __m512i __DEFAULT_FN_ATTRS512
1107_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1108{
1109 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1110 (__v16si)_mm512_max_epi32(__A, __B),
1111 (__v16si)__W);
1112}
1113
1114static __inline__ __m512i __DEFAULT_FN_ATTRS512
1115_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1116{
1117 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1118 (__v16si)_mm512_max_epi32(__A, __B),
1119 (__v16si)_mm512_setzero_si512());
1120}
1121
1122static __inline __m512i __DEFAULT_FN_ATTRS512
1123_mm512_max_epu32(__m512i __A, __m512i __B)
1124{
1125 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1126}
1127
1128static __inline__ __m512i __DEFAULT_FN_ATTRS512
1129_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1130{
1131 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1132 (__v16si)_mm512_max_epu32(__A, __B),
1133 (__v16si)__W);
1134}
1135
1136static __inline__ __m512i __DEFAULT_FN_ATTRS512
1137_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1138{
1139 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1140 (__v16si)_mm512_max_epu32(__A, __B),
1141 (__v16si)_mm512_setzero_si512());
1142}
1143
1144static __inline __m512i __DEFAULT_FN_ATTRS512
1145_mm512_max_epi64(__m512i __A, __m512i __B)
1146{
1147 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1148}
1149
1150static __inline__ __m512i __DEFAULT_FN_ATTRS512
1151_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1152{
1153 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1154 (__v8di)_mm512_max_epi64(__A, __B),
1155 (__v8di)__W);
1156}
1157
1158static __inline__ __m512i __DEFAULT_FN_ATTRS512
1159_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1160{
1161 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1162 (__v8di)_mm512_max_epi64(__A, __B),
1163 (__v8di)_mm512_setzero_si512());
1164}
1165
1166static __inline __m512i __DEFAULT_FN_ATTRS512
1167_mm512_max_epu64(__m512i __A, __m512i __B)
1168{
1169 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1170}
1171
1172static __inline__ __m512i __DEFAULT_FN_ATTRS512
1173_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1174{
1175 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1176 (__v8di)_mm512_max_epu64(__A, __B),
1177 (__v8di)__W);
1178}
1179
1180static __inline__ __m512i __DEFAULT_FN_ATTRS512
1181_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1182{
1183 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1184 (__v8di)_mm512_max_epu64(__A, __B),
1185 (__v8di)_mm512_setzero_si512());
1186}
1187
1188#define _mm512_min_round_pd(A, B, R) \
1189 ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1190 (__v8df)(__m512d)(B), (int)(R)))
1191
1192#define _mm512_mask_min_round_pd(W, U, A, B, R) \
1193 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1194 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1195 (__v8df)(W)))
1196
1197#define _mm512_maskz_min_round_pd(U, A, B, R) \
1198 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1199 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1200 (__v8df)_mm512_setzero_pd()))
1201
1202static __inline__ __m512d __DEFAULT_FN_ATTRS512
1203_mm512_min_pd(__m512d __A, __m512d __B)
1204{
1205 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1207}
1208
1209static __inline__ __m512d __DEFAULT_FN_ATTRS512
1210_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1211{
1212 return (__m512d)__builtin_ia32_selectpd_512(__U,
1213 (__v8df)_mm512_min_pd(__A, __B),
1214 (__v8df)__W);
1215}
1216
1217static __inline__ __m512d __DEFAULT_FN_ATTRS512
1218_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1219{
1220 return (__m512d)__builtin_ia32_selectpd_512(__U,
1221 (__v8df)_mm512_min_pd(__A, __B),
1222 (__v8df)_mm512_setzero_pd());
1223}
1224
1225#define _mm512_min_round_ps(A, B, R) \
1226 ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1227 (__v16sf)(__m512)(B), (int)(R)))
1228
1229#define _mm512_mask_min_round_ps(W, U, A, B, R) \
1230 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1231 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1232 (__v16sf)(W)))
1233
1234#define _mm512_maskz_min_round_ps(U, A, B, R) \
1235 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1236 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1237 (__v16sf)_mm512_setzero_ps()))
1238
1239static __inline__ __m512 __DEFAULT_FN_ATTRS512
1240_mm512_min_ps(__m512 __A, __m512 __B)
1241{
1242 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1244}
1245
1246static __inline__ __m512 __DEFAULT_FN_ATTRS512
1247_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1248{
1249 return (__m512)__builtin_ia32_selectps_512(__U,
1250 (__v16sf)_mm512_min_ps(__A, __B),
1251 (__v16sf)__W);
1252}
1253
1254static __inline__ __m512 __DEFAULT_FN_ATTRS512
1255_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1256{
1257 return (__m512)__builtin_ia32_selectps_512(__U,
1258 (__v16sf)_mm512_min_ps(__A, __B),
1259 (__v16sf)_mm512_setzero_ps());
1260}
1261
1262static __inline__ __m128 __DEFAULT_FN_ATTRS128
1263_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1264 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1265 (__v4sf) __B,
1266 (__v4sf) __W,
1267 (__mmask8) __U,
1269}
1270
1271static __inline__ __m128 __DEFAULT_FN_ATTRS128
1272_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1273 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1274 (__v4sf) __B,
1275 (__v4sf) _mm_setzero_ps (),
1276 (__mmask8) __U,
1278}
1279
1280#define _mm_min_round_ss(A, B, R) \
1281 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1282 (__v4sf)(__m128)(B), \
1283 (__v4sf)_mm_setzero_ps(), \
1284 (__mmask8)-1, (int)(R)))
1285
1286#define _mm_mask_min_round_ss(W, U, A, B, R) \
1287 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1288 (__v4sf)(__m128)(B), \
1289 (__v4sf)(__m128)(W), (__mmask8)(U), \
1290 (int)(R)))
1291
1292#define _mm_maskz_min_round_ss(U, A, B, R) \
1293 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1294 (__v4sf)(__m128)(B), \
1295 (__v4sf)_mm_setzero_ps(), \
1296 (__mmask8)(U), (int)(R)))
1297
1298static __inline__ __m128d __DEFAULT_FN_ATTRS128
1299_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1300 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1301 (__v2df) __B,
1302 (__v2df) __W,
1303 (__mmask8) __U,
1305}
1306
1307static __inline__ __m128d __DEFAULT_FN_ATTRS128
1308_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1309 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1310 (__v2df) __B,
1311 (__v2df) _mm_setzero_pd (),
1312 (__mmask8) __U,
1314}
1315
1316#define _mm_min_round_sd(A, B, R) \
1317 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1318 (__v2df)(__m128d)(B), \
1319 (__v2df)_mm_setzero_pd(), \
1320 (__mmask8)-1, (int)(R)))
1321
1322#define _mm_mask_min_round_sd(W, U, A, B, R) \
1323 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1324 (__v2df)(__m128d)(B), \
1325 (__v2df)(__m128d)(W), \
1326 (__mmask8)(U), (int)(R)))
1327
1328#define _mm_maskz_min_round_sd(U, A, B, R) \
1329 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1330 (__v2df)(__m128d)(B), \
1331 (__v2df)_mm_setzero_pd(), \
1332 (__mmask8)(U), (int)(R)))
1333
1334static __inline __m512i
1336_mm512_min_epi32(__m512i __A, __m512i __B)
1337{
1338 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1339}
1340
1341static __inline__ __m512i __DEFAULT_FN_ATTRS512
1342_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1343{
1344 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1345 (__v16si)_mm512_min_epi32(__A, __B),
1346 (__v16si)__W);
1347}
1348
1349static __inline__ __m512i __DEFAULT_FN_ATTRS512
1350_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1351{
1352 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1353 (__v16si)_mm512_min_epi32(__A, __B),
1354 (__v16si)_mm512_setzero_si512());
1355}
1356
1357static __inline __m512i __DEFAULT_FN_ATTRS512
1358_mm512_min_epu32(__m512i __A, __m512i __B)
1359{
1360 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1361}
1362
1363static __inline__ __m512i __DEFAULT_FN_ATTRS512
1364_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1365{
1366 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1367 (__v16si)_mm512_min_epu32(__A, __B),
1368 (__v16si)__W);
1369}
1370
1371static __inline__ __m512i __DEFAULT_FN_ATTRS512
1372_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1373{
1374 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1375 (__v16si)_mm512_min_epu32(__A, __B),
1376 (__v16si)_mm512_setzero_si512());
1377}
1378
1379static __inline __m512i __DEFAULT_FN_ATTRS512
1380_mm512_min_epi64(__m512i __A, __m512i __B)
1381{
1382 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1383}
1384
1385static __inline__ __m512i __DEFAULT_FN_ATTRS512
1386_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1387{
1388 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1389 (__v8di)_mm512_min_epi64(__A, __B),
1390 (__v8di)__W);
1391}
1392
1393static __inline__ __m512i __DEFAULT_FN_ATTRS512
1394_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1395{
1396 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1397 (__v8di)_mm512_min_epi64(__A, __B),
1398 (__v8di)_mm512_setzero_si512());
1399}
1400
1401static __inline __m512i __DEFAULT_FN_ATTRS512
1402_mm512_min_epu64(__m512i __A, __m512i __B)
1403{
1404 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1405}
1406
1407static __inline__ __m512i __DEFAULT_FN_ATTRS512
1408_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1409{
1410 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1411 (__v8di)_mm512_min_epu64(__A, __B),
1412 (__v8di)__W);
1413}
1414
1415static __inline__ __m512i __DEFAULT_FN_ATTRS512
1416_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1417{
1418 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1419 (__v8di)_mm512_min_epu64(__A, __B),
1420 (__v8di)_mm512_setzero_si512());
1421}
1422
1423static __inline __m512i __DEFAULT_FN_ATTRS512
1424_mm512_mul_epi32(__m512i __X, __m512i __Y)
1425{
1426 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1427}
1428
1429static __inline __m512i __DEFAULT_FN_ATTRS512
1430_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1431{
1432 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1433 (__v8di)_mm512_mul_epi32(__X, __Y),
1434 (__v8di)__W);
1435}
1436
1437static __inline __m512i __DEFAULT_FN_ATTRS512
1438_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1439{
1440 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1441 (__v8di)_mm512_mul_epi32(__X, __Y),
1442 (__v8di)_mm512_setzero_si512 ());
1443}
1444
1445static __inline __m512i __DEFAULT_FN_ATTRS512
1446_mm512_mul_epu32(__m512i __X, __m512i __Y)
1447{
1448 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1449}
1450
1451static __inline __m512i __DEFAULT_FN_ATTRS512
1452_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1453{
1454 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1455 (__v8di)_mm512_mul_epu32(__X, __Y),
1456 (__v8di)__W);
1457}
1458
1459static __inline __m512i __DEFAULT_FN_ATTRS512
1460_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1461{
1462 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1463 (__v8di)_mm512_mul_epu32(__X, __Y),
1464 (__v8di)_mm512_setzero_si512 ());
1465}
1466
1467static __inline __m512i __DEFAULT_FN_ATTRS512
1468_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1469{
1470 return (__m512i) ((__v16su) __A * (__v16su) __B);
1471}
1472
1473static __inline __m512i __DEFAULT_FN_ATTRS512
1474_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1475{
1476 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1477 (__v16si)_mm512_mullo_epi32(__A, __B),
1478 (__v16si)_mm512_setzero_si512());
1479}
1480
1481static __inline __m512i __DEFAULT_FN_ATTRS512
1482_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1483{
1484 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1485 (__v16si)_mm512_mullo_epi32(__A, __B),
1486 (__v16si)__W);
1487}
1488
1489static __inline__ __m512i __DEFAULT_FN_ATTRS512
1490_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1491 return (__m512i) ((__v8du) __A * (__v8du) __B);
1492}
1493
1494static __inline__ __m512i __DEFAULT_FN_ATTRS512
1495_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1496 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1497 (__v8di)_mm512_mullox_epi64(__A, __B),
1498 (__v8di)__W);
1499}
1500
1501#define _mm512_sqrt_round_pd(A, R) \
1502 ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1503
1504#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1505 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1506 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1507 (__v8df)(__m512d)(W)))
1508
1509#define _mm512_maskz_sqrt_round_pd(U, A, R) \
1510 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1511 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1512 (__v8df)_mm512_setzero_pd()))
1513
1514static __inline__ __m512d __DEFAULT_FN_ATTRS512
1515_mm512_sqrt_pd(__m512d __A)
1516{
1517 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1519}
1520
1521static __inline__ __m512d __DEFAULT_FN_ATTRS512
1522_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1523{
1524 return (__m512d)__builtin_ia32_selectpd_512(__U,
1525 (__v8df)_mm512_sqrt_pd(__A),
1526 (__v8df)__W);
1527}
1528
1529static __inline__ __m512d __DEFAULT_FN_ATTRS512
1531{
1532 return (__m512d)__builtin_ia32_selectpd_512(__U,
1533 (__v8df)_mm512_sqrt_pd(__A),
1534 (__v8df)_mm512_setzero_pd());
1535}
1536
1537#define _mm512_sqrt_round_ps(A, R) \
1538 ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1539
1540#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1541 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1542 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1543 (__v16sf)(__m512)(W)))
1544
1545#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1546 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1547 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1548 (__v16sf)_mm512_setzero_ps()))
1549
1550static __inline__ __m512 __DEFAULT_FN_ATTRS512
1552{
1553 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1555}
1556
1557static __inline__ __m512 __DEFAULT_FN_ATTRS512
1558_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1559{
1560 return (__m512)__builtin_ia32_selectps_512(__U,
1561 (__v16sf)_mm512_sqrt_ps(__A),
1562 (__v16sf)__W);
1563}
1564
1565static __inline__ __m512 __DEFAULT_FN_ATTRS512
1567{
1568 return (__m512)__builtin_ia32_selectps_512(__U,
1569 (__v16sf)_mm512_sqrt_ps(__A),
1570 (__v16sf)_mm512_setzero_ps());
1571}
1572
1573static __inline__ __m512d __DEFAULT_FN_ATTRS512
1575{
1576 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1577 (__v8df)
1579 (__mmask8) -1);}
1580
1581static __inline__ __m512d __DEFAULT_FN_ATTRS512
1582_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1583{
1584 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1585 (__v8df) __W,
1586 (__mmask8) __U);
1587}
1588
1589static __inline__ __m512d __DEFAULT_FN_ATTRS512
1591{
1592 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1593 (__v8df)
1595 (__mmask8) __U);
1596}
1597
1598static __inline__ __m512 __DEFAULT_FN_ATTRS512
1600{
1601 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1602 (__v16sf)
1604 (__mmask16) -1);
1605}
1606
1607static __inline__ __m512 __DEFAULT_FN_ATTRS512
1608_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1609{
1610 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1611 (__v16sf) __W,
1612 (__mmask16) __U);
1613}
1614
1615static __inline__ __m512 __DEFAULT_FN_ATTRS512
1617{
1618 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1619 (__v16sf)
1621 (__mmask16) __U);
1622}
1623
1624static __inline__ __m128 __DEFAULT_FN_ATTRS128
1625_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1626{
1627 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1628 (__v4sf) __B,
1629 (__v4sf)
1630 _mm_setzero_ps (),
1631 (__mmask8) -1);
1632}
1633
1634static __inline__ __m128 __DEFAULT_FN_ATTRS128
1635_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1636{
1637 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1638 (__v4sf) __B,
1639 (__v4sf) __W,
1640 (__mmask8) __U);
1641}
1642
1643static __inline__ __m128 __DEFAULT_FN_ATTRS128
1644_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1645{
1646 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1647 (__v4sf) __B,
1648 (__v4sf) _mm_setzero_ps (),
1649 (__mmask8) __U);
1650}
1651
1652static __inline__ __m128d __DEFAULT_FN_ATTRS128
1653_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1654{
1655 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1656 (__v2df) __B,
1657 (__v2df)
1658 _mm_setzero_pd (),
1659 (__mmask8) -1);
1660}
1661
1662static __inline__ __m128d __DEFAULT_FN_ATTRS128
1663_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1664{
1665 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1666 (__v2df) __B,
1667 (__v2df) __W,
1668 (__mmask8) __U);
1669}
1670
1671static __inline__ __m128d __DEFAULT_FN_ATTRS128
1672_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1673{
1674 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1675 (__v2df) __B,
1676 (__v2df) _mm_setzero_pd (),
1677 (__mmask8) __U);
1678}
1679
1680static __inline__ __m512d __DEFAULT_FN_ATTRS512
1682{
1683 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1684 (__v8df)
1686 (__mmask8) -1);
1687}
1688
1689static __inline__ __m512d __DEFAULT_FN_ATTRS512
1690_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1691{
1692 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1693 (__v8df) __W,
1694 (__mmask8) __U);
1695}
1696
1697static __inline__ __m512d __DEFAULT_FN_ATTRS512
1699{
1700 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1701 (__v8df)
1703 (__mmask8) __U);
1704}
1705
1706static __inline__ __m512 __DEFAULT_FN_ATTRS512
1708{
1709 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1710 (__v16sf)
1712 (__mmask16) -1);
1713}
1714
1715static __inline__ __m512 __DEFAULT_FN_ATTRS512
1716_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1717{
1718 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1719 (__v16sf) __W,
1720 (__mmask16) __U);
1721}
1722
1723static __inline__ __m512 __DEFAULT_FN_ATTRS512
1725{
1726 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1727 (__v16sf)
1729 (__mmask16) __U);
1730}
1731
1732static __inline__ __m128 __DEFAULT_FN_ATTRS128
1733_mm_rcp14_ss(__m128 __A, __m128 __B)
1734{
1735 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1736 (__v4sf) __B,
1737 (__v4sf)
1738 _mm_setzero_ps (),
1739 (__mmask8) -1);
1740}
1741
1742static __inline__ __m128 __DEFAULT_FN_ATTRS128
1743_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1744{
1745 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1746 (__v4sf) __B,
1747 (__v4sf) __W,
1748 (__mmask8) __U);
1749}
1750
1751static __inline__ __m128 __DEFAULT_FN_ATTRS128
1752_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1753{
1754 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1755 (__v4sf) __B,
1756 (__v4sf) _mm_setzero_ps (),
1757 (__mmask8) __U);
1758}
1759
1760static __inline__ __m128d __DEFAULT_FN_ATTRS128
1761_mm_rcp14_sd(__m128d __A, __m128d __B)
1762{
1763 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1764 (__v2df) __B,
1765 (__v2df)
1766 _mm_setzero_pd (),
1767 (__mmask8) -1);
1768}
1769
1770static __inline__ __m128d __DEFAULT_FN_ATTRS128
1771_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1772{
1773 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1774 (__v2df) __B,
1775 (__v2df) __W,
1776 (__mmask8) __U);
1777}
1778
1779static __inline__ __m128d __DEFAULT_FN_ATTRS128
1780_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1781{
1782 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1783 (__v2df) __B,
1784 (__v2df) _mm_setzero_pd (),
1785 (__mmask8) __U);
1786}
1787
1788static __inline __m512 __DEFAULT_FN_ATTRS512
1790{
1791 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1793 (__v16sf) __A, (unsigned short)-1,
1795}
1796
1797static __inline__ __m512 __DEFAULT_FN_ATTRS512
1798_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1799{
1800 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1802 (__v16sf) __W, __U,
1804}
1805
1806static __inline __m512d __DEFAULT_FN_ATTRS512
1808{
1809 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1811 (__v8df) __A, (unsigned char)-1,
1813}
1814
1815static __inline__ __m512d __DEFAULT_FN_ATTRS512
1816_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1817{
1818 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1820 (__v8df) __W, __U,
1822}
1823
1824static __inline__ __m512 __DEFAULT_FN_ATTRS512
1825_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1826{
1827 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1829 (__v16sf) __W, __U,
1831}
1832
1833static __inline __m512 __DEFAULT_FN_ATTRS512
1835{
1836 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1838 (__v16sf) __A, (unsigned short)-1,
1840}
1841
1842static __inline __m512d __DEFAULT_FN_ATTRS512
1843_mm512_ceil_pd(__m512d __A)
1844{
1845 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1847 (__v8df) __A, (unsigned char)-1,
1849}
1850
1851static __inline__ __m512d __DEFAULT_FN_ATTRS512
1852_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1853{
1854 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1856 (__v8df) __W, __U,
1858}
1859
1860static __inline __m512i __DEFAULT_FN_ATTRS512
1862{
1863 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1864}
1865
1866static __inline__ __m512i __DEFAULT_FN_ATTRS512
1867_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1868{
1869 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1870 (__v8di)_mm512_abs_epi64(__A),
1871 (__v8di)__W);
1872}
1873
1874static __inline__ __m512i __DEFAULT_FN_ATTRS512
1876{
1877 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1878 (__v8di)_mm512_abs_epi64(__A),
1879 (__v8di)_mm512_setzero_si512());
1880}
1881
1882static __inline __m512i __DEFAULT_FN_ATTRS512
1884{
1885 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1886}
1887
1888static __inline__ __m512i __DEFAULT_FN_ATTRS512
1889_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1890{
1891 return (__m512i)__builtin_ia32_selectd_512(__U,
1892 (__v16si)_mm512_abs_epi32(__A),
1893 (__v16si)__W);
1894}
1895
1896static __inline__ __m512i __DEFAULT_FN_ATTRS512
1898{
1899 return (__m512i)__builtin_ia32_selectd_512(__U,
1900 (__v16si)_mm512_abs_epi32(__A),
1901 (__v16si)_mm512_setzero_si512());
1902}
1903
1904static __inline__ __m128 __DEFAULT_FN_ATTRS128
1905_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1906 __A = _mm_add_ss(__A, __B);
1907 return __builtin_ia32_selectss_128(__U, __A, __W);
1908}
1909
1910static __inline__ __m128 __DEFAULT_FN_ATTRS128
1911_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1912 __A = _mm_add_ss(__A, __B);
1913 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1914}
1915
1916#define _mm_add_round_ss(A, B, R) \
1917 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1918 (__v4sf)(__m128)(B), \
1919 (__v4sf)_mm_setzero_ps(), \
1920 (__mmask8)-1, (int)(R)))
1921
1922#define _mm_mask_add_round_ss(W, U, A, B, R) \
1923 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1924 (__v4sf)(__m128)(B), \
1925 (__v4sf)(__m128)(W), (__mmask8)(U), \
1926 (int)(R)))
1927
1928#define _mm_maskz_add_round_ss(U, A, B, R) \
1929 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1930 (__v4sf)(__m128)(B), \
1931 (__v4sf)_mm_setzero_ps(), \
1932 (__mmask8)(U), (int)(R)))
1933
1934static __inline__ __m128d __DEFAULT_FN_ATTRS128
1935_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1936 __A = _mm_add_sd(__A, __B);
1937 return __builtin_ia32_selectsd_128(__U, __A, __W);
1938}
1939
1940static __inline__ __m128d __DEFAULT_FN_ATTRS128
1941_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1942 __A = _mm_add_sd(__A, __B);
1943 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1944}
1945#define _mm_add_round_sd(A, B, R) \
1946 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1947 (__v2df)(__m128d)(B), \
1948 (__v2df)_mm_setzero_pd(), \
1949 (__mmask8)-1, (int)(R)))
1950
1951#define _mm_mask_add_round_sd(W, U, A, B, R) \
1952 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1953 (__v2df)(__m128d)(B), \
1954 (__v2df)(__m128d)(W), \
1955 (__mmask8)(U), (int)(R)))
1956
1957#define _mm_maskz_add_round_sd(U, A, B, R) \
1958 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1959 (__v2df)(__m128d)(B), \
1960 (__v2df)_mm_setzero_pd(), \
1961 (__mmask8)(U), (int)(R)))
1962
1963static __inline__ __m512d __DEFAULT_FN_ATTRS512
1964_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1965 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1966 (__v8df)_mm512_add_pd(__A, __B),
1967 (__v8df)__W);
1968}
1969
1970static __inline__ __m512d __DEFAULT_FN_ATTRS512
1971_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1972 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1973 (__v8df)_mm512_add_pd(__A, __B),
1974 (__v8df)_mm512_setzero_pd());
1975}
1976
1977static __inline__ __m512 __DEFAULT_FN_ATTRS512
1978_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1979 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1980 (__v16sf)_mm512_add_ps(__A, __B),
1981 (__v16sf)__W);
1982}
1983
1984static __inline__ __m512 __DEFAULT_FN_ATTRS512
1985_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1986 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1987 (__v16sf)_mm512_add_ps(__A, __B),
1988 (__v16sf)_mm512_setzero_ps());
1989}
1990
1991#define _mm512_add_round_pd(A, B, R) \
1992 ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1993 (__v8df)(__m512d)(B), (int)(R)))
1994
1995#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1996 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1997 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1998 (__v8df)(__m512d)(W)))
1999
2000#define _mm512_maskz_add_round_pd(U, A, B, R) \
2001 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2002 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
2003 (__v8df)_mm512_setzero_pd()))
2004
2005#define _mm512_add_round_ps(A, B, R) \
2006 ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
2007 (__v16sf)(__m512)(B), (int)(R)))
2008
2009#define _mm512_mask_add_round_ps(W, U, A, B, R) \
2010 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2011 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
2012 (__v16sf)(__m512)(W)))
2013
2014#define _mm512_maskz_add_round_ps(U, A, B, R) \
2015 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2016 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
2017 (__v16sf)_mm512_setzero_ps()))
2018
2019static __inline__ __m128 __DEFAULT_FN_ATTRS128
2020_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2021 __A = _mm_sub_ss(__A, __B);
2022 return __builtin_ia32_selectss_128(__U, __A, __W);
2023}
2024
2025static __inline__ __m128 __DEFAULT_FN_ATTRS128
2026_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2027 __A = _mm_sub_ss(__A, __B);
2028 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2029}
2030#define _mm_sub_round_ss(A, B, R) \
2031 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2032 (__v4sf)(__m128)(B), \
2033 (__v4sf)_mm_setzero_ps(), \
2034 (__mmask8)-1, (int)(R)))
2035
2036#define _mm_mask_sub_round_ss(W, U, A, B, R) \
2037 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2038 (__v4sf)(__m128)(B), \
2039 (__v4sf)(__m128)(W), (__mmask8)(U), \
2040 (int)(R)))
2041
2042#define _mm_maskz_sub_round_ss(U, A, B, R) \
2043 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2044 (__v4sf)(__m128)(B), \
2045 (__v4sf)_mm_setzero_ps(), \
2046 (__mmask8)(U), (int)(R)))
2047
2048static __inline__ __m128d __DEFAULT_FN_ATTRS128
2049_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2050 __A = _mm_sub_sd(__A, __B);
2051 return __builtin_ia32_selectsd_128(__U, __A, __W);
2052}
2053
2054static __inline__ __m128d __DEFAULT_FN_ATTRS128
2055_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2056 __A = _mm_sub_sd(__A, __B);
2057 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2058}
2059
2060#define _mm_sub_round_sd(A, B, R) \
2061 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2062 (__v2df)(__m128d)(B), \
2063 (__v2df)_mm_setzero_pd(), \
2064 (__mmask8)-1, (int)(R)))
2065
2066#define _mm_mask_sub_round_sd(W, U, A, B, R) \
2067 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2068 (__v2df)(__m128d)(B), \
2069 (__v2df)(__m128d)(W), \
2070 (__mmask8)(U), (int)(R)))
2071
2072#define _mm_maskz_sub_round_sd(U, A, B, R) \
2073 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2074 (__v2df)(__m128d)(B), \
2075 (__v2df)_mm_setzero_pd(), \
2076 (__mmask8)(U), (int)(R)))
2077
2078static __inline__ __m512d __DEFAULT_FN_ATTRS512
2079_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2080 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2081 (__v8df)_mm512_sub_pd(__A, __B),
2082 (__v8df)__W);
2083}
2084
2085static __inline__ __m512d __DEFAULT_FN_ATTRS512
2086_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2087 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2088 (__v8df)_mm512_sub_pd(__A, __B),
2089 (__v8df)_mm512_setzero_pd());
2090}
2091
2092static __inline__ __m512 __DEFAULT_FN_ATTRS512
2093_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2094 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2095 (__v16sf)_mm512_sub_ps(__A, __B),
2096 (__v16sf)__W);
2097}
2098
2099static __inline__ __m512 __DEFAULT_FN_ATTRS512
2100_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2101 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2102 (__v16sf)_mm512_sub_ps(__A, __B),
2103 (__v16sf)_mm512_setzero_ps());
2104}
2105
2106#define _mm512_sub_round_pd(A, B, R) \
2107 ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
2108 (__v8df)(__m512d)(B), (int)(R)))
2109
2110#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
2111 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2112 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2113 (__v8df)(__m512d)(W)))
2114
2115#define _mm512_maskz_sub_round_pd(U, A, B, R) \
2116 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2117 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2118 (__v8df)_mm512_setzero_pd()))
2119
2120#define _mm512_sub_round_ps(A, B, R) \
2121 ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
2122 (__v16sf)(__m512)(B), (int)(R)))
2123
2124#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
2125 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2126 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2127 (__v16sf)(__m512)(W)))
2128
2129#define _mm512_maskz_sub_round_ps(U, A, B, R) \
2130 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2131 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2132 (__v16sf)_mm512_setzero_ps()))
2133
2134static __inline__ __m128 __DEFAULT_FN_ATTRS128
2135_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2136 __A = _mm_mul_ss(__A, __B);
2137 return __builtin_ia32_selectss_128(__U, __A, __W);
2138}
2139
2140static __inline__ __m128 __DEFAULT_FN_ATTRS128
2141_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2142 __A = _mm_mul_ss(__A, __B);
2143 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2144}
2145#define _mm_mul_round_ss(A, B, R) \
2146 ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2147 (__v4sf)(__m128)(B), \
2148 (__v4sf)_mm_setzero_ps(), \
2149 (__mmask8)-1, (int)(R)))
2150
2151#define _mm_mask_mul_round_ss(W, U, A, B, R) \
2152 ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2153 (__v4sf)(__m128)(B), \
2154 (__v4sf)(__m128)(W), (__mmask8)(U), \
2155 (int)(R)))
2156
2157#define _mm_maskz_mul_round_ss(U, A, B, R) \
2158 ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2159 (__v4sf)(__m128)(B), \
2160 (__v4sf)_mm_setzero_ps(), \
2161 (__mmask8)(U), (int)(R)))
2162
2163static __inline__ __m128d __DEFAULT_FN_ATTRS128
2164_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2165 __A = _mm_mul_sd(__A, __B);
2166 return __builtin_ia32_selectsd_128(__U, __A, __W);
2167}
2168
2169static __inline__ __m128d __DEFAULT_FN_ATTRS128
2170_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2171 __A = _mm_mul_sd(__A, __B);
2172 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2173}
2174
2175#define _mm_mul_round_sd(A, B, R) \
2176 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2177 (__v2df)(__m128d)(B), \
2178 (__v2df)_mm_setzero_pd(), \
2179 (__mmask8)-1, (int)(R)))
2180
2181#define _mm_mask_mul_round_sd(W, U, A, B, R) \
2182 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2183 (__v2df)(__m128d)(B), \
2184 (__v2df)(__m128d)(W), \
2185 (__mmask8)(U), (int)(R)))
2186
2187#define _mm_maskz_mul_round_sd(U, A, B, R) \
2188 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2189 (__v2df)(__m128d)(B), \
2190 (__v2df)_mm_setzero_pd(), \
2191 (__mmask8)(U), (int)(R)))
2192
2193static __inline__ __m512d __DEFAULT_FN_ATTRS512
2194_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2195 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2196 (__v8df)_mm512_mul_pd(__A, __B),
2197 (__v8df)__W);
2198}
2199
2200static __inline__ __m512d __DEFAULT_FN_ATTRS512
2201_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2202 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2203 (__v8df)_mm512_mul_pd(__A, __B),
2204 (__v8df)_mm512_setzero_pd());
2205}
2206
2207static __inline__ __m512 __DEFAULT_FN_ATTRS512
2208_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2209 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2210 (__v16sf)_mm512_mul_ps(__A, __B),
2211 (__v16sf)__W);
2212}
2213
2214static __inline__ __m512 __DEFAULT_FN_ATTRS512
2215_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2216 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2217 (__v16sf)_mm512_mul_ps(__A, __B),
2218 (__v16sf)_mm512_setzero_ps());
2219}
2220
2221#define _mm512_mul_round_pd(A, B, R) \
2222 ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
2223 (__v8df)(__m512d)(B), (int)(R)))
2224
2225#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
2226 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2227 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2228 (__v8df)(__m512d)(W)))
2229
2230#define _mm512_maskz_mul_round_pd(U, A, B, R) \
2231 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2232 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2233 (__v8df)_mm512_setzero_pd()))
2234
2235#define _mm512_mul_round_ps(A, B, R) \
2236 ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
2237 (__v16sf)(__m512)(B), (int)(R)))
2238
2239#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
2240 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2241 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2242 (__v16sf)(__m512)(W)))
2243
2244#define _mm512_maskz_mul_round_ps(U, A, B, R) \
2245 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2246 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2247 (__v16sf)_mm512_setzero_ps()))
2248
2249static __inline__ __m128 __DEFAULT_FN_ATTRS128
2250_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2251 __A = _mm_div_ss(__A, __B);
2252 return __builtin_ia32_selectss_128(__U, __A, __W);
2253}
2254
2255static __inline__ __m128 __DEFAULT_FN_ATTRS128
2256_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2257 __A = _mm_div_ss(__A, __B);
2258 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2259}
2260
2261#define _mm_div_round_ss(A, B, R) \
2262 ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2263 (__v4sf)(__m128)(B), \
2264 (__v4sf)_mm_setzero_ps(), \
2265 (__mmask8)-1, (int)(R)))
2266
2267#define _mm_mask_div_round_ss(W, U, A, B, R) \
2268 ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2269 (__v4sf)(__m128)(B), \
2270 (__v4sf)(__m128)(W), (__mmask8)(U), \
2271 (int)(R)))
2272
2273#define _mm_maskz_div_round_ss(U, A, B, R) \
2274 ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2275 (__v4sf)(__m128)(B), \
2276 (__v4sf)_mm_setzero_ps(), \
2277 (__mmask8)(U), (int)(R)))
2278
2279static __inline__ __m128d __DEFAULT_FN_ATTRS128
2280_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2281 __A = _mm_div_sd(__A, __B);
2282 return __builtin_ia32_selectsd_128(__U, __A, __W);
2283}
2284
2285static __inline__ __m128d __DEFAULT_FN_ATTRS128
2286_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2287 __A = _mm_div_sd(__A, __B);
2288 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2289}
2290
2291#define _mm_div_round_sd(A, B, R) \
2292 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2293 (__v2df)(__m128d)(B), \
2294 (__v2df)_mm_setzero_pd(), \
2295 (__mmask8)-1, (int)(R)))
2296
2297#define _mm_mask_div_round_sd(W, U, A, B, R) \
2298 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2299 (__v2df)(__m128d)(B), \
2300 (__v2df)(__m128d)(W), \
2301 (__mmask8)(U), (int)(R)))
2302
2303#define _mm_maskz_div_round_sd(U, A, B, R) \
2304 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2305 (__v2df)(__m128d)(B), \
2306 (__v2df)_mm_setzero_pd(), \
2307 (__mmask8)(U), (int)(R)))
2308
2309static __inline __m512d __DEFAULT_FN_ATTRS512
2310_mm512_div_pd(__m512d __a, __m512d __b)
2311{
2312 return (__m512d)((__v8df)__a/(__v8df)__b);
2313}
2314
2315static __inline__ __m512d __DEFAULT_FN_ATTRS512
2316_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2317 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2318 (__v8df)_mm512_div_pd(__A, __B),
2319 (__v8df)__W);
2320}
2321
2322static __inline__ __m512d __DEFAULT_FN_ATTRS512
2323_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2324 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2325 (__v8df)_mm512_div_pd(__A, __B),
2326 (__v8df)_mm512_setzero_pd());
2327}
2328
2329static __inline __m512 __DEFAULT_FN_ATTRS512
2330_mm512_div_ps(__m512 __a, __m512 __b)
2331{
2332 return (__m512)((__v16sf)__a/(__v16sf)__b);
2333}
2334
2335static __inline__ __m512 __DEFAULT_FN_ATTRS512
2336_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2337 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2338 (__v16sf)_mm512_div_ps(__A, __B),
2339 (__v16sf)__W);
2340}
2341
2342static __inline__ __m512 __DEFAULT_FN_ATTRS512
2343_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2344 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2345 (__v16sf)_mm512_div_ps(__A, __B),
2346 (__v16sf)_mm512_setzero_ps());
2347}
2348
2349#define _mm512_div_round_pd(A, B, R) \
2350 ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
2351 (__v8df)(__m512d)(B), (int)(R)))
2352
2353#define _mm512_mask_div_round_pd(W, U, A, B, R) \
2354 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2355 (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2356 (__v8df)(__m512d)(W)))
2357
2358#define _mm512_maskz_div_round_pd(U, A, B, R) \
2359 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2360 (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2361 (__v8df)_mm512_setzero_pd()))
2362
2363#define _mm512_div_round_ps(A, B, R) \
2364 ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
2365 (__v16sf)(__m512)(B), (int)(R)))
2366
2367#define _mm512_mask_div_round_ps(W, U, A, B, R) \
2368 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2369 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2370 (__v16sf)(__m512)(W)))
2371
2372#define _mm512_maskz_div_round_ps(U, A, B, R) \
2373 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2374 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2375 (__v16sf)_mm512_setzero_ps()))
2376
2377#define _mm512_roundscale_ps(A, B) \
2378 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
2379 (__v16sf)_mm512_undefined_ps(), \
2380 (__mmask16)-1, \
2381 _MM_FROUND_CUR_DIRECTION))
2382
2383#define _mm512_mask_roundscale_ps(A, B, C, imm) \
2384 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2385 (__v16sf)(__m512)(A), (__mmask16)(B), \
2386 _MM_FROUND_CUR_DIRECTION))
2387
2388#define _mm512_maskz_roundscale_ps(A, B, imm) \
2389 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2390 (__v16sf)_mm512_setzero_ps(), \
2391 (__mmask16)(A), \
2392 _MM_FROUND_CUR_DIRECTION))
2393
2394#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
2395 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2396 (__v16sf)(__m512)(A), (__mmask16)(B), \
2397 (int)(R)))
2398
2399#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
2400 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2401 (__v16sf)_mm512_setzero_ps(), \
2402 (__mmask16)(A), (int)(R)))
2403
2404#define _mm512_roundscale_round_ps(A, imm, R) \
2405 ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
2406 (__v16sf)_mm512_undefined_ps(), \
2407 (__mmask16)-1, (int)(R)))
2408
2409#define _mm512_roundscale_pd(A, B) \
2410 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
2411 (__v8df)_mm512_undefined_pd(), \
2412 (__mmask8)-1, \
2413 _MM_FROUND_CUR_DIRECTION))
2414
2415#define _mm512_mask_roundscale_pd(A, B, C, imm) \
2416 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2417 (__v8df)(__m512d)(A), (__mmask8)(B), \
2418 _MM_FROUND_CUR_DIRECTION))
2419
2420#define _mm512_maskz_roundscale_pd(A, B, imm) \
2421 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2422 (__v8df)_mm512_setzero_pd(), \
2423 (__mmask8)(A), \
2424 _MM_FROUND_CUR_DIRECTION))
2425
2426#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
2427 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2428 (__v8df)(__m512d)(A), (__mmask8)(B), \
2429 (int)(R)))
2430
2431#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
2432 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2433 (__v8df)_mm512_setzero_pd(), \
2434 (__mmask8)(A), (int)(R)))
2435
2436#define _mm512_roundscale_round_pd(A, imm, R) \
2437 ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
2438 (__v8df)_mm512_undefined_pd(), \
2439 (__mmask8)-1, (int)(R)))
2440
2441#define _mm512_fmadd_round_pd(A, B, C, R) \
2442 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2443 (__v8df)(__m512d)(B), \
2444 (__v8df)(__m512d)(C), \
2445 (__mmask8)-1, (int)(R)))
2446
2447
2448#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
2449 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2450 (__v8df)(__m512d)(B), \
2451 (__v8df)(__m512d)(C), \
2452 (__mmask8)(U), (int)(R)))
2453
2454
2455#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
2456 ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
2457 (__v8df)(__m512d)(B), \
2458 (__v8df)(__m512d)(C), \
2459 (__mmask8)(U), (int)(R)))
2460
2461
2462#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
2463 ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2464 (__v8df)(__m512d)(B), \
2465 (__v8df)(__m512d)(C), \
2466 (__mmask8)(U), (int)(R)))
2467
2468
2469#define _mm512_fmsub_round_pd(A, B, C, R) \
2470 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2471 (__v8df)(__m512d)(B), \
2472 -(__v8df)(__m512d)(C), \
2473 (__mmask8)-1, (int)(R)))
2474
2475
2476#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
2477 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2478 (__v8df)(__m512d)(B), \
2479 -(__v8df)(__m512d)(C), \
2480 (__mmask8)(U), (int)(R)))
2481
2482
2483#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
2484 ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2485 (__v8df)(__m512d)(B), \
2486 -(__v8df)(__m512d)(C), \
2487 (__mmask8)(U), (int)(R)))
2488
2489
2490#define _mm512_fnmadd_round_pd(A, B, C, R) \
2491 ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2492 (__v8df)(__m512d)(B), \
2493 (__v8df)(__m512d)(C), \
2494 (__mmask8)-1, (int)(R)))
2495
2496
2497#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
2498 ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
2499 (__v8df)(__m512d)(B), \
2500 (__v8df)(__m512d)(C), \
2501 (__mmask8)(U), (int)(R)))
2502
2503
2504#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
2505 ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2506 (__v8df)(__m512d)(B), \
2507 (__v8df)(__m512d)(C), \
2508 (__mmask8)(U), (int)(R)))
2509
2510
2511#define _mm512_fnmsub_round_pd(A, B, C, R) \
2512 ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2513 (__v8df)(__m512d)(B), \
2514 -(__v8df)(__m512d)(C), \
2515 (__mmask8)-1, (int)(R)))
2516
2517
2518#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
2519 ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2520 (__v8df)(__m512d)(B), \
2521 -(__v8df)(__m512d)(C), \
2522 (__mmask8)(U), (int)(R)))
2523
2524
2525static __inline__ __m512d __DEFAULT_FN_ATTRS512
2526_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2527{
2528 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2529 (__v8df) __B,
2530 (__v8df) __C,
2531 (__mmask8) -1,
2533}
2534
2535static __inline__ __m512d __DEFAULT_FN_ATTRS512
2536_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2537{
2538 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2539 (__v8df) __B,
2540 (__v8df) __C,
2541 (__mmask8) __U,
2543}
2544
2545static __inline__ __m512d __DEFAULT_FN_ATTRS512
2546_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2547{
2548 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2549 (__v8df) __B,
2550 (__v8df) __C,
2551 (__mmask8) __U,
2553}
2554
2555static __inline__ __m512d __DEFAULT_FN_ATTRS512
2556_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2557{
2558 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2559 (__v8df) __B,
2560 (__v8df) __C,
2561 (__mmask8) __U,
2563}
2564
2565static __inline__ __m512d __DEFAULT_FN_ATTRS512
2566_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2567{
2568 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2569 (__v8df) __B,
2570 -(__v8df) __C,
2571 (__mmask8) -1,
2573}
2574
2575static __inline__ __m512d __DEFAULT_FN_ATTRS512
2576_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2577{
2578 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2579 (__v8df) __B,
2580 -(__v8df) __C,
2581 (__mmask8) __U,
2583}
2584
2585static __inline__ __m512d __DEFAULT_FN_ATTRS512
2586_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2587{
2588 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2589 (__v8df) __B,
2590 -(__v8df) __C,
2591 (__mmask8) __U,
2593}
2594
2595static __inline__ __m512d __DEFAULT_FN_ATTRS512
2596_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2597{
2598 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2599 -(__v8df) __B,
2600 (__v8df) __C,
2601 (__mmask8) -1,
2603}
2604
2605static __inline__ __m512d __DEFAULT_FN_ATTRS512
2606_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2607{
2608 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2609 (__v8df) __B,
2610 (__v8df) __C,
2611 (__mmask8) __U,
2613}
2614
2615static __inline__ __m512d __DEFAULT_FN_ATTRS512
2616_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2617{
2618 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2619 (__v8df) __B,
2620 (__v8df) __C,
2621 (__mmask8) __U,
2623}
2624
2625static __inline__ __m512d __DEFAULT_FN_ATTRS512
2626_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2627{
2628 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2629 -(__v8df) __B,
2630 -(__v8df) __C,
2631 (__mmask8) -1,
2633}
2634
2635static __inline__ __m512d __DEFAULT_FN_ATTRS512
2636_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2637{
2638 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2639 (__v8df) __B,
2640 -(__v8df) __C,
2641 (__mmask8) __U,
2643}
2644
2645#define _mm512_fmadd_round_ps(A, B, C, R) \
2646 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2647 (__v16sf)(__m512)(B), \
2648 (__v16sf)(__m512)(C), \
2649 (__mmask16)-1, (int)(R)))
2650
2651
2652#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
2653 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2654 (__v16sf)(__m512)(B), \
2655 (__v16sf)(__m512)(C), \
2656 (__mmask16)(U), (int)(R)))
2657
2658
2659#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
2660 ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
2661 (__v16sf)(__m512)(B), \
2662 (__v16sf)(__m512)(C), \
2663 (__mmask16)(U), (int)(R)))
2664
2665
2666#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
2667 ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2668 (__v16sf)(__m512)(B), \
2669 (__v16sf)(__m512)(C), \
2670 (__mmask16)(U), (int)(R)))
2671
2672
2673#define _mm512_fmsub_round_ps(A, B, C, R) \
2674 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2675 (__v16sf)(__m512)(B), \
2676 -(__v16sf)(__m512)(C), \
2677 (__mmask16)-1, (int)(R)))
2678
2679
2680#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
2681 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2682 (__v16sf)(__m512)(B), \
2683 -(__v16sf)(__m512)(C), \
2684 (__mmask16)(U), (int)(R)))
2685
2686
2687#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
2688 ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2689 (__v16sf)(__m512)(B), \
2690 -(__v16sf)(__m512)(C), \
2691 (__mmask16)(U), (int)(R)))
2692
2693
2694#define _mm512_fnmadd_round_ps(A, B, C, R) \
2695 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2696 -(__v16sf)(__m512)(B), \
2697 (__v16sf)(__m512)(C), \
2698 (__mmask16)-1, (int)(R)))
2699
2700
2701#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
2702 ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
2703 (__v16sf)(__m512)(B), \
2704 (__v16sf)(__m512)(C), \
2705 (__mmask16)(U), (int)(R)))
2706
2707
2708#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
2709 ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2710 (__v16sf)(__m512)(B), \
2711 (__v16sf)(__m512)(C), \
2712 (__mmask16)(U), (int)(R)))
2713
2714
2715#define _mm512_fnmsub_round_ps(A, B, C, R) \
2716 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2717 -(__v16sf)(__m512)(B), \
2718 -(__v16sf)(__m512)(C), \
2719 (__mmask16)-1, (int)(R)))
2720
2721
2722#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
2723 ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2724 (__v16sf)(__m512)(B), \
2725 -(__v16sf)(__m512)(C), \
2726 (__mmask16)(U), (int)(R)))
2727
2728
2729static __inline__ __m512 __DEFAULT_FN_ATTRS512
2730_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2731{
2732 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2733 (__v16sf) __B,
2734 (__v16sf) __C,
2735 (__mmask16) -1,
2737}
2738
2739static __inline__ __m512 __DEFAULT_FN_ATTRS512
2740_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2741{
2742 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2743 (__v16sf) __B,
2744 (__v16sf) __C,
2745 (__mmask16) __U,
2747}
2748
2749static __inline__ __m512 __DEFAULT_FN_ATTRS512
2750_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2751{
2752 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2753 (__v16sf) __B,
2754 (__v16sf) __C,
2755 (__mmask16) __U,
2757}
2758
2759static __inline__ __m512 __DEFAULT_FN_ATTRS512
2760_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2761{
2762 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2763 (__v16sf) __B,
2764 (__v16sf) __C,
2765 (__mmask16) __U,
2767}
2768
2769static __inline__ __m512 __DEFAULT_FN_ATTRS512
2770_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2771{
2772 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2773 (__v16sf) __B,
2774 -(__v16sf) __C,
2775 (__mmask16) -1,
2777}
2778
2779static __inline__ __m512 __DEFAULT_FN_ATTRS512
2780_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2781{
2782 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2783 (__v16sf) __B,
2784 -(__v16sf) __C,
2785 (__mmask16) __U,
2787}
2788
2789static __inline__ __m512 __DEFAULT_FN_ATTRS512
2790_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2791{
2792 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2793 (__v16sf) __B,
2794 -(__v16sf) __C,
2795 (__mmask16) __U,
2797}
2798
2799static __inline__ __m512 __DEFAULT_FN_ATTRS512
2800_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2801{
2802 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2803 -(__v16sf) __B,
2804 (__v16sf) __C,
2805 (__mmask16) -1,
2807}
2808
2809static __inline__ __m512 __DEFAULT_FN_ATTRS512
2810_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2811{
2812 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2813 (__v16sf) __B,
2814 (__v16sf) __C,
2815 (__mmask16) __U,
2817}
2818
2819static __inline__ __m512 __DEFAULT_FN_ATTRS512
2820_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2821{
2822 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2823 (__v16sf) __B,
2824 (__v16sf) __C,
2825 (__mmask16) __U,
2827}
2828
2829static __inline__ __m512 __DEFAULT_FN_ATTRS512
2830_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2831{
2832 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2833 -(__v16sf) __B,
2834 -(__v16sf) __C,
2835 (__mmask16) -1,
2837}
2838
2839static __inline__ __m512 __DEFAULT_FN_ATTRS512
2840_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2841{
2842 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2843 (__v16sf) __B,
2844 -(__v16sf) __C,
2845 (__mmask16) __U,
2847}
2848
2849#define _mm512_fmaddsub_round_pd(A, B, C, R) \
2850 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2851 (__v8df)(__m512d)(B), \
2852 (__v8df)(__m512d)(C), \
2853 (__mmask8)-1, (int)(R)))
2854
2855
2856#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
2857 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2858 (__v8df)(__m512d)(B), \
2859 (__v8df)(__m512d)(C), \
2860 (__mmask8)(U), (int)(R)))
2861
2862
2863#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
2864 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
2865 (__v8df)(__m512d)(B), \
2866 (__v8df)(__m512d)(C), \
2867 (__mmask8)(U), (int)(R)))
2868
2869
2870#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
2871 ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2872 (__v8df)(__m512d)(B), \
2873 (__v8df)(__m512d)(C), \
2874 (__mmask8)(U), (int)(R)))
2875
2876
2877#define _mm512_fmsubadd_round_pd(A, B, C, R) \
2878 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2879 (__v8df)(__m512d)(B), \
2880 -(__v8df)(__m512d)(C), \
2881 (__mmask8)-1, (int)(R)))
2882
2883
2884#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
2885 ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2886 (__v8df)(__m512d)(B), \
2887 -(__v8df)(__m512d)(C), \
2888 (__mmask8)(U), (int)(R)))
2889
2890
2891#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
2892 ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2893 (__v8df)(__m512d)(B), \
2894 -(__v8df)(__m512d)(C), \
2895 (__mmask8)(U), (int)(R)))
2896
2897
2898static __inline__ __m512d __DEFAULT_FN_ATTRS512
2899_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2900{
2901 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2902 (__v8df) __B,
2903 (__v8df) __C,
2904 (__mmask8) -1,
2906}
2907
2908static __inline__ __m512d __DEFAULT_FN_ATTRS512
2909_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2910{
2911 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2912 (__v8df) __B,
2913 (__v8df) __C,
2914 (__mmask8) __U,
2916}
2917
2918static __inline__ __m512d __DEFAULT_FN_ATTRS512
2919_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2920{
2921 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2922 (__v8df) __B,
2923 (__v8df) __C,
2924 (__mmask8) __U,
2926}
2927
2928static __inline__ __m512d __DEFAULT_FN_ATTRS512
2929_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2930{
2931 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2932 (__v8df) __B,
2933 (__v8df) __C,
2934 (__mmask8) __U,
2936}
2937
2938static __inline__ __m512d __DEFAULT_FN_ATTRS512
2939_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2940{
2941 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2942 (__v8df) __B,
2943 -(__v8df) __C,
2944 (__mmask8) -1,
2946}
2947
2948static __inline__ __m512d __DEFAULT_FN_ATTRS512
2949_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2950{
2951 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2952 (__v8df) __B,
2953 -(__v8df) __C,
2954 (__mmask8) __U,
2956}
2957
2958static __inline__ __m512d __DEFAULT_FN_ATTRS512
2959_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2960{
2961 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2962 (__v8df) __B,
2963 -(__v8df) __C,
2964 (__mmask8) __U,
2966}
2967
2968#define _mm512_fmaddsub_round_ps(A, B, C, R) \
2969 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2970 (__v16sf)(__m512)(B), \
2971 (__v16sf)(__m512)(C), \
2972 (__mmask16)-1, (int)(R)))
2973
2974
2975#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
2976 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2977 (__v16sf)(__m512)(B), \
2978 (__v16sf)(__m512)(C), \
2979 (__mmask16)(U), (int)(R)))
2980
2981
2982#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
2983 ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
2984 (__v16sf)(__m512)(B), \
2985 (__v16sf)(__m512)(C), \
2986 (__mmask16)(U), (int)(R)))
2987
2988
2989#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
2990 ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2991 (__v16sf)(__m512)(B), \
2992 (__v16sf)(__m512)(C), \
2993 (__mmask16)(U), (int)(R)))
2994
2995
2996#define _mm512_fmsubadd_round_ps(A, B, C, R) \
2997 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2998 (__v16sf)(__m512)(B), \
2999 -(__v16sf)(__m512)(C), \
3000 (__mmask16)-1, (int)(R)))
3001
3002
3003#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3004 ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3005 (__v16sf)(__m512)(B), \
3006 -(__v16sf)(__m512)(C), \
3007 (__mmask16)(U), (int)(R)))
3008
3009
3010#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3011 ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3012 (__v16sf)(__m512)(B), \
3013 -(__v16sf)(__m512)(C), \
3014 (__mmask16)(U), (int)(R)))
3015
3016
3017static __inline__ __m512 __DEFAULT_FN_ATTRS512
3018_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3019{
3020 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3021 (__v16sf) __B,
3022 (__v16sf) __C,
3023 (__mmask16) -1,
3025}
3026
3027static __inline__ __m512 __DEFAULT_FN_ATTRS512
3028_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3029{
3030 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3031 (__v16sf) __B,
3032 (__v16sf) __C,
3033 (__mmask16) __U,
3035}
3036
3037static __inline__ __m512 __DEFAULT_FN_ATTRS512
3038_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3039{
3040 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3041 (__v16sf) __B,
3042 (__v16sf) __C,
3043 (__mmask16) __U,
3045}
3046
3047static __inline__ __m512 __DEFAULT_FN_ATTRS512
3048_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3049{
3050 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3051 (__v16sf) __B,
3052 (__v16sf) __C,
3053 (__mmask16) __U,
3055}
3056
3057static __inline__ __m512 __DEFAULT_FN_ATTRS512
3058_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3059{
3060 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3061 (__v16sf) __B,
3062 -(__v16sf) __C,
3063 (__mmask16) -1,
3065}
3066
3067static __inline__ __m512 __DEFAULT_FN_ATTRS512
3068_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3069{
3070 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3071 (__v16sf) __B,
3072 -(__v16sf) __C,
3073 (__mmask16) __U,
3075}
3076
3077static __inline__ __m512 __DEFAULT_FN_ATTRS512
3078_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3079{
3080 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3081 (__v16sf) __B,
3082 -(__v16sf) __C,
3083 (__mmask16) __U,
3085}
3086
3087#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3088 ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3089 (__v8df)(__m512d)(B), \
3090 (__v8df)(__m512d)(C), \
3091 (__mmask8)(U), (int)(R)))
3092
3093
3094static __inline__ __m512d __DEFAULT_FN_ATTRS512
3095_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3096{
3097 return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3098 (__v8df) __B,
3099 (__v8df) __C,
3100 (__mmask8) __U,
3102}
3103
3104#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3105 ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3106 (__v16sf)(__m512)(B), \
3107 (__v16sf)(__m512)(C), \
3108 (__mmask16)(U), (int)(R)))
3109
3110static __inline__ __m512 __DEFAULT_FN_ATTRS512
3111_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3112{
3113 return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3114 (__v16sf) __B,
3115 (__v16sf) __C,
3116 (__mmask16) __U,
3118}
3119
3120#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3121 ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3122 (__v8df)(__m512d)(B), \
3123 (__v8df)(__m512d)(C), \
3124 (__mmask8)(U), (int)(R)))
3125
3126
3127static __inline__ __m512d __DEFAULT_FN_ATTRS512
3128_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3129{
3130 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3131 (__v8df) __B,
3132 (__v8df) __C,
3133 (__mmask8) __U,
3135}
3136
3137#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3138 ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3139 (__v16sf)(__m512)(B), \
3140 (__v16sf)(__m512)(C), \
3141 (__mmask16)(U), (int)(R)))
3142
3143
3144static __inline__ __m512 __DEFAULT_FN_ATTRS512
3145_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3146{
3147 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3148 (__v16sf) __B,
3149 (__v16sf) __C,
3150 (__mmask16) __U,
3152}
3153
3154#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3155 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3156 -(__v8df)(__m512d)(B), \
3157 (__v8df)(__m512d)(C), \
3158 (__mmask8)(U), (int)(R)))
3159
3160
3161static __inline__ __m512d __DEFAULT_FN_ATTRS512
3162_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3163{
3164 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3165 -(__v8df) __B,
3166 (__v8df) __C,
3167 (__mmask8) __U,
3169}
3170
3171#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3172 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3173 -(__v16sf)(__m512)(B), \
3174 (__v16sf)(__m512)(C), \
3175 (__mmask16)(U), (int)(R)))
3176
3177
3178static __inline__ __m512 __DEFAULT_FN_ATTRS512
3179_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3180{
3181 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3182 -(__v16sf) __B,
3183 (__v16sf) __C,
3184 (__mmask16) __U,
3186}
3187
3188#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3189 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3190 -(__v8df)(__m512d)(B), \
3191 -(__v8df)(__m512d)(C), \
3192 (__mmask8)(U), (int)(R)))
3193
3194
3195#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3196 ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
3197 (__v8df)(__m512d)(B), \
3198 (__v8df)(__m512d)(C), \
3199 (__mmask8)(U), (int)(R)))
3200
3201
3202static __inline__ __m512d __DEFAULT_FN_ATTRS512
3203_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3204{
3205 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3206 -(__v8df) __B,
3207 -(__v8df) __C,
3208 (__mmask8) __U,
3210}
3211
3212static __inline__ __m512d __DEFAULT_FN_ATTRS512
3213_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3214{
3215 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3216 (__v8df) __B,
3217 (__v8df) __C,
3218 (__mmask8) __U,
3220}
3221
3222#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3223 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3224 -(__v16sf)(__m512)(B), \
3225 -(__v16sf)(__m512)(C), \
3226 (__mmask16)(U), (int)(R)))
3227
3228
3229#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3230 ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3231 (__v16sf)(__m512)(B), \
3232 (__v16sf)(__m512)(C), \
3233 (__mmask16)(U), (int)(R)))
3234
3235
3236static __inline__ __m512 __DEFAULT_FN_ATTRS512
3237_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3238{
3239 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3240 -(__v16sf) __B,
3241 -(__v16sf) __C,
3242 (__mmask16) __U,
3244}
3245
3246static __inline__ __m512 __DEFAULT_FN_ATTRS512
3247_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3248{
3249 return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3250 (__v16sf) __B,
3251 (__v16sf) __C,
3252 (__mmask16) __U,
3254}
3255
3256
3257
3258/* Vector permutations */
3259
3260static __inline __m512i __DEFAULT_FN_ATTRS512
3261_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3262{
3263 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3264 (__v16si) __B);
3265}
3266
3267static __inline__ __m512i __DEFAULT_FN_ATTRS512
3268_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3269 __m512i __B)
3270{
3271 return (__m512i)__builtin_ia32_selectd_512(__U,
3272 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3273 (__v16si)__A);
3274}
3275
3276static __inline__ __m512i __DEFAULT_FN_ATTRS512
3277_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3278 __m512i __B)
3279{
3280 return (__m512i)__builtin_ia32_selectd_512(__U,
3281 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3282 (__v16si)__I);
3283}
3284
3285static __inline__ __m512i __DEFAULT_FN_ATTRS512
3286_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3287 __m512i __B)
3288{
3289 return (__m512i)__builtin_ia32_selectd_512(__U,
3290 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3291 (__v16si)_mm512_setzero_si512());
3292}
3293
3294static __inline __m512i __DEFAULT_FN_ATTRS512
3295_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3296{
3297 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3298 (__v8di) __B);
3299}
3300
3301static __inline__ __m512i __DEFAULT_FN_ATTRS512
3302_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3303 __m512i __B)
3304{
3305 return (__m512i)__builtin_ia32_selectq_512(__U,
3306 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3307 (__v8di)__A);
3308}
3309
3310static __inline__ __m512i __DEFAULT_FN_ATTRS512
3311_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3312 __m512i __B)
3313{
3314 return (__m512i)__builtin_ia32_selectq_512(__U,
3315 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3316 (__v8di)__I);
3317}
3318
3319static __inline__ __m512i __DEFAULT_FN_ATTRS512
3320_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3321 __m512i __B)
3322{
3323 return (__m512i)__builtin_ia32_selectq_512(__U,
3324 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3325 (__v8di)_mm512_setzero_si512());
3326}
3327
3328#define _mm512_alignr_epi64(A, B, I) \
3329 ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
3330 (__v8di)(__m512i)(B), (int)(I)))
3331
3332#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
3333 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3334 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3335 (__v8di)(__m512i)(W)))
3336
3337#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
3338 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3339 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3340 (__v8di)_mm512_setzero_si512()))
3341
3342#define _mm512_alignr_epi32(A, B, I) \
3343 ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
3344 (__v16si)(__m512i)(B), (int)(I)))
3345
3346#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
3347 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3348 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3349 (__v16si)(__m512i)(W)))
3350
3351#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
3352 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3353 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3354 (__v16si)_mm512_setzero_si512()))
3355/* Vector Extract */
3356
3357#define _mm512_extractf64x4_pd(A, I) \
3358 ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
3359 (__v4df)_mm256_undefined_pd(), \
3360 (__mmask8)-1))
3361
3362#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
3363 ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3364 (__v4df)(__m256d)(W), \
3365 (__mmask8)(U)))
3366
3367#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
3368 ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
3369 (__v4df)_mm256_setzero_pd(), \
3370 (__mmask8)(U)))
3371
3372#define _mm512_extractf32x4_ps(A, I) \
3373 ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
3374 (__v4sf)_mm_undefined_ps(), \
3375 (__mmask8)-1))
3376
3377#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
3378 ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3379 (__v4sf)(__m128)(W), \
3380 (__mmask8)(U)))
3381
3382#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
3383 ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
3384 (__v4sf)_mm_setzero_ps(), \
3385 (__mmask8)(U)))
3386
3387/* Vector Blend */
3388
3389static __inline __m512d __DEFAULT_FN_ATTRS512
3390_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3391{
3392 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3393 (__v8df) __W,
3394 (__v8df) __A);
3395}
3396
3397static __inline __m512 __DEFAULT_FN_ATTRS512
3398_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3399{
3400 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3401 (__v16sf) __W,
3402 (__v16sf) __A);
3403}
3404
3405static __inline __m512i __DEFAULT_FN_ATTRS512
3406_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3407{
3408 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3409 (__v8di) __W,
3410 (__v8di) __A);
3411}
3412
3413static __inline __m512i __DEFAULT_FN_ATTRS512
3414_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3415{
3416 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3417 (__v16si) __W,
3418 (__v16si) __A);
3419}
3420
3421/* Compare */
3422
3423#define _mm512_cmp_round_ps_mask(A, B, P, R) \
3424 ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3425 (__v16sf)(__m512)(B), (int)(P), \
3426 (__mmask16)-1, (int)(R)))
3427
3428#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
3429 ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3430 (__v16sf)(__m512)(B), (int)(P), \
3431 (__mmask16)(U), (int)(R)))
3432
3433#define _mm512_cmp_ps_mask(A, B, P) \
3434 _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3435#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
3436 _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3437
3438#define _mm512_cmpeq_ps_mask(A, B) \
3439 _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
3440#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
3441 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
3442
3443#define _mm512_cmplt_ps_mask(A, B) \
3444 _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
3445#define _mm512_mask_cmplt_ps_mask(k, A, B) \
3446 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
3447
3448#define _mm512_cmple_ps_mask(A, B) \
3449 _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
3450#define _mm512_mask_cmple_ps_mask(k, A, B) \
3451 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
3452
3453#define _mm512_cmpunord_ps_mask(A, B) \
3454 _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
3455#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
3456 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
3457
3458#define _mm512_cmpneq_ps_mask(A, B) \
3459 _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
3460#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
3461 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
3462
3463#define _mm512_cmpnlt_ps_mask(A, B) \
3464 _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
3465#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
3466 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
3467
3468#define _mm512_cmpnle_ps_mask(A, B) \
3469 _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
3470#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
3471 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
3472
3473#define _mm512_cmpord_ps_mask(A, B) \
3474 _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
3475#define _mm512_mask_cmpord_ps_mask(k, A, B) \
3476 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3477
3478#define _mm512_cmp_round_pd_mask(A, B, P, R) \
3479 ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3480 (__v8df)(__m512d)(B), (int)(P), \
3481 (__mmask8)-1, (int)(R)))
3482
3483#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
3484 ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3485 (__v8df)(__m512d)(B), (int)(P), \
3486 (__mmask8)(U), (int)(R)))
3487
3488#define _mm512_cmp_pd_mask(A, B, P) \
3489 _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3490#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
3491 _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3492
3493#define _mm512_cmpeq_pd_mask(A, B) \
3494 _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
3495#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
3496 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
3497
3498#define _mm512_cmplt_pd_mask(A, B) \
3499 _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
3500#define _mm512_mask_cmplt_pd_mask(k, A, B) \
3501 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
3502
3503#define _mm512_cmple_pd_mask(A, B) \
3504 _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
3505#define _mm512_mask_cmple_pd_mask(k, A, B) \
3506 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
3507
3508#define _mm512_cmpunord_pd_mask(A, B) \
3509 _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
3510#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
3511 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
3512
3513#define _mm512_cmpneq_pd_mask(A, B) \
3514 _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
3515#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
3516 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
3517
3518#define _mm512_cmpnlt_pd_mask(A, B) \
3519 _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
3520#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
3521 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
3522
3523#define _mm512_cmpnle_pd_mask(A, B) \
3524 _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
3525#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
3526 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
3527
3528#define _mm512_cmpord_pd_mask(A, B) \
3529 _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
3530#define _mm512_mask_cmpord_pd_mask(k, A, B) \
3531 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3532
3533/* Conversion */
3534
3535#define _mm512_cvtt_roundps_epu32(A, R) \
3536 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3537 (__v16si)_mm512_undefined_epi32(), \
3538 (__mmask16)-1, (int)(R)))
3539
3540#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3541 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3542 (__v16si)(__m512i)(W), \
3543 (__mmask16)(U), (int)(R)))
3544
3545#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3546 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3547 (__v16si)_mm512_setzero_si512(), \
3548 (__mmask16)(U), (int)(R)))
3549
3550
3551static __inline __m512i __DEFAULT_FN_ATTRS512
3553{
3554 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3555 (__v16si)
3557 (__mmask16) -1,
3559}
3560
3561static __inline__ __m512i __DEFAULT_FN_ATTRS512
3562_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3563{
3564 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3565 (__v16si) __W,
3566 (__mmask16) __U,
3568}
3569
3570static __inline__ __m512i __DEFAULT_FN_ATTRS512
3572{
3573 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3574 (__v16si) _mm512_setzero_si512 (),
3575 (__mmask16) __U,
3577}
3578
3579#define _mm512_cvt_roundepi32_ps(A, R) \
3580 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3581 (__v16sf)_mm512_setzero_ps(), \
3582 (__mmask16)-1, (int)(R)))
3583
3584#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
3585 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3586 (__v16sf)(__m512)(W), \
3587 (__mmask16)(U), (int)(R)))
3588
3589#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
3590 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3591 (__v16sf)_mm512_setzero_ps(), \
3592 (__mmask16)(U), (int)(R)))
3593
3594#define _mm512_cvt_roundepu32_ps(A, R) \
3595 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3596 (__v16sf)_mm512_setzero_ps(), \
3597 (__mmask16)-1, (int)(R)))
3598
3599#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
3600 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3601 (__v16sf)(__m512)(W), \
3602 (__mmask16)(U), (int)(R)))
3603
3604#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
3605 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3606 (__v16sf)_mm512_setzero_ps(), \
3607 (__mmask16)(U), (int)(R)))
3608
3609static __inline__ __m512 __DEFAULT_FN_ATTRS512
3611{
3612 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3613}
3614
3615static __inline__ __m512 __DEFAULT_FN_ATTRS512
3616_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3617{
3618 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3619 (__v16sf)_mm512_cvtepu32_ps(__A),
3620 (__v16sf)__W);
3621}
3622
3623static __inline__ __m512 __DEFAULT_FN_ATTRS512
3625{
3626 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3627 (__v16sf)_mm512_cvtepu32_ps(__A),
3628 (__v16sf)_mm512_setzero_ps());
3629}
3630
3631static __inline __m512d __DEFAULT_FN_ATTRS512
3633{
3634 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3635}
3636
3637static __inline__ __m512d __DEFAULT_FN_ATTRS512
3638_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3639{
3640 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3641 (__v8df)_mm512_cvtepi32_pd(__A),
3642 (__v8df)__W);
3643}
3644
3645static __inline__ __m512d __DEFAULT_FN_ATTRS512
3647{
3648 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3649 (__v8df)_mm512_cvtepi32_pd(__A),
3650 (__v8df)_mm512_setzero_pd());
3651}
3652
3653static __inline__ __m512d __DEFAULT_FN_ATTRS512
3655{
3656 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3657}
3658
3659static __inline__ __m512d __DEFAULT_FN_ATTRS512
3660_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3661{
3662 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3663}
3664
3665static __inline__ __m512 __DEFAULT_FN_ATTRS512
3667{
3668 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3669}
3670
3671static __inline__ __m512 __DEFAULT_FN_ATTRS512
3672_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3673{
3674 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3675 (__v16sf)_mm512_cvtepi32_ps(__A),
3676 (__v16sf)__W);
3677}
3678
3679static __inline__ __m512 __DEFAULT_FN_ATTRS512
3681{
3682 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3683 (__v16sf)_mm512_cvtepi32_ps(__A),
3684 (__v16sf)_mm512_setzero_ps());
3685}
3686
3687static __inline __m512d __DEFAULT_FN_ATTRS512
3689{
3690 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3691}
3692
3693static __inline__ __m512d __DEFAULT_FN_ATTRS512
3694_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3695{
3696 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3697 (__v8df)_mm512_cvtepu32_pd(__A),
3698 (__v8df)__W);
3699}
3700
3701static __inline__ __m512d __DEFAULT_FN_ATTRS512
3703{
3704 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3705 (__v8df)_mm512_cvtepu32_pd(__A),
3706 (__v8df)_mm512_setzero_pd());
3707}
3708
3709static __inline__ __m512d __DEFAULT_FN_ATTRS512
3711{
3712 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3713}
3714
3715static __inline__ __m512d __DEFAULT_FN_ATTRS512
3716_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3717{
3718 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3719}
3720
3721#define _mm512_cvt_roundpd_ps(A, R) \
3722 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3723 (__v8sf)_mm256_setzero_ps(), \
3724 (__mmask8)-1, (int)(R)))
3725
3726#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3727 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3728 (__v8sf)(__m256)(W), (__mmask8)(U), \
3729 (int)(R)))
3730
3731#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3732 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3733 (__v8sf)_mm256_setzero_ps(), \
3734 (__mmask8)(U), (int)(R)))
3735
3736static __inline__ __m256 __DEFAULT_FN_ATTRS512
3737_mm512_cvtpd_ps (__m512d __A)
3738{
3739 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3740 (__v8sf) _mm256_undefined_ps (),
3741 (__mmask8) -1,
3743}
3744
3745static __inline__ __m256 __DEFAULT_FN_ATTRS512
3746_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3747{
3748 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3749 (__v8sf) __W,
3750 (__mmask8) __U,
3752}
3753
3754static __inline__ __m256 __DEFAULT_FN_ATTRS512
3756{
3757 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3758 (__v8sf) _mm256_setzero_ps (),
3759 (__mmask8) __U,
3761}
3762
3763static __inline__ __m512 __DEFAULT_FN_ATTRS512
3765{
3766 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3767 (__v8sf) _mm256_setzero_ps (),
3768 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3769}
3770
3771static __inline__ __m512 __DEFAULT_FN_ATTRS512
3772_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3773{
3774 return (__m512) __builtin_shufflevector (
3776 __U, __A),
3777 (__v8sf) _mm256_setzero_ps (),
3778 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3779}
3780
3781#define _mm512_cvt_roundps_ph(A, I) \
3782 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3783 (__v16hi)_mm256_undefined_si256(), \
3784 (__mmask16)-1))
3785
3786#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
3787 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3788 (__v16hi)(__m256i)(U), \
3789 (__mmask16)(W)))
3790
3791#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
3792 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3793 (__v16hi)_mm256_setzero_si256(), \
3794 (__mmask16)(W)))
3795
3796#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
3797#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
3798#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3799
3800#define _mm512_cvt_roundph_ps(A, R) \
3801 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3802 (__v16sf)_mm512_undefined_ps(), \
3803 (__mmask16)-1, (int)(R)))
3804
3805#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
3806 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3807 (__v16sf)(__m512)(W), \
3808 (__mmask16)(U), (int)(R)))
3809
3810#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
3811 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3812 (__v16sf)_mm512_setzero_ps(), \
3813 (__mmask16)(U), (int)(R)))
3814
3815
3816static __inline __m512 __DEFAULT_FN_ATTRS512
3818{
3819 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3820 (__v16sf)
3822 (__mmask16) -1,
3824}
3825
3826static __inline__ __m512 __DEFAULT_FN_ATTRS512
3827_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3828{
3829 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3830 (__v16sf) __W,
3831 (__mmask16) __U,
3833}
3834
3835static __inline__ __m512 __DEFAULT_FN_ATTRS512
3837{
3838 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3839 (__v16sf) _mm512_setzero_ps (),
3840 (__mmask16) __U,
3842}
3843
3844#define _mm512_cvtt_roundpd_epi32(A, R) \
3845 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3846 (__v8si)_mm256_setzero_si256(), \
3847 (__mmask8)-1, (int)(R)))
3848
3849#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3850 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3851 (__v8si)(__m256i)(W), \
3852 (__mmask8)(U), (int)(R)))
3853
3854#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3855 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3856 (__v8si)_mm256_setzero_si256(), \
3857 (__mmask8)(U), (int)(R)))
3858
3859static __inline __m256i __DEFAULT_FN_ATTRS512
3861{
3862 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3863 (__v8si)_mm256_setzero_si256(),
3864 (__mmask8) -1,
3866}
3867
3868static __inline__ __m256i __DEFAULT_FN_ATTRS512
3869_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3870{
3871 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3872 (__v8si) __W,
3873 (__mmask8) __U,
3875}
3876
3877static __inline__ __m256i __DEFAULT_FN_ATTRS512
3879{
3880 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3881 (__v8si) _mm256_setzero_si256 (),
3882 (__mmask8) __U,
3884}
3885
3886#define _mm512_cvtt_roundps_epi32(A, R) \
3887 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3888 (__v16si)_mm512_setzero_si512(), \
3889 (__mmask16)-1, (int)(R)))
3890
3891#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3892 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3893 (__v16si)(__m512i)(W), \
3894 (__mmask16)(U), (int)(R)))
3895
3896#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3897 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3898 (__v16si)_mm512_setzero_si512(), \
3899 (__mmask16)(U), (int)(R)))
3900
3901static __inline __m512i __DEFAULT_FN_ATTRS512
3903{
3904 return (__m512i)
3905 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3906 (__v16si) _mm512_setzero_si512 (),
3908}
3909
3910static __inline__ __m512i __DEFAULT_FN_ATTRS512
3911_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3912{
3913 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3914 (__v16si) __W,
3915 (__mmask16) __U,
3917}
3918
3919static __inline__ __m512i __DEFAULT_FN_ATTRS512
3921{
3922 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3923 (__v16si) _mm512_setzero_si512 (),
3924 (__mmask16) __U,
3926}
3927
3928#define _mm512_cvt_roundps_epi32(A, R) \
3929 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3930 (__v16si)_mm512_setzero_si512(), \
3931 (__mmask16)-1, (int)(R)))
3932
3933#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
3934 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3935 (__v16si)(__m512i)(W), \
3936 (__mmask16)(U), (int)(R)))
3937
3938#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
3939 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3940 (__v16si)_mm512_setzero_si512(), \
3941 (__mmask16)(U), (int)(R)))
3942
3943static __inline__ __m512i __DEFAULT_FN_ATTRS512
3945{
3946 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3947 (__v16si) _mm512_undefined_epi32 (),
3948 (__mmask16) -1,
3950}
3951
3952static __inline__ __m512i __DEFAULT_FN_ATTRS512
3953_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3954{
3955 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3956 (__v16si) __W,
3957 (__mmask16) __U,
3959}
3960
3961static __inline__ __m512i __DEFAULT_FN_ATTRS512
3963{
3964 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3965 (__v16si)
3967 (__mmask16) __U,
3969}
3970
3971#define _mm512_cvt_roundpd_epi32(A, R) \
3972 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3973 (__v8si)_mm256_setzero_si256(), \
3974 (__mmask8)-1, (int)(R)))
3975
3976#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
3977 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3978 (__v8si)(__m256i)(W), \
3979 (__mmask8)(U), (int)(R)))
3980
3981#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
3982 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3983 (__v8si)_mm256_setzero_si256(), \
3984 (__mmask8)(U), (int)(R)))
3985
3986static __inline__ __m256i __DEFAULT_FN_ATTRS512
3988{
3989 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3990 (__v8si)
3992 (__mmask8) -1,
3994}
3995
3996static __inline__ __m256i __DEFAULT_FN_ATTRS512
3997_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3998{
3999 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4000 (__v8si) __W,
4001 (__mmask8) __U,
4003}
4004
4005static __inline__ __m256i __DEFAULT_FN_ATTRS512
4007{
4008 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4009 (__v8si)
4011 (__mmask8) __U,
4013}
4014
4015#define _mm512_cvt_roundps_epu32(A, R) \
4016 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4017 (__v16si)_mm512_setzero_si512(), \
4018 (__mmask16)-1, (int)(R)))
4019
4020#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
4021 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4022 (__v16si)(__m512i)(W), \
4023 (__mmask16)(U), (int)(R)))
4024
4025#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
4026 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4027 (__v16si)_mm512_setzero_si512(), \
4028 (__mmask16)(U), (int)(R)))
4029
4030static __inline__ __m512i __DEFAULT_FN_ATTRS512
4032{
4033 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4034 (__v16si)\
4036 (__mmask16) -1,\
4038}
4039
4040static __inline__ __m512i __DEFAULT_FN_ATTRS512
4041_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4042{
4043 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4044 (__v16si) __W,
4045 (__mmask16) __U,
4047}
4048
4049static __inline__ __m512i __DEFAULT_FN_ATTRS512
4051{
4052 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4053 (__v16si)
4055 (__mmask16) __U ,
4057}
4058
4059#define _mm512_cvt_roundpd_epu32(A, R) \
4060 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4061 (__v8si)_mm256_setzero_si256(), \
4062 (__mmask8)-1, (int)(R)))
4063
4064#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
4065 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4066 (__v8si)(__m256i)(W), \
4067 (__mmask8)(U), (int)(R)))
4068
4069#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
4070 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4071 (__v8si)_mm256_setzero_si256(), \
4072 (__mmask8)(U), (int)(R)))
4073
4074static __inline__ __m256i __DEFAULT_FN_ATTRS512
4076{
4077 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4078 (__v8si)
4080 (__mmask8) -1,
4082}
4083
4084static __inline__ __m256i __DEFAULT_FN_ATTRS512
4085_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4086{
4087 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4088 (__v8si) __W,
4089 (__mmask8) __U,
4091}
4092
4093static __inline__ __m256i __DEFAULT_FN_ATTRS512
4095{
4096 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4097 (__v8si)
4099 (__mmask8) __U,
4101}
4102
4103static __inline__ double __DEFAULT_FN_ATTRS512
4105{
4106 return __a[0];
4107}
4108
4109static __inline__ float __DEFAULT_FN_ATTRS512
4111{
4112 return __a[0];
4113}
4114
4115/* Unpack and Interleave */
4116
4117static __inline __m512d __DEFAULT_FN_ATTRS512
4118_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4119{
4120 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4121 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4122}
4123
4124static __inline__ __m512d __DEFAULT_FN_ATTRS512
4125_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4126{
4127 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4128 (__v8df)_mm512_unpackhi_pd(__A, __B),
4129 (__v8df)__W);
4130}
4131
4132static __inline__ __m512d __DEFAULT_FN_ATTRS512
4133_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4134{
4135 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4136 (__v8df)_mm512_unpackhi_pd(__A, __B),
4137 (__v8df)_mm512_setzero_pd());
4138}
4139
4140static __inline __m512d __DEFAULT_FN_ATTRS512
4141_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4142{
4143 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4144 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4145}
4146
4147static __inline__ __m512d __DEFAULT_FN_ATTRS512
4148_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4149{
4150 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4151 (__v8df)_mm512_unpacklo_pd(__A, __B),
4152 (__v8df)__W);
4153}
4154
4155static __inline__ __m512d __DEFAULT_FN_ATTRS512
4156_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4157{
4158 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4159 (__v8df)_mm512_unpacklo_pd(__A, __B),
4160 (__v8df)_mm512_setzero_pd());
4161}
4162
4163static __inline __m512 __DEFAULT_FN_ATTRS512
4165{
4166 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4167 2, 18, 3, 19,
4168 2+4, 18+4, 3+4, 19+4,
4169 2+8, 18+8, 3+8, 19+8,
4170 2+12, 18+12, 3+12, 19+12);
4171}
4172
4173static __inline__ __m512 __DEFAULT_FN_ATTRS512
4174_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4175{
4176 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4177 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4178 (__v16sf)__W);
4179}
4180
4181static __inline__ __m512 __DEFAULT_FN_ATTRS512
4182_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4183{
4184 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4185 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4186 (__v16sf)_mm512_setzero_ps());
4187}
4188
4189static __inline __m512 __DEFAULT_FN_ATTRS512
4191{
4192 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4193 0, 16, 1, 17,
4194 0+4, 16+4, 1+4, 17+4,
4195 0+8, 16+8, 1+8, 17+8,
4196 0+12, 16+12, 1+12, 17+12);
4197}
4198
4199static __inline__ __m512 __DEFAULT_FN_ATTRS512
4200_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4201{
4202 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4203 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4204 (__v16sf)__W);
4205}
4206
4207static __inline__ __m512 __DEFAULT_FN_ATTRS512
4208_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4209{
4210 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4211 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4212 (__v16sf)_mm512_setzero_ps());
4213}
4214
4215static __inline__ __m512i __DEFAULT_FN_ATTRS512
4216_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4217{
4218 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4219 2, 18, 3, 19,
4220 2+4, 18+4, 3+4, 19+4,
4221 2+8, 18+8, 3+8, 19+8,
4222 2+12, 18+12, 3+12, 19+12);
4223}
4224
4225static __inline__ __m512i __DEFAULT_FN_ATTRS512
4226_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4227{
4228 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4229 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4230 (__v16si)__W);
4231}
4232
4233static __inline__ __m512i __DEFAULT_FN_ATTRS512
4234_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4235{
4236 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4237 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4238 (__v16si)_mm512_setzero_si512());
4239}
4240
4241static __inline__ __m512i __DEFAULT_FN_ATTRS512
4242_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4243{
4244 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4245 0, 16, 1, 17,
4246 0+4, 16+4, 1+4, 17+4,
4247 0+8, 16+8, 1+8, 17+8,
4248 0+12, 16+12, 1+12, 17+12);
4249}
4250
4251static __inline__ __m512i __DEFAULT_FN_ATTRS512
4252_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4253{
4254 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4255 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4256 (__v16si)__W);
4257}
4258
4259static __inline__ __m512i __DEFAULT_FN_ATTRS512
4260_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4261{
4262 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4263 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4264 (__v16si)_mm512_setzero_si512());
4265}
4266
4267static __inline__ __m512i __DEFAULT_FN_ATTRS512
4268_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4269{
4270 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4271 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4272}
4273
4274static __inline__ __m512i __DEFAULT_FN_ATTRS512
4275_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4276{
4277 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4278 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4279 (__v8di)__W);
4280}
4281
4282static __inline__ __m512i __DEFAULT_FN_ATTRS512
4283_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4284{
4285 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4286 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4287 (__v8di)_mm512_setzero_si512());
4288}
4289
4290static __inline__ __m512i __DEFAULT_FN_ATTRS512
4291_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4292{
4293 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4294 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4295}
4296
4297static __inline__ __m512i __DEFAULT_FN_ATTRS512
4298_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4299{
4300 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4301 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4302 (__v8di)__W);
4303}
4304
4305static __inline__ __m512i __DEFAULT_FN_ATTRS512
4306_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4307{
4308 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4309 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4310 (__v8di)_mm512_setzero_si512());
4311}
4312
4313
4314/* SIMD load ops */
4315
4316static __inline __m512i __DEFAULT_FN_ATTRS512
4318{
4319 struct __loadu_si512 {
4320 __m512i_u __v;
4321 } __attribute__((__packed__, __may_alias__));
4322 return ((const struct __loadu_si512*)__P)->__v;
4323}
4324
4325static __inline __m512i __DEFAULT_FN_ATTRS512
4327{
4328 struct __loadu_epi32 {
4329 __m512i_u __v;
4330 } __attribute__((__packed__, __may_alias__));
4331 return ((const struct __loadu_epi32*)__P)->__v;
4332}
4333
4334static __inline __m512i __DEFAULT_FN_ATTRS512
4335_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4336{
4337 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4338 (__v16si) __W,
4339 (__mmask16) __U);
4340}
4341
4342
4343static __inline __m512i __DEFAULT_FN_ATTRS512
4345{
4346 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4347 (__v16si)
4349 (__mmask16) __U);
4350}
4351
4352static __inline __m512i __DEFAULT_FN_ATTRS512
4354{
4355 struct __loadu_epi64 {
4356 __m512i_u __v;
4357 } __attribute__((__packed__, __may_alias__));
4358 return ((const struct __loadu_epi64*)__P)->__v;
4359}
4360
4361static __inline __m512i __DEFAULT_FN_ATTRS512
4362_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4363{
4364 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4365 (__v8di) __W,
4366 (__mmask8) __U);
4367}
4368
4369static __inline __m512i __DEFAULT_FN_ATTRS512
4371{
4372 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4373 (__v8di)
4375 (__mmask8) __U);
4376}
4377
4378static __inline __m512 __DEFAULT_FN_ATTRS512
4379_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4380{
4381 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4382 (__v16sf) __W,
4383 (__mmask16) __U);
4384}
4385
4386static __inline __m512 __DEFAULT_FN_ATTRS512
4388{
4389 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4390 (__v16sf)
4392 (__mmask16) __U);
4393}
4394
4395static __inline __m512d __DEFAULT_FN_ATTRS512
4396_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4397{
4398 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4399 (__v8df) __W,
4400 (__mmask8) __U);
4401}
4402
4403static __inline __m512d __DEFAULT_FN_ATTRS512
4405{
4406 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4407 (__v8df)
4409 (__mmask8) __U);
4410}
4411
4412static __inline __m512d __DEFAULT_FN_ATTRS512
4414{
4415 struct __loadu_pd {
4416 __m512d_u __v;
4417 } __attribute__((__packed__, __may_alias__));
4418 return ((const struct __loadu_pd*)__p)->__v;
4419}
4420
4421static __inline __m512 __DEFAULT_FN_ATTRS512
4423{
4424 struct __loadu_ps {
4425 __m512_u __v;
4426 } __attribute__((__packed__, __may_alias__));
4427 return ((const struct __loadu_ps*)__p)->__v;
4428}
4429
4430static __inline __m512 __DEFAULT_FN_ATTRS512
4432{
4433 return *(const __m512*)__p;
4434}
4435
4436static __inline __m512 __DEFAULT_FN_ATTRS512
4437_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4438{
4439 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4440 (__v16sf) __W,
4441 (__mmask16) __U);
4442}
4443
4444static __inline __m512 __DEFAULT_FN_ATTRS512
4446{
4447 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4448 (__v16sf)
4450 (__mmask16) __U);
4451}
4452
4453static __inline __m512d __DEFAULT_FN_ATTRS512
4455{
4456 return *(const __m512d*)__p;
4457}
4458
4459static __inline __m512d __DEFAULT_FN_ATTRS512
4460_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4461{
4462 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4463 (__v8df) __W,
4464 (__mmask8) __U);
4465}
4466
4467static __inline __m512d __DEFAULT_FN_ATTRS512
4469{
4470 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4471 (__v8df)
4473 (__mmask8) __U);
4474}
4475
4476static __inline __m512i __DEFAULT_FN_ATTRS512
4478{
4479 return *(const __m512i *) __P;
4480}
4481
4482static __inline __m512i __DEFAULT_FN_ATTRS512
4484{
4485 return *(const __m512i *) __P;
4486}
4487
4488static __inline __m512i __DEFAULT_FN_ATTRS512
4490{
4491 return *(const __m512i *) __P;
4492}
4493
4494/* SIMD store ops */
4495
4496static __inline void __DEFAULT_FN_ATTRS512
4497_mm512_storeu_epi64 (void *__P, __m512i __A)
4498{
4499 struct __storeu_epi64 {
4500 __m512i_u __v;
4501 } __attribute__((__packed__, __may_alias__));
4502 ((struct __storeu_epi64*)__P)->__v = __A;
4503}
4504
4505static __inline void __DEFAULT_FN_ATTRS512
4506_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4507{
4508 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4509 (__mmask8) __U);
4510}
4511
4512static __inline void __DEFAULT_FN_ATTRS512
4513_mm512_storeu_si512 (void *__P, __m512i __A)
4514{
4515 struct __storeu_si512 {
4516 __m512i_u __v;
4517 } __attribute__((__packed__, __may_alias__));
4518 ((struct __storeu_si512*)__P)->__v = __A;
4519}
4520
4521static __inline void __DEFAULT_FN_ATTRS512
4522_mm512_storeu_epi32 (void *__P, __m512i __A)
4523{
4524 struct __storeu_epi32 {
4525 __m512i_u __v;
4526 } __attribute__((__packed__, __may_alias__));
4527 ((struct __storeu_epi32*)__P)->__v = __A;
4528}
4529
4530static __inline void __DEFAULT_FN_ATTRS512
4532{
4533 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4534 (__mmask16) __U);
4535}
4536
4537static __inline void __DEFAULT_FN_ATTRS512
4538_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4539{
4540 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4541}
4542
4543static __inline void __DEFAULT_FN_ATTRS512
4544_mm512_storeu_pd(void *__P, __m512d __A)
4545{
4546 struct __storeu_pd {
4547 __m512d_u __v;
4548 } __attribute__((__packed__, __may_alias__));
4549 ((struct __storeu_pd*)__P)->__v = __A;
4550}
4551
4552static __inline void __DEFAULT_FN_ATTRS512
4553_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4554{
4555 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4556 (__mmask16) __U);
4557}
4558
4559static __inline void __DEFAULT_FN_ATTRS512
4560_mm512_storeu_ps(void *__P, __m512 __A)
4561{
4562 struct __storeu_ps {
4563 __m512_u __v;
4564 } __attribute__((__packed__, __may_alias__));
4565 ((struct __storeu_ps*)__P)->__v = __A;
4566}
4567
4568static __inline void __DEFAULT_FN_ATTRS512
4569_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4570{
4571 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4572}
4573
4574static __inline void __DEFAULT_FN_ATTRS512
4575_mm512_store_pd(void *__P, __m512d __A)
4576{
4577 *(__m512d*)__P = __A;
4578}
4579
4580static __inline void __DEFAULT_FN_ATTRS512
4581_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4582{
4583 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4584 (__mmask16) __U);
4585}
4586
4587static __inline void __DEFAULT_FN_ATTRS512
4588_mm512_store_ps(void *__P, __m512 __A)
4589{
4590 *(__m512*)__P = __A;
4591}
4592
4593static __inline void __DEFAULT_FN_ATTRS512
4594_mm512_store_si512 (void *__P, __m512i __A)
4595{
4596 *(__m512i *) __P = __A;
4597}
4598
4599static __inline void __DEFAULT_FN_ATTRS512
4600_mm512_store_epi32 (void *__P, __m512i __A)
4601{
4602 *(__m512i *) __P = __A;
4603}
4604
4605static __inline void __DEFAULT_FN_ATTRS512
4606_mm512_store_epi64 (void *__P, __m512i __A)
4607{
4608 *(__m512i *) __P = __A;
4609}
4610
4611/* Mask ops */
4612
4613static __inline __mmask16 __DEFAULT_FN_ATTRS
4615{
4616 return __builtin_ia32_knothi(__M);
4617}
4618
4619/* Integer compare */
4620
4621#define _mm512_cmpeq_epi32_mask(A, B) \
4622 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
4623#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
4624 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
4625#define _mm512_cmpge_epi32_mask(A, B) \
4626 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
4627#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
4628 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
4629#define _mm512_cmpgt_epi32_mask(A, B) \
4630 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
4631#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
4632 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
4633#define _mm512_cmple_epi32_mask(A, B) \
4634 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
4635#define _mm512_mask_cmple_epi32_mask(k, A, B) \
4636 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
4637#define _mm512_cmplt_epi32_mask(A, B) \
4638 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
4639#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
4640 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
4641#define _mm512_cmpneq_epi32_mask(A, B) \
4642 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
4643#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
4644 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
4645
4646#define _mm512_cmpeq_epu32_mask(A, B) \
4647 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
4648#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
4649 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
4650#define _mm512_cmpge_epu32_mask(A, B) \
4651 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
4652#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
4653 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
4654#define _mm512_cmpgt_epu32_mask(A, B) \
4655 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
4656#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
4657 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
4658#define _mm512_cmple_epu32_mask(A, B) \
4659 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
4660#define _mm512_mask_cmple_epu32_mask(k, A, B) \
4661 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
4662#define _mm512_cmplt_epu32_mask(A, B) \
4663 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
4664#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
4665 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
4666#define _mm512_cmpneq_epu32_mask(A, B) \
4667 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
4668#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
4669 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
4670
4671#define _mm512_cmpeq_epi64_mask(A, B) \
4672 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
4673#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
4674 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
4675#define _mm512_cmpge_epi64_mask(A, B) \
4676 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
4677#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
4678 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
4679#define _mm512_cmpgt_epi64_mask(A, B) \
4680 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
4681#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
4682 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
4683#define _mm512_cmple_epi64_mask(A, B) \
4684 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
4685#define _mm512_mask_cmple_epi64_mask(k, A, B) \
4686 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
4687#define _mm512_cmplt_epi64_mask(A, B) \
4688 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
4689#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
4690 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
4691#define _mm512_cmpneq_epi64_mask(A, B) \
4692 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
4693#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
4694 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
4695
4696#define _mm512_cmpeq_epu64_mask(A, B) \
4697 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
4698#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
4699 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
4700#define _mm512_cmpge_epu64_mask(A, B) \
4701 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
4702#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
4703 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
4704#define _mm512_cmpgt_epu64_mask(A, B) \
4705 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
4706#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
4707 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
4708#define _mm512_cmple_epu64_mask(A, B) \
4709 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
4710#define _mm512_mask_cmple_epu64_mask(k, A, B) \
4711 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
4712#define _mm512_cmplt_epu64_mask(A, B) \
4713 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
4714#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
4715 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
4716#define _mm512_cmpneq_epu64_mask(A, B) \
4717 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
4718#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
4719 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4720
4721static __inline__ __m512i __DEFAULT_FN_ATTRS512
4723{
4724 /* This function always performs a signed extension, but __v16qi is a char
4725 which may be signed or unsigned, so use __v16qs. */
4726 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4727}
4728
4729static __inline__ __m512i __DEFAULT_FN_ATTRS512
4730_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4731{
4732 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4733 (__v16si)_mm512_cvtepi8_epi32(__A),
4734 (__v16si)__W);
4735}
4736
4737static __inline__ __m512i __DEFAULT_FN_ATTRS512
4739{
4740 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4741 (__v16si)_mm512_cvtepi8_epi32(__A),
4742 (__v16si)_mm512_setzero_si512());
4743}
4744
4745static __inline__ __m512i __DEFAULT_FN_ATTRS512
4747{
4748 /* This function always performs a signed extension, but __v16qi is a char
4749 which may be signed or unsigned, so use __v16qs. */
4750 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4751}
4752
4753static __inline__ __m512i __DEFAULT_FN_ATTRS512
4754_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4755{
4756 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4757 (__v8di)_mm512_cvtepi8_epi64(__A),
4758 (__v8di)__W);
4759}
4760
4761static __inline__ __m512i __DEFAULT_FN_ATTRS512
4763{
4764 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4765 (__v8di)_mm512_cvtepi8_epi64(__A),
4766 (__v8di)_mm512_setzero_si512 ());
4767}
4768
4769static __inline__ __m512i __DEFAULT_FN_ATTRS512
4771{
4772 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4773}
4774
4775static __inline__ __m512i __DEFAULT_FN_ATTRS512
4776_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4777{
4778 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4779 (__v8di)_mm512_cvtepi32_epi64(__X),
4780 (__v8di)__W);
4781}
4782
4783static __inline__ __m512i __DEFAULT_FN_ATTRS512
4785{
4786 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4787 (__v8di)_mm512_cvtepi32_epi64(__X),
4788 (__v8di)_mm512_setzero_si512());
4789}
4790
4791static __inline__ __m512i __DEFAULT_FN_ATTRS512
4793{
4794 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4795}
4796
4797static __inline__ __m512i __DEFAULT_FN_ATTRS512
4798_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4799{
4800 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4801 (__v16si)_mm512_cvtepi16_epi32(__A),
4802 (__v16si)__W);
4803}
4804
4805static __inline__ __m512i __DEFAULT_FN_ATTRS512
4807{
4808 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4809 (__v16si)_mm512_cvtepi16_epi32(__A),
4810 (__v16si)_mm512_setzero_si512 ());
4811}
4812
4813static __inline__ __m512i __DEFAULT_FN_ATTRS512
4815{
4816 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4817}
4818
4819static __inline__ __m512i __DEFAULT_FN_ATTRS512
4820_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4821{
4822 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4823 (__v8di)_mm512_cvtepi16_epi64(__A),
4824 (__v8di)__W);
4825}
4826
4827static __inline__ __m512i __DEFAULT_FN_ATTRS512
4829{
4830 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4831 (__v8di)_mm512_cvtepi16_epi64(__A),
4832 (__v8di)_mm512_setzero_si512());
4833}
4834
4835static __inline__ __m512i __DEFAULT_FN_ATTRS512
4837{
4838 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4839}
4840
4841static __inline__ __m512i __DEFAULT_FN_ATTRS512
4842_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4843{
4844 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4845 (__v16si)_mm512_cvtepu8_epi32(__A),
4846 (__v16si)__W);
4847}
4848
4849static __inline__ __m512i __DEFAULT_FN_ATTRS512
4851{
4852 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4853 (__v16si)_mm512_cvtepu8_epi32(__A),
4854 (__v16si)_mm512_setzero_si512());
4855}
4856
4857static __inline__ __m512i __DEFAULT_FN_ATTRS512
4859{
4860 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4861}
4862
4863static __inline__ __m512i __DEFAULT_FN_ATTRS512
4864_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4865{
4866 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4867 (__v8di)_mm512_cvtepu8_epi64(__A),
4868 (__v8di)__W);
4869}
4870
4871static __inline__ __m512i __DEFAULT_FN_ATTRS512
4873{
4874 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4875 (__v8di)_mm512_cvtepu8_epi64(__A),
4876 (__v8di)_mm512_setzero_si512());
4877}
4878
4879static __inline__ __m512i __DEFAULT_FN_ATTRS512
4881{
4882 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4883}
4884
4885static __inline__ __m512i __DEFAULT_FN_ATTRS512
4886_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4887{
4888 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4889 (__v8di)_mm512_cvtepu32_epi64(__X),
4890 (__v8di)__W);
4891}
4892
4893static __inline__ __m512i __DEFAULT_FN_ATTRS512
4895{
4896 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4897 (__v8di)_mm512_cvtepu32_epi64(__X),
4898 (__v8di)_mm512_setzero_si512());
4899}
4900
4901static __inline__ __m512i __DEFAULT_FN_ATTRS512
4903{
4904 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4905}
4906
4907static __inline__ __m512i __DEFAULT_FN_ATTRS512
4908_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4909{
4910 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4911 (__v16si)_mm512_cvtepu16_epi32(__A),
4912 (__v16si)__W);
4913}
4914
4915static __inline__ __m512i __DEFAULT_FN_ATTRS512
4917{
4918 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4919 (__v16si)_mm512_cvtepu16_epi32(__A),
4920 (__v16si)_mm512_setzero_si512());
4921}
4922
4923static __inline__ __m512i __DEFAULT_FN_ATTRS512
4925{
4926 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4927}
4928
4929static __inline__ __m512i __DEFAULT_FN_ATTRS512
4930_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4931{
4932 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4933 (__v8di)_mm512_cvtepu16_epi64(__A),
4934 (__v8di)__W);
4935}
4936
4937static __inline__ __m512i __DEFAULT_FN_ATTRS512
4939{
4940 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4941 (__v8di)_mm512_cvtepu16_epi64(__A),
4942 (__v8di)_mm512_setzero_si512());
4943}
4944
4945static __inline__ __m512i __DEFAULT_FN_ATTRS512
4946_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4947{
4948 return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4949}
4950
4951static __inline__ __m512i __DEFAULT_FN_ATTRS512
4952_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4953{
4954 return (__m512i)__builtin_ia32_selectd_512(__U,
4955 (__v16si)_mm512_rorv_epi32(__A, __B),
4956 (__v16si)__W);
4957}
4958
4959static __inline__ __m512i __DEFAULT_FN_ATTRS512
4960_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4961{
4962 return (__m512i)__builtin_ia32_selectd_512(__U,
4963 (__v16si)_mm512_rorv_epi32(__A, __B),
4964 (__v16si)_mm512_setzero_si512());
4965}
4966
4967static __inline__ __m512i __DEFAULT_FN_ATTRS512
4968_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4969{
4970 return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4971}
4972
4973static __inline__ __m512i __DEFAULT_FN_ATTRS512
4974_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4975{
4976 return (__m512i)__builtin_ia32_selectq_512(__U,
4977 (__v8di)_mm512_rorv_epi64(__A, __B),
4978 (__v8di)__W);
4979}
4980
4981static __inline__ __m512i __DEFAULT_FN_ATTRS512
4982_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4983{
4984 return (__m512i)__builtin_ia32_selectq_512(__U,
4985 (__v8di)_mm512_rorv_epi64(__A, __B),
4986 (__v8di)_mm512_setzero_si512());
4987}
4988
4989
4990
4991#define _mm512_cmp_epi32_mask(a, b, p) \
4992 ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
4993 (__v16si)(__m512i)(b), (int)(p), \
4994 (__mmask16)-1))
4995
4996#define _mm512_cmp_epu32_mask(a, b, p) \
4997 ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
4998 (__v16si)(__m512i)(b), (int)(p), \
4999 (__mmask16)-1))
5000
5001#define _mm512_cmp_epi64_mask(a, b, p) \
5002 ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
5003 (__v8di)(__m512i)(b), (int)(p), \
5004 (__mmask8)-1))
5005
5006#define _mm512_cmp_epu64_mask(a, b, p) \
5007 ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
5008 (__v8di)(__m512i)(b), (int)(p), \
5009 (__mmask8)-1))
5010
5011#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
5012 ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
5013 (__v16si)(__m512i)(b), (int)(p), \
5014 (__mmask16)(m)))
5015
5016#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
5017 ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
5018 (__v16si)(__m512i)(b), (int)(p), \
5019 (__mmask16)(m)))
5020
5021#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
5022 ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
5023 (__v8di)(__m512i)(b), (int)(p), \
5024 (__mmask8)(m)))
5025
5026#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
5027 ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
5028 (__v8di)(__m512i)(b), (int)(p), \
5029 (__mmask8)(m)))
5030
5031#define _mm512_rol_epi32(a, b) \
5032 ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))
5033
5034#define _mm512_mask_rol_epi32(W, U, a, b) \
5035 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
5036 (__v16si)_mm512_rol_epi32((a), (b)), \
5037 (__v16si)(__m512i)(W)))
5038
5039#define _mm512_maskz_rol_epi32(U, a, b) \
5040 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
5041 (__v16si)_mm512_rol_epi32((a), (b)), \
5042 (__v16si)_mm512_setzero_si512()))
5043
5044#define _mm512_rol_epi64(a, b) \
5045 ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))
5046
5047#define _mm512_mask_rol_epi64(W, U, a, b) \
5048 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
5049 (__v8di)_mm512_rol_epi64((a), (b)), \
5050 (__v8di)(__m512i)(W)))
5051
5052#define _mm512_maskz_rol_epi64(U, a, b) \
5053 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
5054 (__v8di)_mm512_rol_epi64((a), (b)), \
5055 (__v8di)_mm512_setzero_si512()))
5056
5057static __inline__ __m512i __DEFAULT_FN_ATTRS512
5058_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5059{
5060 return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5061}
5062
5063static __inline__ __m512i __DEFAULT_FN_ATTRS512
5064_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5065{
5066 return (__m512i)__builtin_ia32_selectd_512(__U,
5067 (__v16si)_mm512_rolv_epi32(__A, __B),
5068 (__v16si)__W);
5069}
5070
5071static __inline__ __m512i __DEFAULT_FN_ATTRS512
5072_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5073{
5074 return (__m512i)__builtin_ia32_selectd_512(__U,
5075 (__v16si)_mm512_rolv_epi32(__A, __B),
5076 (__v16si)_mm512_setzero_si512());
5077}
5078
5079static __inline__ __m512i __DEFAULT_FN_ATTRS512
5080_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5081{
5082 return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5083}
5084
5085static __inline__ __m512i __DEFAULT_FN_ATTRS512
5086_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5087{
5088 return (__m512i)__builtin_ia32_selectq_512(__U,
5089 (__v8di)_mm512_rolv_epi64(__A, __B),
5090 (__v8di)__W);
5091}
5092
5093static __inline__ __m512i __DEFAULT_FN_ATTRS512
5094_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5095{
5096 return (__m512i)__builtin_ia32_selectq_512(__U,
5097</