/* clang 22.0.0git — avx512fintrin.h (documentation listing of this file). */
1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
16typedef char __v64qi __attribute__((__vector_size__(64)));
17typedef short __v32hi __attribute__((__vector_size__(64)));
18typedef double __v8df __attribute__((__vector_size__(64)));
19typedef float __v16sf __attribute__((__vector_size__(64)));
20typedef long long __v8di __attribute__((__vector_size__(64)));
21typedef int __v16si __attribute__((__vector_size__(64)));
22
23/* Unsigned types */
24typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
25typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
26typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
27typedef unsigned int __v16su __attribute__((__vector_size__(64)));
28
29/* We need an explicitly signed variant for char. Note that this shouldn't
30 * appear in the interface though. */
31typedef signed char __v64qs __attribute__((__vector_size__(64)));
32
33typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
34typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
35typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
36
37typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
38typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
39typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
40
41typedef unsigned char __mmask8;
42typedef unsigned short __mmask16;
43
44/* Rounding mode macros. */
45#define _MM_FROUND_TO_NEAREST_INT 0x00
46#define _MM_FROUND_TO_NEG_INF 0x01
47#define _MM_FROUND_TO_POS_INF 0x02
48#define _MM_FROUND_TO_ZERO 0x03
49#define _MM_FROUND_CUR_DIRECTION 0x04
50
51/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* Encoding 3 is reserved; keeps later values at ISA encodings */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE /* Greater than */
} _MM_CMPINT_ENUM;
63
64typedef enum
65{
153
typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
typedef enum
{
  _MM_MANT_SIGN_src,      /* sign = sign(SRC)            */
  _MM_MANT_SIGN_zero,     /* sign = 0                    */
  _MM_MANT_SIGN_nan       /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
169/* Define the default attributes for the functions in this file. */
170#define __DEFAULT_FN_ATTRS512 \
171 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
172 __min_vector_width__(512)))
173#define __DEFAULT_FN_ATTRS128 \
174 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
175 __min_vector_width__(128)))
176#define __DEFAULT_FN_ATTRS \
177 __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
178
179#if defined(__cplusplus) && (__cplusplus >= 201103L)
180#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
181#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
182#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
183#else
184#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
185#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
186#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
187#endif
188
189/* Create vectors with repeated elements */
190
191static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
193 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
194}
195
196#define _mm512_setzero_epi32 _mm512_setzero_si512
197
198static __inline__ __m512d __DEFAULT_FN_ATTRS512
200{
201 return (__m512d)__builtin_ia32_undef512();
202}
203
204static __inline__ __m512 __DEFAULT_FN_ATTRS512
206{
207 return (__m512)__builtin_ia32_undef512();
208}
209
210static __inline__ __m512 __DEFAULT_FN_ATTRS512
212{
213 return (__m512)__builtin_ia32_undef512();
214}
215
216static __inline__ __m512i __DEFAULT_FN_ATTRS512
218{
219 return (__m512i)__builtin_ia32_undef512();
220}
221
222static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
224 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
226}
227
/* Merge-masking broadcast: element i is broadcast(__A) if bit i of __M is set,
   otherwise the corresponding element of __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) {
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}
234
235static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
237 return (__m512i)__builtin_ia32_selectd_512(__M,
238 (__v16si) _mm512_broadcastd_epi32(__A),
239 (__v16si) _mm512_setzero_si512());
240}
241
242static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
244 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
245 0, 0, 0, 0, 0, 0, 0, 0);
246}
247
248static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
249_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
250 return (__m512i)__builtin_ia32_selectq_512(
251 __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O);
252}
253
254static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
256 return (__m512i)__builtin_ia32_selectq_512(__M,
257 (__v8di) _mm512_broadcastq_epi64(__A),
258 (__v8di) _mm512_setzero_si512());
259}
260
262 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
263 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
264}
265
266#define _mm512_setzero _mm512_setzero_ps
267
268static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
270 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
271}
272
273static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
275{
276 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
277 __w, __w, __w, __w, __w, __w, __w, __w };
278}
279
/* Returns a 512-bit vector of [8 x double] with all elements equal to __w. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set1_pd(double __w)
{
  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}
285
286static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
288{
289 return __extension__ (__m512i)(__v64qi){
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w,
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w };
298}
299
300static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
302{
303 return __extension__ (__m512i)(__v32hi){
304 __w, __w, __w, __w, __w, __w, __w, __w,
305 __w, __w, __w, __w, __w, __w, __w, __w,
306 __w, __w, __w, __w, __w, __w, __w, __w,
307 __w, __w, __w, __w, __w, __w, __w, __w };
308}
309
310static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
312{
313 return __extension__ (__m512i)(__v16si){
314 __s, __s, __s, __s, __s, __s, __s, __s,
315 __s, __s, __s, __s, __s, __s, __s, __s };
316}
317
318static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
320 return (__m512i)__builtin_ia32_selectd_512(__M,
321 (__v16si)_mm512_set1_epi32(__A),
322 (__v16si)_mm512_setzero_si512());
323}
324
325static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
326_mm512_set1_epi64(long long __d)
327{
328 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
329}
330
331static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
333 return (__m512i)__builtin_ia32_selectq_512(__M,
334 (__v8di)_mm512_set1_epi64(__A),
335 (__v8di)_mm512_setzero_si512());
336}
337
338static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
340 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
342}
343
344static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
345_mm512_set4_epi32(int __A, int __B, int __C, int __D) {
346 return __extension__ (__m512i)(__v16si)
347 { __D, __C, __B, __A, __D, __C, __B, __A,
348 __D, __C, __B, __A, __D, __C, __B, __A };
349}
350
351static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
352_mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D) {
353 return __extension__ (__m512i) (__v8di)
354 { __D, __C, __B, __A, __D, __C, __B, __A };
355}
356
357static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
358_mm512_set4_pd(double __A, double __B, double __C, double __D) {
359 return __extension__ (__m512d)
360 { __D, __C, __B, __A, __D, __C, __B, __A };
361}
362
363static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
364_mm512_set4_ps(float __A, float __B, float __C, float __D) {
365 return __extension__ (__m512)
366 { __D, __C, __B, __A, __D, __C, __B, __A,
367 __D, __C, __B, __A, __D, __C, __B, __A };
368}
369
370static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
371_mm512_setr4_epi32(int e0, int e1, int e2, int e3) {
372 return _mm512_set4_epi32(e3, e2, e1, e0);
373}
374
375static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
376_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) {
377 return _mm512_set4_epi64(e3, e2, e1, e0);
378}
379
380static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
381_mm512_setr4_pd(double e0, double e1, double e2, double e3) {
382 return _mm512_set4_pd(e3, e2, e1, e0);
383}
384
385static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
386_mm512_setr4_ps(float e0, float e1, float e2, float e3) {
387 return _mm512_set4_ps(e3, e2, e1, e0);
388}
389
390static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
392 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
393 0, 0, 0, 0, 0, 0, 0, 0);
394}
395
396/* Cast between vector types */
397
398static __inline __m512d __DEFAULT_FN_ATTRS512
400{
401 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
402 1, 2, 3, 4, 5, 6, 7);
403}
404
405static __inline __m512 __DEFAULT_FN_ATTRS512
407{
408 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
409 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
410}
411
412static __inline __m128d __DEFAULT_FN_ATTRS512
414{
415 return __builtin_shufflevector(__a, __a, 0, 1);
416}
417
418static __inline __m256d __DEFAULT_FN_ATTRS512
420{
421 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
422}
423
424static __inline __m128 __DEFAULT_FN_ATTRS512
426{
427 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
428}
429
430static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
432 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
433}
434
/* Bit-casts a 512-bit double vector to a 512-bit float vector (no conversion). */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}
440
441static __inline __m512i __DEFAULT_FN_ATTRS512
443{
444 return (__m512i) (__A);
445}
446
447static __inline__ __m512d __DEFAULT_FN_ATTRS512
449{
450 __m256d __B = __builtin_nondeterministic_value(__B);
451 return __builtin_shufflevector(
452 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
453 __B, 0, 1, 2, 3, 4, 5, 6, 7);
454}
455
456static __inline __m512d __DEFAULT_FN_ATTRS512
458{
459 return (__m512d) (__A);
460}
461
462static __inline __m512i __DEFAULT_FN_ATTRS512
464{
465 return (__m512i) (__A);
466}
467
468static __inline__ __m512 __DEFAULT_FN_ATTRS512
470{
471 __m256 __B = __builtin_nondeterministic_value(__B);
472 return __builtin_shufflevector(
473 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
474 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
475}
476
477static __inline__ __m512i __DEFAULT_FN_ATTRS512
479{
480 __m256i __B = __builtin_nondeterministic_value(__B);
481 return __builtin_shufflevector(
482 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
483 __B, 0, 1, 2, 3, 4, 5, 6, 7);
484}
485
486static __inline__ __m512i __DEFAULT_FN_ATTRS512
488{
489 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
490}
491
492static __inline __m512 __DEFAULT_FN_ATTRS512
494{
495 return (__m512) (__A);
496}
497
498static __inline __m512d __DEFAULT_FN_ATTRS512
500{
501 return (__m512d) (__A);
502}
503
504static __inline __m128i __DEFAULT_FN_ATTRS512
506{
507 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
508}
509
510static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR
512 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
513}
514
517 return (__mmask16)__a;
518}
519
520static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
522 return (int)__a;
523}
524
525/// Constructs a 512-bit floating-point vector of [8 x double] from a
526/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
527/// contain the value of the source vector. The upper 384 bits are set
528/// to zero.
529///
530/// \headerfile <x86intrin.h>
531///
532/// This intrinsic has no corresponding instruction.
533///
534/// \param __a
535/// A 128-bit vector of [2 x double].
536/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
537/// contain the value of the parameter. The upper 384 bits are set to zero.
538static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
540 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
541}
542
543/// Constructs a 512-bit floating-point vector of [8 x double] from a
544/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
545/// contain the value of the source vector. The upper 256 bits are set
546/// to zero.
547///
548/// \headerfile <x86intrin.h>
549///
550/// This intrinsic has no corresponding instruction.
551///
552/// \param __a
553/// A 256-bit vector of [4 x double].
554/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
555/// contain the value of the parameter. The upper 256 bits are set to zero.
556static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
558 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
559}
560
561/// Constructs a 512-bit floating-point vector of [16 x float] from a
562/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
563/// the value of the source vector. The upper 384 bits are set to zero.
564///
565/// \headerfile <x86intrin.h>
566///
567/// This intrinsic has no corresponding instruction.
568///
569/// \param __a
570/// A 128-bit vector of [4 x float].
571/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
572/// contain the value of the parameter. The upper 384 bits are set to zero.
573static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
575 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
576}
577
578/// Constructs a 512-bit floating-point vector of [16 x float] from a
579/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
580/// the value of the source vector. The upper 256 bits are set to zero.
581///
582/// \headerfile <x86intrin.h>
583///
584/// This intrinsic has no corresponding instruction.
585///
586/// \param __a
587/// A 256-bit vector of [8 x float].
588/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
589/// contain the value of the parameter. The upper 256 bits are set to zero.
590static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
592 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
593}
594
595/// Constructs a 512-bit integer vector from a 128-bit integer vector.
596/// The lower 128 bits contain the value of the source vector. The upper
597/// 384 bits are set to zero.
598///
599/// \headerfile <x86intrin.h>
600///
601/// This intrinsic has no corresponding instruction.
602///
603/// \param __a
604/// A 128-bit integer vector.
605/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
606/// the parameter. The upper 384 bits are set to zero.
607static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
609 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
610}
611
612/// Constructs a 512-bit integer vector from a 256-bit integer vector.
613/// The lower 256 bits contain the value of the source vector. The upper
614/// 256 bits are set to zero.
615///
616/// \headerfile <x86intrin.h>
617///
618/// This intrinsic has no corresponding instruction.
619///
620/// \param __a
621/// A 256-bit integer vector.
622/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
623/// the parameter. The upper 256 bits are set to zero.
624static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
626 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
627}
628
629/* Bitwise operators */
/* Bitwise AND of two 512-bit vectors of [16 x i32]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}
635
636static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
637_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) {
638 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
639 (__v16si) _mm512_and_epi32(__a, __b),
640 (__v16si) __src);
641}
642
643static __inline__ __m512i __DEFAULT_FN_ATTRS512
645{
647 __k, __a, __b);
648}
649
650static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
651_mm512_and_epi64(__m512i __a, __m512i __b)
652{
653 return (__m512i)((__v8du)__a & (__v8du)__b);
654}
655
656static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
657_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) {
658 return (__m512i)__builtin_ia32_selectq_512(
659 (__mmask8)__k, (__v8di)_mm512_and_epi64(__a, __b), (__v8di)__src);
660}
661
662static __inline__ __m512i __DEFAULT_FN_ATTRS512
664{
666 __k, __a, __b);
667}
668
/* Returns (~__A) & __B over the full 512 bits. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}
674
675static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
676_mm512_andnot_epi32 (__m512i __A, __m512i __B)
677{
678 return (__m512i)(~(__v16su)__A & (__v16su)__B);
679}
680
681static __inline__ __m512i __DEFAULT_FN_ATTRS512
682_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
683{
684 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
685 (__v16si)_mm512_andnot_epi32(__A, __B),
686 (__v16si)__W);
687}
688
689static __inline__ __m512i __DEFAULT_FN_ATTRS512
690_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
691{
693 __U, __A, __B);
694}
695
696static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
697_mm512_andnot_epi64(__m512i __A, __m512i __B)
698{
699 return (__m512i)(~(__v8du)__A & (__v8du)__B);
700}
701
702static __inline__ __m512i __DEFAULT_FN_ATTRS512
703_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
704{
705 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
706 (__v8di)_mm512_andnot_epi64(__A, __B),
707 (__v8di)__W);
708}
709
710static __inline__ __m512i __DEFAULT_FN_ATTRS512
711_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
712{
714 __U, __A, __B);
715}
716
717static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
718_mm512_or_epi32(__m512i __a, __m512i __b)
719{
720 return (__m512i)((__v16su)__a | (__v16su)__b);
721}
722
723static __inline__ __m512i __DEFAULT_FN_ATTRS512
724_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
725{
726 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
727 (__v16si)_mm512_or_epi32(__a, __b),
728 (__v16si)__src);
729}
730
731static __inline__ __m512i __DEFAULT_FN_ATTRS512
733{
734 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
735}
736
737static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
738_mm512_or_epi64(__m512i __a, __m512i __b)
739{
740 return (__m512i)((__v8du)__a | (__v8du)__b);
741}
742
743static __inline__ __m512i __DEFAULT_FN_ATTRS512
744_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
745{
746 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
747 (__v8di)_mm512_or_epi64(__a, __b),
748 (__v8di)__src);
749}
750
751static __inline__ __m512i __DEFAULT_FN_ATTRS512
752_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
753{
754 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
755}
756
757static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
758_mm512_xor_epi32(__m512i __a, __m512i __b)
759{
760 return (__m512i)((__v16su)__a ^ (__v16su)__b);
761}
762
763static __inline__ __m512i __DEFAULT_FN_ATTRS512
764_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
765{
766 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
767 (__v16si)_mm512_xor_epi32(__a, __b),
768 (__v16si)__src);
769}
770
771static __inline__ __m512i __DEFAULT_FN_ATTRS512
773{
774 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
775}
776
777static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
778_mm512_xor_epi64(__m512i __a, __m512i __b)
779{
780 return (__m512i)((__v8du)__a ^ (__v8du)__b);
781}
782
783static __inline__ __m512i __DEFAULT_FN_ATTRS512
784_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
785{
786 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
787 (__v8di)_mm512_xor_epi64(__a, __b),
788 (__v8di)__src);
789}
790
791static __inline__ __m512i __DEFAULT_FN_ATTRS512
793{
794 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
795}
796
797static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
798_mm512_and_si512(__m512i __a, __m512i __b)
799{
800 return (__m512i)((__v8du)__a & (__v8du)__b);
801}
802
803static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
804_mm512_or_si512(__m512i __a, __m512i __b)
805{
806 return (__m512i)((__v8du)__a | (__v8du)__b);
807}
808
809static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
810_mm512_xor_si512(__m512i __a, __m512i __b)
811{
812 return (__m512i)((__v8du)__a ^ (__v8du)__b);
813}
814
815/* Arithmetic */
816
817static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
818_mm512_add_pd(__m512d __a, __m512d __b) {
819 return (__m512d)((__v8df)__a + (__v8df)__b);
820}
821
822static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
823_mm512_add_ps(__m512 __a, __m512 __b) {
824 return (__m512)((__v16sf)__a + (__v16sf)__b);
825}
826
827static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
828_mm512_mul_pd(__m512d __a, __m512d __b) {
829 return (__m512d)((__v8df)__a * (__v8df)__b);
830}
831
832static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
833_mm512_mul_ps(__m512 __a, __m512 __b) {
834 return (__m512)((__v16sf)__a * (__v16sf)__b);
835}
836
837static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
838_mm512_sub_pd(__m512d __a, __m512d __b) {
839 return (__m512d)((__v8df)__a - (__v8df)__b);
840}
841
842static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
843_mm512_sub_ps(__m512 __a, __m512 __b) {
844 return (__m512)((__v16sf)__a - (__v16sf)__b);
845}
846
847static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
848_mm512_add_epi64(__m512i __A, __m512i __B) {
849 return (__m512i) ((__v8du) __A + (__v8du) __B);
850}
851
852static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
853_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
854 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
855 (__v8di)_mm512_add_epi64(__A, __B),
856 (__v8di)__W);
857}
858
859static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
860_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
861 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
862 (__v8di)_mm512_add_epi64(__A, __B),
863 (__v8di)_mm512_setzero_si512());
864}
865
866static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
867_mm512_sub_epi64(__m512i __A, __m512i __B) {
868 return (__m512i) ((__v8du) __A - (__v8du) __B);
869}
870
871static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
872_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
873 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
874 (__v8di)_mm512_sub_epi64(__A, __B),
875 (__v8di)__W);
876}
877
878static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
879_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
880 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
881 (__v8di)_mm512_sub_epi64(__A, __B),
882 (__v8di)_mm512_setzero_si512());
883}
884
885static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
886_mm512_add_epi32(__m512i __A, __m512i __B) {
887 return (__m512i) ((__v16su) __A + (__v16su) __B);
888}
889
890static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
891_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
892 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
893 (__v16si)_mm512_add_epi32(__A, __B),
894 (__v16si)__W);
895}
896
897static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
898_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
899 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
900 (__v16si)_mm512_add_epi32(__A, __B),
901 (__v16si)_mm512_setzero_si512());
902}
903
904static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
905_mm512_sub_epi32(__m512i __A, __m512i __B) {
906 return (__m512i) ((__v16su) __A - (__v16su) __B);
907}
908
909static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
910_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
911 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
912 (__v16si)_mm512_sub_epi32(__A, __B),
913 (__v16si)__W);
914}
915
916static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
917_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
918 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
919 (__v16si)_mm512_sub_epi32(__A, __B),
920 (__v16si)_mm512_setzero_si512());
921}
922
923#define _mm512_max_round_pd(A, B, R) \
924 ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
925 (__v8df)(__m512d)(B), (int)(R)))
926
927#define _mm512_mask_max_round_pd(W, U, A, B, R) \
928 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
929 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
930 (__v8df)(W)))
931
932#define _mm512_maskz_max_round_pd(U, A, B, R) \
933 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
934 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
935 (__v8df)_mm512_setzero_pd()))
936
937static __inline__ __m512d __DEFAULT_FN_ATTRS512
938_mm512_max_pd(__m512d __A, __m512d __B)
939{
940 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
942}
943
944static __inline__ __m512d __DEFAULT_FN_ATTRS512
945_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
946{
947 return (__m512d)__builtin_ia32_selectpd_512(__U,
948 (__v8df)_mm512_max_pd(__A, __B),
949 (__v8df)__W);
950}
951
952static __inline__ __m512d __DEFAULT_FN_ATTRS512
953_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
954{
955 return (__m512d)__builtin_ia32_selectpd_512(__U,
956 (__v8df)_mm512_max_pd(__A, __B),
957 (__v8df)_mm512_setzero_pd());
958}
959
960#define _mm512_max_round_ps(A, B, R) \
961 ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
962 (__v16sf)(__m512)(B), (int)(R)))
963
964#define _mm512_mask_max_round_ps(W, U, A, B, R) \
965 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
966 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
967 (__v16sf)(W)))
968
969#define _mm512_maskz_max_round_ps(U, A, B, R) \
970 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
971 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
972 (__v16sf)_mm512_setzero_ps()))
973
974static __inline__ __m512 __DEFAULT_FN_ATTRS512
975_mm512_max_ps(__m512 __A, __m512 __B)
976{
977 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
979}
980
981static __inline__ __m512 __DEFAULT_FN_ATTRS512
982_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
983{
984 return (__m512)__builtin_ia32_selectps_512(__U,
985 (__v16sf)_mm512_max_ps(__A, __B),
986 (__v16sf)__W);
987}
988
989static __inline__ __m512 __DEFAULT_FN_ATTRS512
990_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
991{
992 return (__m512)__builtin_ia32_selectps_512(__U,
993 (__v16sf)_mm512_max_ps(__A, __B),
994 (__v16sf)_mm512_setzero_ps());
995}
996
997static __inline__ __m128 __DEFAULT_FN_ATTRS128
998_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
999 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1000 (__v4sf) __B,
1001 (__v4sf) __W,
1002 (__mmask8) __U,
1004}
1005
1006static __inline__ __m128 __DEFAULT_FN_ATTRS128
1007_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1008 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1009 (__v4sf) __B,
1010 (__v4sf) _mm_setzero_ps (),
1011 (__mmask8) __U,
1013}
1014
1015#define _mm_max_round_ss(A, B, R) \
1016 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1017 (__v4sf)(__m128)(B), \
1018 (__v4sf)_mm_setzero_ps(), \
1019 (__mmask8)-1, (int)(R)))
1020
1021#define _mm_mask_max_round_ss(W, U, A, B, R) \
1022 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1023 (__v4sf)(__m128)(B), \
1024 (__v4sf)(__m128)(W), (__mmask8)(U), \
1025 (int)(R)))
1026
1027#define _mm_maskz_max_round_ss(U, A, B, R) \
1028 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1029 (__v4sf)(__m128)(B), \
1030 (__v4sf)_mm_setzero_ps(), \
1031 (__mmask8)(U), (int)(R)))
1032
1033static __inline__ __m128d __DEFAULT_FN_ATTRS128
1034_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1035 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1036 (__v2df) __B,
1037 (__v2df) __W,
1038 (__mmask8) __U,
1040}
1041
1042static __inline__ __m128d __DEFAULT_FN_ATTRS128
1043_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1044 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1045 (__v2df) __B,
1046 (__v2df) _mm_setzero_pd (),
1047 (__mmask8) __U,
1049}
1050
1051#define _mm_max_round_sd(A, B, R) \
1052 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1053 (__v2df)(__m128d)(B), \
1054 (__v2df)_mm_setzero_pd(), \
1055 (__mmask8)-1, (int)(R)))
1056
1057#define _mm_mask_max_round_sd(W, U, A, B, R) \
1058 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1059 (__v2df)(__m128d)(B), \
1060 (__v2df)(__m128d)(W), \
1061 (__mmask8)(U), (int)(R)))
1062
1063#define _mm_maskz_max_round_sd(U, A, B, R) \
1064 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1065 (__v2df)(__m128d)(B), \
1066 (__v2df)_mm_setzero_pd(), \
1067 (__mmask8)(U), (int)(R)))
1068
1069static __inline __m512i
1071 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1072}
1073
1074static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1075_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1076 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1077 (__v16si)_mm512_max_epi32(__A, __B),
1078 (__v16si)__W);
1079}
1080
1081static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1082_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1083 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1084 (__v16si)_mm512_max_epi32(__A, __B),
1085 (__v16si)_mm512_setzero_si512());
1086}
1087
1088static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1089_mm512_max_epu32(__m512i __A, __m512i __B) {
1090 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1091}
1092
1093static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1094_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1095 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1096 (__v16si)_mm512_max_epu32(__A, __B),
1097 (__v16si)__W);
1098}
1099
1100static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1101_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1102 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1103 (__v16si)_mm512_max_epu32(__A, __B),
1104 (__v16si)_mm512_setzero_si512());
1105}
1106
1107static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1108_mm512_max_epi64(__m512i __A, __m512i __B) {
1109 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1110}
1111
1112static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1113_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1114 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1115 (__v8di)_mm512_max_epi64(__A, __B),
1116 (__v8di)__W);
1117}
1118
1119static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1120_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1121 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1122 (__v8di)_mm512_max_epi64(__A, __B),
1123 (__v8di)_mm512_setzero_si512());
1124}
1125
1126static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1127_mm512_max_epu64(__m512i __A, __m512i __B) {
1128 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1129}
1130
1131static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1132_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1133 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1134 (__v8di)_mm512_max_epu64(__A, __B),
1135 (__v8di)__W);
1136}
1137
1138static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1139_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1140 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1141 (__v8di)_mm512_max_epu64(__A, __B),
1142 (__v8di)_mm512_setzero_si512());
1143}
1144
/* Packed double-precision min with explicit rounding/SAE immediate R,
   plus merge- and zero-masking variants built on the select builtin. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                        (__v8df)(W)))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

1159static __inline__ __m512d __DEFAULT_FN_ATTRS512
1160_mm512_min_pd(__m512d __A, __m512d __B)
1161{
1162 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1164}
1165
1166static __inline__ __m512d __DEFAULT_FN_ATTRS512
1167_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1168{
1169 return (__m512d)__builtin_ia32_selectpd_512(__U,
1170 (__v8df)_mm512_min_pd(__A, __B),
1171 (__v8df)__W);
1172}
1173
1174static __inline__ __m512d __DEFAULT_FN_ATTRS512
1175_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1176{
1177 return (__m512d)__builtin_ia32_selectpd_512(__U,
1178 (__v8df)_mm512_min_pd(__A, __B),
1179 (__v8df)_mm512_setzero_pd());
1180}
1181
/* Packed single-precision min with explicit rounding/SAE immediate R,
   plus merge- and zero-masking variants. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                       (__v16sf)(W)))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))

1196static __inline__ __m512 __DEFAULT_FN_ATTRS512
1197_mm512_min_ps(__m512 __A, __m512 __B)
1198{
1199 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1201}
1202
1203static __inline__ __m512 __DEFAULT_FN_ATTRS512
1204_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1205{
1206 return (__m512)__builtin_ia32_selectps_512(__U,
1207 (__v16sf)_mm512_min_ps(__A, __B),
1208 (__v16sf)__W);
1209}
1210
1211static __inline__ __m512 __DEFAULT_FN_ATTRS512
1212_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1213{
1214 return (__m512)__builtin_ia32_selectps_512(__U,
1215 (__v16sf)_mm512_min_ps(__A, __B),
1216 (__v16sf)_mm512_setzero_ps());
1217}
1218
1219static __inline__ __m128 __DEFAULT_FN_ATTRS128
1220_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1221 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1222 (__v4sf) __B,
1223 (__v4sf) __W,
1224 (__mmask8) __U,
1226}
1227
1228static __inline__ __m128 __DEFAULT_FN_ATTRS128
1229_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1230 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1231 (__v4sf) __B,
1232 (__v4sf) _mm_setzero_ps (),
1233 (__mmask8) __U,
1235}
1236
/* Scalar single-precision min with explicit rounding/SAE immediate R:
   unmasked, merge-masked, and zero-masked forms. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))

1255static __inline__ __m128d __DEFAULT_FN_ATTRS128
1256_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1257 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1258 (__v2df) __B,
1259 (__v2df) __W,
1260 (__mmask8) __U,
1262}
1263
1264static __inline__ __m128d __DEFAULT_FN_ATTRS128
1265_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1266 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1267 (__v2df) __B,
1268 (__v2df) _mm_setzero_pd (),
1269 (__mmask8) __U,
1271}
1272
/* Scalar double-precision min with explicit rounding/SAE immediate R:
   unmasked, merge-masked, and zero-masked forms. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

1291static __inline __m512i
1293 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1294}
1295
1296static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1297_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1298 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1299 (__v16si)_mm512_min_epi32(__A, __B),
1300 (__v16si)__W);
1301}
1302
1303static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1304_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1305 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1306 (__v16si)_mm512_min_epi32(__A, __B),
1307 (__v16si)_mm512_setzero_si512());
1308}
1309
1310static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1311_mm512_min_epu32(__m512i __A, __m512i __B) {
1312 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1313}
1314
1315static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1316_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1317 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1318 (__v16si)_mm512_min_epu32(__A, __B),
1319 (__v16si)__W);
1320}
1321
1322static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1323_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1324 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1325 (__v16si)_mm512_min_epu32(__A, __B),
1326 (__v16si)_mm512_setzero_si512());
1327}
1328
1329static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1330_mm512_min_epi64(__m512i __A, __m512i __B) {
1331 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1332}
1333
1334static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1335_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1336 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1337 (__v8di)_mm512_min_epi64(__A, __B),
1338 (__v8di)__W);
1339}
1340
1341static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1342_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1343 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1344 (__v8di)_mm512_min_epi64(__A, __B),
1345 (__v8di)_mm512_setzero_si512());
1346}
1347
1348static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1349_mm512_min_epu64(__m512i __A, __m512i __B) {
1350 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1351}
1352
1353static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1354_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1355 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1356 (__v8di)_mm512_min_epu64(__A, __B),
1357 (__v8di)__W);
1358}
1359
1360static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1361_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1362 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1363 (__v8di)_mm512_min_epu64(__A, __B),
1364 (__v8di)_mm512_setzero_si512());
1365}
1366
1367static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1368_mm512_mul_epi32(__m512i __X, __m512i __Y) {
1369 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1370}
1371
1372static __inline __m512i __DEFAULT_FN_ATTRS512
1373_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1374{
1375 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1376 (__v8di)_mm512_mul_epi32(__X, __Y),
1377 (__v8di)__W);
1378}
1379
1380static __inline __m512i __DEFAULT_FN_ATTRS512
1381_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1382{
1383 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1384 (__v8di)_mm512_mul_epi32(__X, __Y),
1385 (__v8di)_mm512_setzero_si512 ());
1386}
1387
1388static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1389_mm512_mul_epu32(__m512i __X, __m512i __Y) {
1390 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1391}
1392
1393static __inline __m512i __DEFAULT_FN_ATTRS512
1394_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1395{
1396 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1397 (__v8di)_mm512_mul_epu32(__X, __Y),
1398 (__v8di)__W);
1399}
1400
1401static __inline __m512i __DEFAULT_FN_ATTRS512
1402_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1403{
1404 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1405 (__v8di)_mm512_mul_epu32(__X, __Y),
1406 (__v8di)_mm512_setzero_si512 ());
1407}
1408
1409static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1410_mm512_mullo_epi32(__m512i __A, __m512i __B) {
1411 return (__m512i) ((__v16su) __A * (__v16su) __B);
1412}
1413
1414static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1415_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1416 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1417 (__v16si)_mm512_mullo_epi32(__A, __B),
1418 (__v16si)_mm512_setzero_si512());
1419}
1420
1421static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1422_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1423 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1424 (__v16si)_mm512_mullo_epi32(__A, __B),
1425 (__v16si)__W);
1426}
1427
1428static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1429_mm512_mullox_epi64(__m512i __A, __m512i __B) {
1430 return (__m512i) ((__v8du) __A * (__v8du) __B);
1431}
1432
1433static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1434_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1435 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1436 (__v8di)_mm512_mullox_epi64(__A, __B),
1437 (__v8di)__W);
1438}
1439
/* Packed double-precision square root with explicit rounding immediate R,
   plus merge- and zero-masking variants. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))

#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

1453static __inline__ __m512d __DEFAULT_FN_ATTRS512
1454_mm512_sqrt_pd(__m512d __A)
1455{
1456 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1458}
1459
1460static __inline__ __m512d __DEFAULT_FN_ATTRS512
1461_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1462{
1463 return (__m512d)__builtin_ia32_selectpd_512(__U,
1464 (__v8df)_mm512_sqrt_pd(__A),
1465 (__v8df)__W);
1466}
1467
1468static __inline__ __m512d __DEFAULT_FN_ATTRS512
1470{
1471 return (__m512d)__builtin_ia32_selectpd_512(__U,
1472 (__v8df)_mm512_sqrt_pd(__A),
1473 (__v8df)_mm512_setzero_pd());
1474}
1475
/* Packed single-precision square root with explicit rounding immediate R,
   plus merge- and zero-masking variants. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))

#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))

1489static __inline__ __m512 __DEFAULT_FN_ATTRS512
1491{
1492 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1494}
1495
1496static __inline__ __m512 __DEFAULT_FN_ATTRS512
1497_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1498{
1499 return (__m512)__builtin_ia32_selectps_512(__U,
1500 (__v16sf)_mm512_sqrt_ps(__A),
1501 (__v16sf)__W);
1502}
1503
1504static __inline__ __m512 __DEFAULT_FN_ATTRS512
1506{
1507 return (__m512)__builtin_ia32_selectps_512(__U,
1508 (__v16sf)_mm512_sqrt_ps(__A),
1509 (__v16sf)_mm512_setzero_ps());
1510}
1511
1512static __inline__ __m512d __DEFAULT_FN_ATTRS512
1514{
1515 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1516 (__v8df)
1518 (__mmask8) -1);}
1519
1520static __inline__ __m512d __DEFAULT_FN_ATTRS512
1521_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1522{
1523 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1524 (__v8df) __W,
1525 (__mmask8) __U);
1526}
1527
1528static __inline__ __m512d __DEFAULT_FN_ATTRS512
1530{
1531 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1532 (__v8df)
1534 (__mmask8) __U);
1535}
1536
1537static __inline__ __m512 __DEFAULT_FN_ATTRS512
1539{
1540 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1541 (__v16sf)
1543 (__mmask16) -1);
1544}
1545
1546static __inline__ __m512 __DEFAULT_FN_ATTRS512
1547_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1548{
1549 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1550 (__v16sf) __W,
1551 (__mmask16) __U);
1552}
1553
1554static __inline__ __m512 __DEFAULT_FN_ATTRS512
1556{
1557 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1558 (__v16sf)
1560 (__mmask16) __U);
1561}
1562
1563static __inline__ __m128 __DEFAULT_FN_ATTRS128
1564_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1565{
1566 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1567 (__v4sf) __B,
1568 (__v4sf)
1569 _mm_setzero_ps (),
1570 (__mmask8) -1);
1571}
1572
1573static __inline__ __m128 __DEFAULT_FN_ATTRS128
1574_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1575{
1576 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1577 (__v4sf) __B,
1578 (__v4sf) __W,
1579 (__mmask8) __U);
1580}
1581
1582static __inline__ __m128 __DEFAULT_FN_ATTRS128
1583_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1584{
1585 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1586 (__v4sf) __B,
1587 (__v4sf) _mm_setzero_ps (),
1588 (__mmask8) __U);
1589}
1590
1591static __inline__ __m128d __DEFAULT_FN_ATTRS128
1592_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1593{
1594 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1595 (__v2df) __B,
1596 (__v2df)
1597 _mm_setzero_pd (),
1598 (__mmask8) -1);
1599}
1600
1601static __inline__ __m128d __DEFAULT_FN_ATTRS128
1602_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1603{
1604 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1605 (__v2df) __B,
1606 (__v2df) __W,
1607 (__mmask8) __U);
1608}
1609
1610static __inline__ __m128d __DEFAULT_FN_ATTRS128
1611_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1612{
1613 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1614 (__v2df) __B,
1615 (__v2df) _mm_setzero_pd (),
1616 (__mmask8) __U);
1617}
1618
1619static __inline__ __m512d __DEFAULT_FN_ATTRS512
1621{
1622 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1623 (__v8df)
1625 (__mmask8) -1);
1626}
1627
1628static __inline__ __m512d __DEFAULT_FN_ATTRS512
1629_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1630{
1631 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1632 (__v8df) __W,
1633 (__mmask8) __U);
1634}
1635
1636static __inline__ __m512d __DEFAULT_FN_ATTRS512
1638{
1639 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1640 (__v8df)
1642 (__mmask8) __U);
1643}
1644
1645static __inline__ __m512 __DEFAULT_FN_ATTRS512
1647{
1648 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1649 (__v16sf)
1651 (__mmask16) -1);
1652}
1653
1654static __inline__ __m512 __DEFAULT_FN_ATTRS512
1655_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1656{
1657 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1658 (__v16sf) __W,
1659 (__mmask16) __U);
1660}
1661
1662static __inline__ __m512 __DEFAULT_FN_ATTRS512
1664{
1665 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1666 (__v16sf)
1668 (__mmask16) __U);
1669}
1670
1671static __inline__ __m128 __DEFAULT_FN_ATTRS128
1672_mm_rcp14_ss(__m128 __A, __m128 __B)
1673{
1674 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1675 (__v4sf) __B,
1676 (__v4sf)
1677 _mm_setzero_ps (),
1678 (__mmask8) -1);
1679}
1680
1681static __inline__ __m128 __DEFAULT_FN_ATTRS128
1682_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1683{
1684 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1685 (__v4sf) __B,
1686 (__v4sf) __W,
1687 (__mmask8) __U);
1688}
1689
1690static __inline__ __m128 __DEFAULT_FN_ATTRS128
1691_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1692{
1693 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1694 (__v4sf) __B,
1695 (__v4sf) _mm_setzero_ps (),
1696 (__mmask8) __U);
1697}
1698
1699static __inline__ __m128d __DEFAULT_FN_ATTRS128
1700_mm_rcp14_sd(__m128d __A, __m128d __B)
1701{
1702 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1703 (__v2df) __B,
1704 (__v2df)
1705 _mm_setzero_pd (),
1706 (__mmask8) -1);
1707}
1708
1709static __inline__ __m128d __DEFAULT_FN_ATTRS128
1710_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1711{
1712 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1713 (__v2df) __B,
1714 (__v2df) __W,
1715 (__mmask8) __U);
1716}
1717
1718static __inline__ __m128d __DEFAULT_FN_ATTRS128
1719_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1720{
1721 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1722 (__v2df) __B,
1723 (__v2df) _mm_setzero_pd (),
1724 (__mmask8) __U);
1725}
1726
1727static __inline __m512 __DEFAULT_FN_ATTRS512
1729{
1730 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1732 (__v16sf) __A, (unsigned short)-1,
1734}
1735
1736static __inline__ __m512 __DEFAULT_FN_ATTRS512
1737_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1738{
1739 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1741 (__v16sf) __W, __U,
1743}
1744
1745static __inline __m512d __DEFAULT_FN_ATTRS512
1747{
1748 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1750 (__v8df) __A, (unsigned char)-1,
1752}
1753
1754static __inline__ __m512d __DEFAULT_FN_ATTRS512
1755_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1756{
1757 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1759 (__v8df) __W, __U,
1761}
1762
1763static __inline__ __m512 __DEFAULT_FN_ATTRS512
1764_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1765{
1766 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1768 (__v16sf) __W, __U,
1770}
1771
1772static __inline __m512 __DEFAULT_FN_ATTRS512
1774{
1775 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1777 (__v16sf) __A, (unsigned short)-1,
1779}
1780
1781static __inline __m512d __DEFAULT_FN_ATTRS512
1782_mm512_ceil_pd(__m512d __A)
1783{
1784 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1786 (__v8df) __A, (unsigned char)-1,
1788}
1789
1790static __inline__ __m512d __DEFAULT_FN_ATTRS512
1791_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1792{
1793 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1795 (__v8df) __W, __U,
1797}
1798
1799static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1800_mm512_abs_epi64(__m512i __A) {
1801 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1802}
1803
1804static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1805_mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
1806 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1807 (__v8di)_mm512_abs_epi64(__A),
1808 (__v8di)__W);
1809}
1810
1811static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1813 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1814 (__v8di)_mm512_abs_epi64(__A),
1815 (__v8di)_mm512_setzero_si512());
1816}
1817
1818static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1819_mm512_abs_epi32(__m512i __A) {
1820 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1821}
1822
1823static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1824_mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
1825 return (__m512i)__builtin_ia32_selectd_512(__U,
1826 (__v16si)_mm512_abs_epi32(__A),
1827 (__v16si)__W);
1828}
1829
1830static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1832 return (__m512i)__builtin_ia32_selectd_512(__U,
1833 (__v16si)_mm512_abs_epi32(__A),
1834 (__v16si)_mm512_setzero_si512());
1835}
1836
1837static __inline__ __m128 __DEFAULT_FN_ATTRS128
1838_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1839 __A = _mm_add_ss(__A, __B);
1840 return __builtin_ia32_selectss_128(__U, __A, __W);
1841}
1842
1843static __inline__ __m128 __DEFAULT_FN_ATTRS128
1844_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1845 __A = _mm_add_ss(__A, __B);
1846 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1847}
1848
1849#define _mm_add_round_ss(A, B, R) \
1850 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1851 (__v4sf)(__m128)(B), \
1852 (__v4sf)_mm_setzero_ps(), \
1853 (__mmask8)-1, (int)(R)))
1854
1855#define _mm_mask_add_round_ss(W, U, A, B, R) \
1856 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1857 (__v4sf)(__m128)(B), \
1858 (__v4sf)(__m128)(W), (__mmask8)(U), \
1859 (int)(R)))
1860
1861#define _mm_maskz_add_round_ss(U, A, B, R) \
1862 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1863 (__v4sf)(__m128)(B), \
1864 (__v4sf)_mm_setzero_ps(), \
1865 (__mmask8)(U), (int)(R)))
1866
1867static __inline__ __m128d __DEFAULT_FN_ATTRS128
1868_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1869 __A = _mm_add_sd(__A, __B);
1870 return __builtin_ia32_selectsd_128(__U, __A, __W);
1871}
1872
1873static __inline__ __m128d __DEFAULT_FN_ATTRS128
1874_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1875 __A = _mm_add_sd(__A, __B);
1876 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1877}
1878#define _mm_add_round_sd(A, B, R) \
1879 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1880 (__v2df)(__m128d)(B), \
1881 (__v2df)_mm_setzero_pd(), \
1882 (__mmask8)-1, (int)(R)))
1883
1884#define _mm_mask_add_round_sd(W, U, A, B, R) \
1885 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1886 (__v2df)(__m128d)(B), \
1887 (__v2df)(__m128d)(W), \
1888 (__mmask8)(U), (int)(R)))
1889
1890#define _mm_maskz_add_round_sd(U, A, B, R) \
1891 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1892 (__v2df)(__m128d)(B), \
1893 (__v2df)_mm_setzero_pd(), \
1894 (__mmask8)(U), (int)(R)))
1895
1896static __inline__ __m512d __DEFAULT_FN_ATTRS512
1897_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1898 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1899 (__v8df)_mm512_add_pd(__A, __B),
1900 (__v8df)__W);
1901}
1902
1903static __inline__ __m512d __DEFAULT_FN_ATTRS512
1904_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1905 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1906 (__v8df)_mm512_add_pd(__A, __B),
1907 (__v8df)_mm512_setzero_pd());
1908}
1909
1910static __inline__ __m512 __DEFAULT_FN_ATTRS512
1911_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1912 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1913 (__v16sf)_mm512_add_ps(__A, __B),
1914 (__v16sf)__W);
1915}
1916
1917static __inline__ __m512 __DEFAULT_FN_ATTRS512
1918_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1919 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1920 (__v16sf)_mm512_add_ps(__A, __B),
1921 (__v16sf)_mm512_setzero_ps());
1922}
1923
1924#define _mm512_add_round_pd(A, B, R) \
1925 ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1926 (__v8df)(__m512d)(B), (int)(R)))
1927
1928#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1929 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1930 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1931 (__v8df)(__m512d)(W)))
1932
1933#define _mm512_maskz_add_round_pd(U, A, B, R) \
1934 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1935 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1936 (__v8df)_mm512_setzero_pd()))
1937
1938#define _mm512_add_round_ps(A, B, R) \
1939 ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
1940 (__v16sf)(__m512)(B), (int)(R)))
1941
1942#define _mm512_mask_add_round_ps(W, U, A, B, R) \
1943 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1944 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1945 (__v16sf)(__m512)(W)))
1946
1947#define _mm512_maskz_add_round_ps(U, A, B, R) \
1948 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1949 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1950 (__v16sf)_mm512_setzero_ps()))
1951
1952static __inline__ __m128 __DEFAULT_FN_ATTRS128
1953_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1954 __A = _mm_sub_ss(__A, __B);
1955 return __builtin_ia32_selectss_128(__U, __A, __W);
1956}
1957
1958static __inline__ __m128 __DEFAULT_FN_ATTRS128
1959_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1960 __A = _mm_sub_ss(__A, __B);
1961 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1962}
1963#define _mm_sub_round_ss(A, B, R) \
1964 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1965 (__v4sf)(__m128)(B), \
1966 (__v4sf)_mm_setzero_ps(), \
1967 (__mmask8)-1, (int)(R)))
1968
1969#define _mm_mask_sub_round_ss(W, U, A, B, R) \
1970 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1971 (__v4sf)(__m128)(B), \
1972 (__v4sf)(__m128)(W), (__mmask8)(U), \
1973 (int)(R)))
1974
1975#define _mm_maskz_sub_round_ss(U, A, B, R) \
1976 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1977 (__v4sf)(__m128)(B), \
1978 (__v4sf)_mm_setzero_ps(), \
1979 (__mmask8)(U), (int)(R)))
1980
1981static __inline__ __m128d __DEFAULT_FN_ATTRS128
1982_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1983 __A = _mm_sub_sd(__A, __B);
1984 return __builtin_ia32_selectsd_128(__U, __A, __W);
1985}
1986
1987static __inline__ __m128d __DEFAULT_FN_ATTRS128
1988_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1989 __A = _mm_sub_sd(__A, __B);
1990 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1991}
1992
1993#define _mm_sub_round_sd(A, B, R) \
1994 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1995 (__v2df)(__m128d)(B), \
1996 (__v2df)_mm_setzero_pd(), \
1997 (__mmask8)-1, (int)(R)))
1998
1999#define _mm_mask_sub_round_sd(W, U, A, B, R) \
2000 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2001 (__v2df)(__m128d)(B), \
2002 (__v2df)(__m128d)(W), \
2003 (__mmask8)(U), (int)(R)))
2004
2005#define _mm_maskz_sub_round_sd(U, A, B, R) \
2006 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2007 (__v2df)(__m128d)(B), \
2008 (__v2df)_mm_setzero_pd(), \
2009 (__mmask8)(U), (int)(R)))
2010
2011static __inline__ __m512d __DEFAULT_FN_ATTRS512
2012_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2013 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2014 (__v8df)_mm512_sub_pd(__A, __B),
2015 (__v8df)__W);
2016}
2017
2018static __inline__ __m512d __DEFAULT_FN_ATTRS512
2019_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2020 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2021 (__v8df)_mm512_sub_pd(__A, __B),
2022 (__v8df)_mm512_setzero_pd());
2023}
2024
2025static __inline__ __m512 __DEFAULT_FN_ATTRS512
2026_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2027 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2028 (__v16sf)_mm512_sub_ps(__A, __B),
2029 (__v16sf)__W);
2030}
2031
2032static __inline__ __m512 __DEFAULT_FN_ATTRS512
2033_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2034 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2035 (__v16sf)_mm512_sub_ps(__A, __B),
2036 (__v16sf)_mm512_setzero_ps());
2037}
2038
/* 512-bit packed subtraction with an explicit rounding-mode argument R.
 * The unmasked forms invoke the subtract builtin directly; the mask/maskz
 * forms blend the rounded result with W (or zero) under mask U. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2066
/// Multiplies the low single-precision element of \a __A by that of \a __B;
/// if bit 0 of \a __U is clear, the low lane is taken from \a __W instead.
/// Upper lanes are passed through from \a __A.
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}
2072
/// Multiplies the low single-precision element of \a __A by that of \a __B;
/// if bit 0 of \a __U is clear, the low lane is zeroed.
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
/* Scalar single-precision multiply with an explicit rounding mode R,
 * in unmasked / merge-masked / zero-masked forms. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2095
/// Multiplies the low double-precision element of \a __A by that of \a __B;
/// if bit 0 of \a __U is clear, the low lane is taken from \a __W instead.
/// The upper lane is passed through from \a __A.
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}
2101
/// Multiplies the low double-precision element of \a __A by that of \a __B;
/// if bit 0 of \a __U is clear, the low lane is zeroed.
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
2107
/* Scalar double-precision multiply with an explicit rounding mode R,
 * in unmasked / merge-masked / zero-masked forms. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2125
/// Multiplies packed double-precision elements in \a __A and \a __B.
/// Lanes whose bit in \a __U is clear are copied from \a __W instead.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)__W);
}
2132
/// Multiplies packed double-precision elements in \a __A and \a __B.
/// Lanes whose bit in \a __U is clear are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
2139
/// Multiplies packed single-precision elements in \a __A and \a __B.
/// Lanes whose bit in \a __U is clear are copied from \a __W instead.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)__W);
}
2146
/// Multiplies packed single-precision elements in \a __A and \a __B.
/// Lanes whose bit in \a __U is clear are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2153
/* 512-bit packed multiplication with an explicit rounding-mode argument R.
 * The unmasked forms invoke the multiply builtin directly; the mask/maskz
 * forms blend the rounded result with W (or zero) under mask U. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2181
/// Divides the low single-precision element of \a __A by that of \a __B;
/// if bit 0 of \a __U is clear, the low lane is taken from \a __W instead.
/// Upper lanes are passed through from \a __A.
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_div_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}
2187
/// Divides the low single-precision element of \a __A by that of \a __B;
/// if bit 0 of \a __U is clear, the low lane is zeroed.
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_div_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
2193
/* Scalar single-precision divide with an explicit rounding mode R,
 * in unmasked / merge-masked / zero-masked forms. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2211
/// Divides the low double-precision element of \a __A by that of \a __B;
/// if bit 0 of \a __U is clear, the low lane is taken from \a __W instead.
/// The upper lane is passed through from \a __A.
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_div_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}
2217
/// Divides the low double-precision element of \a __A by that of \a __B;
/// if bit 0 of \a __U is clear, the low lane is zeroed.
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_div_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
2223
/* Scalar double-precision divide with an explicit rounding mode R,
 * in unmasked / merge-masked / zero-masked forms. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2241
2242static __inline __m512d
2244 return (__m512d)((__v8df)__a/(__v8df)__b);
2245}
2246
/// Divides packed double-precision elements in \a __A by those in \a __B.
/// Lanes whose bit in \a __U is clear are copied from \a __W instead.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)__W);
}
2253
/// Divides packed double-precision elements in \a __A by those in \a __B.
/// Lanes whose bit in \a __U is clear are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
2260
/// Divides packed single-precision (32-bit) floating-point elements in
/// \a __a by the corresponding elements of \a __b.
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_div_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a/(__v16sf)__b);
}
2265
/// Divides packed single-precision elements in \a __A by those in \a __B.
/// Lanes whose bit in \a __U is clear are copied from \a __W instead.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)__W);
}
2272
/// Divides packed single-precision elements in \a __A by those in \a __B.
/// Lanes whose bit in \a __U is clear are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2279
/* 512-bit packed division with an explicit rounding-mode argument R.
 * The unmasked forms invoke the divide builtin directly; the mask/maskz
 * forms blend the rounded result with W (or zero) under mask U. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2307
/* VRNDSCALEPS/VRNDSCALEPD wrappers: round each element according to the
 * imm8 control (rounding mode and number of fraction bits to preserve).
 * The non-"_round" forms use the current rounding direction; the "_round"
 * forms take an explicit SAE/rounding argument R.  Masked forms merge with
 * the passthrough operand or zero. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))

#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2371
/* Fused multiply-add family for packed doubles with an explicit rounding
 * argument R.  fmsub/fnmadd/fnmsub are expressed by negating the C and/or A
 * operands of the fmadd builtin.  _mask merges with A, _mask3 merges with C,
 * _maskz zeroes inactive lanes. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2454
/// Computes (__A * __B) + __C for packed double-precision elements using a
/// single fused operation.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
                                            (__v8df)__C);
}

/// Masked fmadd: lanes with a clear bit in \a __U keep the value from \a __A.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__A);
}

/// Masked fmadd: lanes with a clear bit in \a __U keep the value from \a __C.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__C);
}

/// Masked fmadd: lanes with a clear bit in \a __U are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}
2479
/// Computes (__A * __B) - __C for packed double-precision elements using a
/// single fused operation (C is negated before the fused add).
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
                                            -(__v8df)__C);
}

/// Masked fmsub: lanes with a clear bit in \a __U keep the value from \a __A.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__A);
}

/// Masked fmsub: lanes with a clear bit in \a __U keep the value from \a __C.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__C);
}

/// Masked fmsub: lanes with a clear bit in \a __U are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}
2504
/// Computes -(__A * __B) + __C for packed double-precision elements using a
/// single fused operation (A is negated before the fused multiply).
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
                                            (__v8df)__C);
}

/// Masked fnmadd: lanes with a clear bit in \a __U keep the value from \a __A.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__A);
}

/// Masked fnmadd: lanes with a clear bit in \a __U keep the value from \a __C.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__C);
}

/// Masked fnmadd: lanes with a clear bit in \a __U are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}
2529
/// Computes -(__A * __B) - __C for packed double-precision elements using a
/// single fused operation (both A and C are negated).
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
                                            -(__v8df)__C);
}

/// Masked fnmsub: lanes with a clear bit in \a __U keep the value from \a __A.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__A);
}

/// Masked fnmsub: lanes with a clear bit in \a __U keep the value from \a __C.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__C);
}

/// Masked fnmsub: lanes with a clear bit in \a __U are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}
2554
/* Fused multiply-add family for packed singles with an explicit rounding
 * argument R.  fmsub/fnmadd/fnmsub are expressed by negating operands of
 * the fmadd builtin.  _mask merges with A, _mask3 merges with C, _maskz
 * zeroes inactive lanes. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2637
/// Computes (__A * __B) + __C for packed single-precision elements using a
/// single fused operation.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
                                           (__v16sf)__C);
}

/// Masked fmadd: lanes with a clear bit in \a __U keep the value from \a __A.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__A);
}

/// Masked fmadd: lanes with a clear bit in \a __U keep the value from \a __C.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__C);
}

/// Masked fmadd: lanes with a clear bit in \a __U are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}
2662
/// Computes (__A * __B) - __C for packed single-precision elements using a
/// single fused operation (C is negated before the fused add).
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
                                           -(__v16sf)__C);
}

/// Masked fmsub: lanes with a clear bit in \a __U keep the value from \a __A.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__A);
}

/// Masked fmsub: lanes with a clear bit in \a __U keep the value from \a __C.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__C);
}

/// Masked fmsub: lanes with a clear bit in \a __U are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}
2687
/// Computes -(__A * __B) + __C for packed single-precision elements using a
/// single fused operation (A is negated before the fused multiply).
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
                                           (__v16sf)__C);
}

/// Masked fnmadd: lanes with a clear bit in \a __U keep the value from \a __A.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__A);
}

/// Masked fnmadd: lanes with a clear bit in \a __U keep the value from \a __C.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__C);
}

/// Masked fnmadd: lanes with a clear bit in \a __U are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}
2712
/// Computes -(__A * __B) - __C for packed single-precision elements using a
/// single fused operation (both A and C are negated).
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
                                           -(__v16sf)__C);
}

/// Masked fnmsub: lanes with a clear bit in \a __U keep the value from \a __A.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__A);
}

/// Masked fnmsub: lanes with a clear bit in \a __U keep the value from \a __C.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__C);
}

/// Masked fnmsub: lanes with a clear bit in \a __U are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}
2737
/* Fused multiply with alternating add/subtract for packed doubles, with an
 * explicit rounding argument R.  fmaddsub subtracts C in even lanes and adds
 * it in odd lanes; fmsubadd is obtained by negating C. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2785
2786
2787static __inline__ __m512d __DEFAULT_FN_ATTRS512
2788_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2789{
2790 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2791 (__v8df) __B,
2792 (__v8df) __C,
2793 (__mmask8) -1,
2795}
2796
2797static __inline__ __m512d __DEFAULT_FN_ATTRS512
2798_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2799{
2800 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2801 (__v8df) __B,
2802 (__v8df) __C,
2803 (__mmask8) __U,
2805}
2806
2807static __inline__ __m512d __DEFAULT_FN_ATTRS512
2808_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2809{
2810 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2811 (__v8df) __B,
2812 (__v8df) __C,
2813 (__mmask8) __U,
2815}
2816
2817static __inline__ __m512d __DEFAULT_FN_ATTRS512
2818_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2819{
2820 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2821 (__v8df) __B,
2822 (__v8df) __C,
2823 (__mmask8) __U,
2825}
2826
2827static __inline__ __m512d __DEFAULT_FN_ATTRS512
2828_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2829{
2830 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2831 (__v8df) __B,
2832 -(__v8df) __C,
2833 (__mmask8) -1,
2835}
2836
2837static __inline__ __m512d __DEFAULT_FN_ATTRS512
2838_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2839{
2840 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2841 (__v8df) __B,
2842 -(__v8df) __C,
2843 (__mmask8) __U,
2845}
2846
2847static __inline__ __m512d __DEFAULT_FN_ATTRS512
2848_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2849{
2850 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2851 (__v8df) __B,
2852 -(__v8df) __C,
2853 (__mmask8) __U,
2855}
2856
/* Fused multiply with alternating add/subtract for packed singles, with an
 * explicit rounding argument R.  fmaddsub subtracts C in even lanes and adds
 * it in odd lanes; fmsubadd is obtained by negating C. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
2904
2905
2906static __inline__ __m512 __DEFAULT_FN_ATTRS512
2907_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2908{
2909 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2910 (__v16sf) __B,
2911 (__v16sf) __C,
2912 (__mmask16) -1,
2914}
2915
2916static __inline__ __m512 __DEFAULT_FN_ATTRS512
2917_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2918{
2919 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2920 (__v16sf) __B,
2921 (__v16sf) __C,
2922 (__mmask16) __U,
2924}
2925
2926static __inline__ __m512 __DEFAULT_FN_ATTRS512
2927_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2928{
2929 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2930 (__v16sf) __B,
2931 (__v16sf) __C,
2932 (__mmask16) __U,
2934}
2935
2936static __inline__ __m512 __DEFAULT_FN_ATTRS512
2937_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2938{
2939 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2940 (__v16sf) __B,
2941 (__v16sf) __C,
2942 (__mmask16) __U,
2944}
2945
2946static __inline__ __m512 __DEFAULT_FN_ATTRS512
2947_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2948{
2949 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2950 (__v16sf) __B,
2951 -(__v16sf) __C,
2952 (__mmask16) -1,
2954}
2955
2956static __inline__ __m512 __DEFAULT_FN_ATTRS512
2957_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2958{
2959 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2960 (__v16sf) __B,
2961 -(__v16sf) __C,
2962 (__mmask16) __U,
2964}
2965
2966static __inline__ __m512 __DEFAULT_FN_ATTRS512
2967_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2968{
2969 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2970 (__v16sf) __B,
2971 -(__v16sf) __C,
2972 (__mmask16) __U,
2974}
2975
/* mask3 fused multiply-subtract with explicit rounding; masked-off lanes
   keep C. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

/* mask3 fused multiply with alternating subtract/add, explicit rounding. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2993
2994
2995static __inline__ __m512d __DEFAULT_FN_ATTRS512
2996_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2997{
2998 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2999 (__v8df) __B,
3000 (__v8df) __C,
3001 (__mmask8) __U,
3003}
3004
/* Single-precision mask3 fmsubadd with explicit rounding; masked-off lanes
   keep C. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))
3010
3011
3012static __inline__ __m512 __DEFAULT_FN_ATTRS512
3013_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3014{
3015 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3016 (__v16sf) __B,
3017 (__v16sf) __C,
3018 (__mmask16) __U,
3020}
3021
/* fnmadd is expressed as fmadd with the B operand negated; fnmsub negates
   both B and C.  Merge-masking (mask) keeps A; mask3 forms keep C. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


/* mask3 fnmsub uses the fmsub builtin with A negated instead. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
3059
3060/* Vector permutations */
3061
3062static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3063_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
3064 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3065 (__v16si) __B);
3066}
3067
3068static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3069_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3070 __m512i __B) {
3071 return (__m512i)__builtin_ia32_selectd_512(__U,
3072 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3073 (__v16si)__A);
3074}
3075
3076static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3077_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3078 __m512i __B) {
3079 return (__m512i)__builtin_ia32_selectd_512(__U,
3080 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3081 (__v16si)__I);
3082}
3083
3084static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3085_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3086 __m512i __B) {
3087 return (__m512i)__builtin_ia32_selectd_512(__U,
3088 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3089 (__v16si)_mm512_setzero_si512());
3090}
3091
3092static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3093_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) {
3094 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3095 (__v8di) __B);
3096}
3097
3098static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3099_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3100 __m512i __B) {
3101 return (__m512i)__builtin_ia32_selectq_512(__U,
3102 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3103 (__v8di)__A);
3104}
3105
3106static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3107_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3108 __m512i __B) {
3109 return (__m512i)__builtin_ia32_selectq_512(__U,
3110 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3111 (__v8di)__I);
3112}
3113
3114static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3115_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3116 __m512i __B) {
3117 return (__m512i)__builtin_ia32_selectq_512(__U,
3118 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3119 (__v8di)_mm512_setzero_si512());
3120}
3121
/* Concatenate A:B (A high), shift right by I elements, return low 512 bits. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

/* Dword variant of the same concatenate-and-shift operation. */
#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
3149/* Vector Extract */
3150
/* Extract the 256-bit double-precision half selected by I (0 or 1). */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8) - 1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

/* Extract the 128-bit single-precision lane selected by I (0..3). */
#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8) - 1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3180
3181/* Vector Blend */
3182
3183static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3184_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
3185 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3186 (__v8df) __W,
3187 (__v8df) __A);
3188}
3189
3190static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3191_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
3192 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3193 (__v16sf) __W,
3194 (__v16sf) __A);
3195}
3196
3197static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3198_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
3199 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3200 (__v8di) __W,
3201 (__v8di) __A);
3202}
3203
3204static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3205_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
3206 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3207 (__v16si) __W,
3208 (__v16si) __A);
3209}
3210
3211/* Compare */
3212
/* Generic packed-float compare: P is one of the _CMP_* predicates, R the
   rounding/SAE control.  Returns one mask bit per lane. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Fixed-predicate convenience wrappers (ordered/unordered suffixes follow
   the _CMP_* encoding). */
#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

/* Double-precision counterparts: 8 lanes, 8-bit result mask. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3322
3323/* Conversion */
3324
/* Truncating float -> unsigned dword conversion with explicit rounding/SAE
   control R. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
3339
3340
3341static __inline __m512i __DEFAULT_FN_ATTRS512
3343{
3344 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3345 (__v16si)
3347 (__mmask16) -1,
3349}
3350
3351static __inline__ __m512i __DEFAULT_FN_ATTRS512
3352_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3353{
3354 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3355 (__v16si) __W,
3356 (__mmask16) __U,
3358}
3359
3360static __inline__ __m512i __DEFAULT_FN_ATTRS512
3362{
3363 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3364 (__v16si) _mm512_setzero_si512 (),
3365 (__mmask16) __U,
3367}
3368
/* Signed dword -> float conversion with explicit rounding control R. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

/* Unsigned dword -> float conversion with explicit rounding control R. */
#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3398
3399static __inline__ __m512
3401 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3402}
3403
3404static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3405_mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3406 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3407 (__v16sf)_mm512_cvtepu32_ps(__A),
3408 (__v16sf)__W);
3409}
3410
3411static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3413 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3414 (__v16sf)_mm512_cvtepu32_ps(__A),
3415 (__v16sf)_mm512_setzero_ps());
3416}
3417
3418static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3420 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3421}
3422
3423static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3424_mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3425 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3426 (__v8df)_mm512_cvtepi32_pd(__A),
3427 (__v8df)__W);
3428}
3429
3430static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3432 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3433 (__v8df)_mm512_cvtepi32_pd(__A),
3434 (__v8df)_mm512_setzero_pd());
3435}
3436
3437static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3439 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3440}
3441
3442static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3443_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3444 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3445}
3446
3447static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3449 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3450}
3451
3452static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3453_mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3454 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3455 (__v16sf)_mm512_cvtepi32_ps(__A),
3456 (__v16sf)__W);
3457}
3458
3459static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3461 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3462 (__v16sf)_mm512_cvtepi32_ps(__A),
3463 (__v16sf)_mm512_setzero_ps());
3464}
3465
3466static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3468 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3469}
3470
3471static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3472_mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3473 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3474 (__v8df)_mm512_cvtepu32_pd(__A),
3475 (__v8df)__W);
3476}
3477
3478static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3480 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3481 (__v8df)_mm512_cvtepu32_pd(__A),
3482 (__v8df)_mm512_setzero_pd());
3483}
3484
3485static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3487 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3488}
3489
3490static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3491_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3492 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3493}
3494
/* Double -> float (narrowing to 256 bits) with explicit rounding control. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
3509
3510static __inline__ __m256 __DEFAULT_FN_ATTRS512
3511_mm512_cvtpd_ps (__m512d __A)
3512{
3513 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3514 (__v8sf) _mm256_undefined_ps (),
3515 (__mmask8) -1,
3517}
3518
3519static __inline__ __m256 __DEFAULT_FN_ATTRS512
3520_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3521{
3522 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3523 (__v8sf) __W,
3524 (__mmask8) __U,
3526}
3527
3528static __inline__ __m256 __DEFAULT_FN_ATTRS512
3530{
3531 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3532 (__v8sf) _mm256_setzero_ps (),
3533 (__mmask8) __U,
3535}
3536
3537static __inline__ __m512 __DEFAULT_FN_ATTRS512
3539{
3540 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3541 (__v8sf) _mm256_setzero_ps (),
3542 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3543}
3544
3545static __inline__ __m512 __DEFAULT_FN_ATTRS512
3546_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3547{
3548 return (__m512) __builtin_shufflevector (
3550 __U, __A),
3551 (__v8sf) _mm256_setzero_ps (),
3552 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3553}
3554
/* Float -> half-precision with explicit rounding immediate I. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

/* NOTE: historical argument naming — U is the pass-through vector and W the
   mask here, the opposite of the usual convention. */
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(U), \
                                             (__mmask16)(W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)))

/* The non-round names are aliases: vcvtps2ph always takes an immediate. */
#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph

/* Half-precision -> float with explicit rounding/SAE control R. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3588
3589
3590static __inline __m512 __DEFAULT_FN_ATTRS512
3592{
3593 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3594 (__v16sf)
3596 (__mmask16) -1,
3598}
3599
3600static __inline__ __m512 __DEFAULT_FN_ATTRS512
3601_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3602{
3603 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3604 (__v16sf) __W,
3605 (__mmask16) __U,
3607}
3608
3609static __inline__ __m512 __DEFAULT_FN_ATTRS512
3611{
3612 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3613 (__v16sf) _mm512_setzero_ps (),
3614 (__mmask16) __U,
3616}
3617
/* Truncating double -> signed dword (256-bit result) with explicit
   rounding/SAE control R. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3632
3633static __inline __m256i __DEFAULT_FN_ATTRS512
3635{
3636 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3637 (__v8si)_mm256_setzero_si256(),
3638 (__mmask8) -1,
3640}
3641
3642static __inline__ __m256i __DEFAULT_FN_ATTRS512
3643_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3644{
3645 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3646 (__v8si) __W,
3647 (__mmask8) __U,
3649}
3650
3651static __inline__ __m256i __DEFAULT_FN_ATTRS512
3653{
3654 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3655 (__v8si) _mm256_setzero_si256 (),
3656 (__mmask8) __U,
3658}
3659
/* Truncating float -> signed dword with explicit rounding/SAE control R. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3674
3675static __inline __m512i __DEFAULT_FN_ATTRS512
3677{
3678 return (__m512i)
3679 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3680 (__v16si) _mm512_setzero_si512 (),
3682}
3683
3684static __inline__ __m512i __DEFAULT_FN_ATTRS512
3685_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3686{
3687 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3688 (__v16si) __W,
3689 (__mmask16) __U,
3691}
3692
3693static __inline__ __m512i __DEFAULT_FN_ATTRS512
3695{
3696 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3697 (__v16si) _mm512_setzero_si512 (),
3698 (__mmask16) __U,
3700}
3701
/* Rounding float -> signed dword with explicit rounding control R. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
3716
3717static __inline__ __m512i __DEFAULT_FN_ATTRS512
3719{
3720 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3721 (__v16si) _mm512_undefined_epi32 (),
3722 (__mmask16) -1,
3724}
3725
3726static __inline__ __m512i __DEFAULT_FN_ATTRS512
3727_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3728{
3729 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3730 (__v16si) __W,
3731 (__mmask16) __U,
3733}
3734
3735static __inline__ __m512i __DEFAULT_FN_ATTRS512
3737{
3738 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3739 (__v16si)
3741 (__mmask16) __U,
3743}
3744
/* Rounding double -> signed dword (256-bit result) with explicit rounding
   control R. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
3759
3760static __inline__ __m256i __DEFAULT_FN_ATTRS512
3762{
3763 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3764 (__v8si)
3766 (__mmask8) -1,
3768}
3769
3770static __inline__ __m256i __DEFAULT_FN_ATTRS512
3771_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3772{
3773 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3774 (__v8si) __W,
3775 (__mmask8) __U,
3777}
3778
3779static __inline__ __m256i __DEFAULT_FN_ATTRS512
3781{
3782 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3783 (__v8si)
3785 (__mmask8) __U,
3787}
3788
/* Rounding float -> unsigned dword with explicit rounding control R. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3803
3804static __inline__ __m512i __DEFAULT_FN_ATTRS512
3806{
3807 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
3808 (__v16si)\
3810 (__mmask16) -1,\
3812}
3813
3814static __inline__ __m512i __DEFAULT_FN_ATTRS512
3815_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3816{
3817 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3818 (__v16si) __W,
3819 (__mmask16) __U,
3821}
3822
3823static __inline__ __m512i __DEFAULT_FN_ATTRS512
3825{
3826 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3827 (__v16si)
3829 (__mmask16) __U ,
3831}
3832
/* Rounding double -> unsigned dword (256-bit result) with explicit rounding
   control R. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3847
3848static __inline__ __m256i __DEFAULT_FN_ATTRS512
3850{
3851 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3852 (__v8si)
3854 (__mmask8) -1,
3856}
3857
3858static __inline__ __m256i __DEFAULT_FN_ATTRS512
3859_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
3860{
3861 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3862 (__v8si) __W,
3863 (__mmask8) __U,
3865}
3866
3867static __inline__ __m256i __DEFAULT_FN_ATTRS512
3869{
3870 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3871 (__v8si)
3873 (__mmask8) __U,
3875}
3876
3877static __inline__ double __DEFAULT_FN_ATTRS512
3879{
3880 return __a[0];
3881}
3882
3883static __inline__ float __DEFAULT_FN_ATTRS512
3885{
3886 return __a[0];
3887}
3888
3889/* Unpack and Interleave */
3890
/* Interleave the odd-indexed (high) 64-bit float elements within each
   128-bit lane of __a and __b. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpackhi_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpackhi_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Interleave the even-indexed (low) 64-bit float elements within each
   128-bit lane of __a and __b. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpacklo_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpacklo_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
3934
/* Interleave the two high 32-bit float elements of each 128-bit lane of
   __a and __b. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         2, 18, 3, 19,
                                         2+4, 18+4, 3+4, 19+4,
                                         2+8, 18+8, 3+8, 19+8,
                                         2+12, 18+12, 3+12, 19+12);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Interleave the two low 32-bit float elements of each 128-bit lane of
   __a and __b. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         0, 16, 1, 17,
                                         0+4, 16+4, 1+4, 17+4,
                                         0+8, 16+8, 1+8, 17+8,
                                         0+12, 16+12, 1+12, 17+12);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
3984
/* Interleave the two high 32-bit integer elements of each 128-bit lane of
   __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2, 18, 3, 19,
                                          2+4, 18+4, 3+4, 19+4,
                                          2+8, 18+8, 3+8, 19+8,
                                          2+12, 18+12, 3+12, 19+12);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Interleave the two low 32-bit integer elements of each 128-bit lane of
   __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          0, 16, 1, 17,
                                          0+4, 16+4, 1+4, 17+4,
                                          0+8, 16+8, 1+8, 17+8,
                                          0+12, 16+12, 1+12, 17+12);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
4034
/* Interleave the odd-indexed (high) 64-bit integer elements within each
   128-bit lane of __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Interleave the even-indexed (low) 64-bit integer elements within each
   128-bit lane of __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4078
4079
4080/* SIMD load ops */
4081
4082static __inline __m512i __DEFAULT_FN_ATTRS512
4084{
4085 struct __loadu_si512 {
4086 __m512i_u __v;
4087 } __attribute__((__packed__, __may_alias__));
4088 return ((const struct __loadu_si512*)__P)->__v;
4089}
4090
4091static __inline __m512i __DEFAULT_FN_ATTRS512
4093{
4094 struct __loadu_epi32 {
4095 __m512i_u __v;
4096 } __attribute__((__packed__, __may_alias__));
4097 return ((const struct __loadu_epi32*)__P)->__v;
4098}
4099
4100static __inline __m512i __DEFAULT_FN_ATTRS512
4101_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4102{
4103 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4104 (__v16si) __W,
4105 (__mmask16) __U);
4106}
4107
4108
4109static __inline __m512i __DEFAULT_FN_ATTRS512
4111{
4112 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4113 (__v16si)
4115 (__mmask16) __U);
4116}
4117
4118static __inline __m512i __DEFAULT_FN_ATTRS512
4120{
4121 struct __loadu_epi64 {
4122 __m512i_u __v;
4123 } __attribute__((__packed__, __may_alias__));
4124 return ((const struct __loadu_epi64*)__P)->__v;
4125}
4126
4127static __inline __m512i __DEFAULT_FN_ATTRS512
4128_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4129{
4130 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4131 (__v8di) __W,
4132 (__mmask8) __U);
4133}
4134
4135static __inline __m512i __DEFAULT_FN_ATTRS512
4137{
4138 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4139 (__v8di)
4141 (__mmask8) __U);
4142}
4143
4144static __inline __m512 __DEFAULT_FN_ATTRS512
4145_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4146{
4147 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4148 (__v16sf) __W,
4149 (__mmask16) __U);
4150}
4151
4152static __inline __m512 __DEFAULT_FN_ATTRS512
4154{
4155 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4156 (__v16sf)
4158 (__mmask16) __U);
4159}
4160
4161static __inline __m512d __DEFAULT_FN_ATTRS512
4162_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4163{
4164 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4165 (__v8df) __W,
4166 (__mmask8) __U);
4167}
4168
4169static __inline __m512d __DEFAULT_FN_ATTRS512
4171{
4172 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4173 (__v8df)
4175 (__mmask8) __U);
4176}
4177
4178static __inline __m512d __DEFAULT_FN_ATTRS512
4180{
4181 struct __loadu_pd {
4182 __m512d_u __v;
4183 } __attribute__((__packed__, __may_alias__));
4184 return ((const struct __loadu_pd*)__p)->__v;
4185}
4186
4187static __inline __m512 __DEFAULT_FN_ATTRS512
4189{
4190 struct __loadu_ps {
4191 __m512_u __v;
4192 } __attribute__((__packed__, __may_alias__));
4193 return ((const struct __loadu_ps*)__p)->__v;
4194}
4195
4196static __inline __m512 __DEFAULT_FN_ATTRS512
4198{
4199 return *(const __m512*)__p;
4200}
4201
4202static __inline __m512 __DEFAULT_FN_ATTRS512
4203_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4204{
4205 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4206 (__v16sf) __W,
4207 (__mmask16) __U);
4208}
4209
4210static __inline __m512 __DEFAULT_FN_ATTRS512
4212{
4213 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4214 (__v16sf)
4216 (__mmask16) __U);
4217}
4218
4219static __inline __m512d __DEFAULT_FN_ATTRS512
4221{
4222 return *(const __m512d*)__p;
4223}
4224
4225static __inline __m512d __DEFAULT_FN_ATTRS512
4226_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4227{
4228 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4229 (__v8df) __W,
4230 (__mmask8) __U);
4231}
4232
4233static __inline __m512d __DEFAULT_FN_ATTRS512
4235{
4236 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4237 (__v8df)
4239 (__mmask8) __U);
4240}
4241
4242static __inline __m512i __DEFAULT_FN_ATTRS512
4244{
4245 return *(const __m512i *) __P;
4246}
4247
4248static __inline __m512i __DEFAULT_FN_ATTRS512
4250{
4251 return *(const __m512i *) __P;
4252}
4253
4254static __inline __m512i __DEFAULT_FN_ATTRS512
4256{
4257 return *(const __m512i *) __P;
4258}
4259
4260/* SIMD store ops */
4261
/* Unaligned 512-bit store of 8 x i64 via a packed may_alias wrapper. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
  struct __storeu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi64*)__P)->__v = __A;
}

/* Masked unaligned store: only lanes with a set bit in __U are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

/* Unaligned 512-bit integer store. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  struct __storeu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si512*)__P)->__v = __A;
}

/* Unaligned 512-bit store of 16 x i32. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
  struct __storeu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi32*)__P)->__v = __A;
}
4295
4296static __inline void __DEFAULT_FN_ATTRS512
4298{
4299 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4300 (__mmask16) __U);
4301}
4302
/* Masked unaligned store of 8 x double: only set-mask lanes are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Unaligned store of 8 x double via a packed may_alias wrapper. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_pd(void *__P, __m512d __A)
{
  struct __storeu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pd*)__P)->__v = __A;
}

/* Masked unaligned store of 16 x float: only set-mask lanes are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Unaligned store of 16 x float via a packed may_alias wrapper. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_ps(void *__P, __m512 __A)
{
  struct __storeu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ps*)__P)->__v = __A;
}

/* Masked aligned store of 8 x double: only set-mask lanes are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Aligned store: __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

/* Masked aligned store of 16 x float: only set-mask lanes are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Aligned store: __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}

/* Aligned 512-bit integer stores: __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4376
4377/* Mask ops */
4378
4381 return __builtin_ia32_knothi(__M);
4382}
4383
4384/* Integer compare */
4385
/* Convenience comparison aliases: each predicate-specific macro expands to
   the generic _mm512_[mask_]cmp_*_mask macro with the matching
   _MM_CMPINT_* predicate. Signed 32-bit comparisons: */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 32-bit comparisons: */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Signed 64-bit comparisons: */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 64-bit comparisons: */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4485
4486static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4488 /* This function always performs a signed extension, but __v16qi is a char
4489 which may be signed or unsigned, so use __v16qs. */
4490 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4491}
4492
4493static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4494_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4495 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4496 (__v16si)_mm512_cvtepi8_epi32(__A),
4497 (__v16si)__W);
4498}
4499
4500static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4502 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4503 (__v16si)_mm512_cvtepi8_epi32(__A),
4504 (__v16si)_mm512_setzero_si512());
4505}
4506
4507static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4509 /* This function always performs a signed extension, but __v16qi is a char
4510 which may be signed or unsigned, so use __v16qs. */
4511 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4512}
4513
4514static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4515_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4516 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4517 (__v8di)_mm512_cvtepi8_epi64(__A),
4518 (__v8di)__W);
4519}
4520
4521static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4523 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4524 (__v8di)_mm512_cvtepi8_epi64(__A),
4525 (__v8di)_mm512_setzero_si512 ());
4526}
4527
4528static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4530 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4531}
4532
4533static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4534_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4535 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4536 (__v8di)_mm512_cvtepi32_epi64(__X),
4537 (__v8di)__W);
4538}
4539
4540static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4542 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4543 (__v8di)_mm512_cvtepi32_epi64(__X),
4544 (__v8di)_mm512_setzero_si512());
4545}
4546
4547static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4549 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4550}
4551
4552static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4553_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4554 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4555 (__v16si)_mm512_cvtepi16_epi32(__A),
4556 (__v16si)__W);
4557}
4558
4559static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4561 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4562 (__v16si)_mm512_cvtepi16_epi32(__A),
4563 (__v16si)_mm512_setzero_si512 ());
4564}
4565
4566static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4568 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4569}
4570
4571static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4572_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4573 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4574 (__v8di)_mm512_cvtepi16_epi64(__A),
4575 (__v8di)__W);
4576}
4577
4578static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4580 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4581 (__v8di)_mm512_cvtepi16_epi64(__A),
4582 (__v8di)_mm512_setzero_si512());
4583}
4584
4585static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4587 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4588}
4589
4590static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4591_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4592 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4593 (__v16si)_mm512_cvtepu8_epi32(__A),
4594 (__v16si)__W);
4595}
4596
4597static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4599 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4600 (__v16si)_mm512_cvtepu8_epi32(__A),
4601 (__v16si)_mm512_setzero_si512());
4602}
4603
4604static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4606 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4607}
4608
4609static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4610_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4611 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4612 (__v8di)_mm512_cvtepu8_epi64(__A),
4613 (__v8di)__W);
4614}
4615
4616static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4618 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4619 (__v8di)_mm512_cvtepu8_epi64(__A),
4620 (__v8di)_mm512_setzero_si512());
4621}
4622
4623static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4625 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4626}
4627
4628static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4629_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4630 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4631 (__v8di)_mm512_cvtepu32_epi64(__X),
4632 (__v8di)__W);
4633}
4634
4635static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4637 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4638 (__v8di)_mm512_cvtepu32_epi64(__X),
4639 (__v8di)_mm512_setzero_si512());
4640}
4641
4642static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4644 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4645}
4646
4647static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4648_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4649 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4650 (__v16si)_mm512_cvtepu16_epi32(__A),
4651 (__v16si)__W);
4652}
4653
4654static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4656 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4657 (__v16si)_mm512_cvtepu16_epi32(__A),
4658 (__v16si)_mm512_setzero_si512());
4659}
4660
4661static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4663 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4664}
4665
4666static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4667_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4668 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4669 (__v8di)_mm512_cvtepu16_epi64(__A),
4670 (__v8di)__W);
4671}
4672
4673static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4675 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4676 (__v8di)_mm512_cvtepu16_epi64(__A),
4677 (__v8di)_mm512_setzero_si512());
4678}
4679
/* Rotate each 32-bit element of __A right by the per-element count in __B;
   fshr with both value operands equal is a rotate-right. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rorv_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rorv_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Rotate each 64-bit element of __A right by the per-element count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rorv_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rorv_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4723
4724
4725
/* Generic integer comparisons; p is an _MM_CMPINT_* predicate and must be a
   compile-time constant (macros because the builtin requires an immediate). */
#define _mm512_cmp_epi32_mask(a, b, p) \
    ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                            (__v16si)(__m512i)(b), (int)(p), \
                                            (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
    ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                             (__v16si)(__m512i)(b), (int)(p), \
                                             (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
    ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                           (__v8di)(__m512i)(b), (int)(p), \
                                           (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
    ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                            (__v8di)(__m512i)(b), (int)(p), \
                                            (__mmask8)-1))

/* Masked forms: result bits are zero where the incoming mask m is clear. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
    ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                            (__v16si)(__m512i)(b), (int)(p), \
                                            (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
    ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                             (__v16si)(__m512i)(b), (int)(p), \
                                             (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
    ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                           (__v8di)(__m512i)(b), (int)(p), \
                                           (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
    ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                            (__v8di)(__m512i)(b), (int)(p), \
                                            (__mmask8)(m)))
4765
/* Rotate-left by an immediate count b (macros: the builtin needs an
   immediate operand). */
#define _mm512_rol_epi32(a, b) \
    ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
    ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                         (__v16si)_mm512_rol_epi32((a), (b)), \
                                         (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
    ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                         (__v16si)_mm512_rol_epi32((a), (b)), \
                                         (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
    ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
    ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                         (__v8di)_mm512_rol_epi64((a), (b)), \
                                         (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
    ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                         (__v8di)_mm512_rol_epi64((a), (b)), \
                                         (__v8di)_mm512_setzero_si512()))
4791
/* Rotate each 32-bit element of __A left by the per-element count in __B;
   fshl with both value operands equal is a rotate-left. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rolv_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rolv_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Rotate each 64-bit element of __A left by the per-element count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rolv_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rolv_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4835
/* Rotate-right by an immediate count B (macros: the builtin needs an
   immediate operand). */
#define _mm512_ror_epi32(A, B) \
    ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                         (__v16si)_mm512_ror_epi32((A), (B)), \
                                         (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                         (__v16si)_mm512_ror_epi32((A), (B)), \
                                         (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
    ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
    ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                         (__v8di)_mm512_ror_epi64((A), (B)), \
                                         (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
    ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                         (__v8di)_mm512_ror_epi64((A), (B)), \
                                         (__v8di)_mm512_setzero_si512()))
4861
/* Shift each 32-bit element of __A left by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_slli_epi32(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Shift each 64-bit element of __A left by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_slli_epi64(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_slli_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_slli_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Logical right shift of each 32-bit element of __A by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srli_epi32(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srli_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srli_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Logical right shift of each 64-bit element of __A by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srli_epi64(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srli_epi64(__A, __B),
                                             (__v8di)__W);
}
4934
4935static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4936_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4937 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4938 (__v8di)_mm512_srli_epi64(__A, __B),
4939 (__v8di)_mm512_setzero_si512());
4940}
4941
4942static __inline__ __m512i __DEFAULT_FN_ATTRS512
4943_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4944{
4945 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4946 (__v16si) __W,
4947 (__mmask16) __U);
4948}
4949
4950static __inline__ __m512i __DEFAULT_FN_ATTRS512
4952{
4953 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4954 (__v16si)
4956 (__mmask16) __U);
4957}
4958
4959static __inline__ void __DEFAULT_FN_ATTRS512
4960_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
4961{
4962 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
4963 (__mmask16) __U);
4964}
4965
4966static __inline__ __m512i __DEFAULT_FN_ATTRS512
4967_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
4968{
4969 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4970 (__v16si) __A,
4971 (__v16si) __W);
4972}
4973
4974static __inline__ __m512i __DEFAULT_FN_ATTRS512
4976{
4977 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4978 (__v16si) __A,
4979 (__v16si) _mm512_setzero_si512 ());
4980}
4981
4982static __inline__ __m512i __DEFAULT_FN_ATTRS512
4983_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
4984{
4985 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4986 (__v8di) __A,
4987 (__v8di) __W);
4988}
4989
4990static __inline__ __m512i __DEFAULT_FN_ATTRS512
4992{
4993 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4994 (__v8di) __A,
4995 (__v8di) _mm512_setzero_si512 ());
4996}
4997
4998static __inline__ __m512i __DEFAULT_FN_ATTRS512
4999_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5000{
5001 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5002 (__v8di) __W,
5003 (__mmask8) __U);
5004}
5005
5006static __inline__ __m512i __DEFAULT_FN_ATTRS512
5008{
5009 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5010 (__v8di)
5012 (__mmask8) __U);
5013}
5014
5015static __inline__ void __DEFAULT_FN_ATTRS512
5016_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5017{
5018 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5019 (__mmask8) __U);
5020}
5021
5022static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5024{
5025 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5026 0, 0, 2, 2, 4, 4, 6, 6);
5027}
5028
5029static __inline__ __m512d __DEFAULT_FN_ATTRS512
5030_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5031{
5032 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5033 (__v8df)_mm512_movedup_pd(__A),
5034 (__v8df)__W);
5035}
5036
5037static __inline__ __m512d __DEFAULT_FN_ATTRS512
5039{
5040 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5041 (__v8df)_mm512_movedup_pd(__A),
5042 (__v8df)_mm512_setzero_pd());
5043}
5044
/* vfixupimmpd/ps wrappers. The _round_ forms pass the explicit rounding/SAE
   control R; the plain forms use _MM_FROUND_CUR_DIRECTION. _mask forms merge
   under U; _maskz forms zero unselected elements. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
5124
/* Scalar vfixupimmsd/ss wrappers; same rounding/mask conventions as the
   512-bit fixupimm macros above, operating on the low element only. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
5202
5203#define _mm_getexp_round_sd(A, B, R) \
5204 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5205 (__v2df)(__m128d)(B), \
5206 (__v2df)_mm_setzero_pd(), \
5207 (__mmask8)-1, (int)(R)))
5208
5209
5210static __inline__ __m128d __DEFAULT_FN_ATTRS128
5211_mm_getexp_sd (__m128d __A, __m128d __B)
5212{
5213 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5214 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5215}
5216
5217static __inline__ __m128d __DEFAULT_FN_ATTRS128
5218_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5219{
5220 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5221 (__v2df) __B,
5222 (__v2df) __W,
5223 (__mmask8) __U,
5225}
5226
5227#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5228 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5229 (__v2df)(__m128d)(B), \
5230 (__v2df)(__m128d)(W), \
5231 (__mmask8)(U), (int)(R)))
5232
5233static __inline__ __m128d __DEFAULT_FN_ATTRS128
5234_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5235{
5236 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5237 (__v2df) __B,
5238 (__v2df) _mm_setzero_pd (),
5239 (__mmask8) __U,
5241}
5242
5243#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5244 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5245 (__v2df)(__m128d)(B), \
5246 (__v2df)_mm_setzero_pd(), \
5247 (__mmask8)(U), (int)(R)))
5248
5249#define _mm_getexp_round_ss(A, B, R) \
5250 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5251 (__v4sf)(__m128)(B), \
5252 (__v4sf)_mm_setzero_ps(), \
5253 (__mmask8)-1, (int)(R)))
5254
5255static __inline__ __m128 __DEFAULT_FN_ATTRS128
5256_mm_getexp_ss (__m128 __A, __m128 __B)
5257{
5258 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5259 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5260}
5261
5262static __inline__ __m128 __DEFAULT_FN_ATTRS128
5263_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5264{
5265 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5266 (__v4sf) __B,
5267 (__v4sf) __W,
5268 (__mmask8) __U,
5270}
5271
5272#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5273 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5274 (__v4sf)(__m128)(B), \
5275 (__v4sf)(__m128)(W), \
5276 (__mmask8)(U), (int)(R)))
5277
5278static __inline__ __m128 __DEFAULT_FN_ATTRS128
5279_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5280{
5281 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5282 (__v4sf) __B,
5283 (__v4sf) _mm_setzero_ps (),
5284 (__mmask8) __U,
5286}
5287
5288#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5289 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5290 (__v4sf)(__m128)(B), \
5291 (__v4sf)_mm_setzero_ps(), \
5292 (__mmask8)(U), (int)(R)))
5293
/* Scalar getmant (sd/ss). The normalization interval C and sign control D
   are packed into one immediate as (D << 2) | C. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5383
5384static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5386{
5387 return __A;
5388}
5389
/* Scalar compare (vcomisd/vcomiss) with predicate P and SAE control R. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5402
5403static __inline__ __m512i __DEFAULT_FN_ATTRS512
5404_mm512_sll_epi32(__m512i __A, __m128i __B)
5405{
5406 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5407}
5408
5409static __inline__ __m512i __DEFAULT_FN_ATTRS512
5410_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5411{
5412 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5413 (__v16si)_mm512_sll_epi32(__A, __B),
5414 (__v16si)__W);
5415}
5416
5417static __inline__ __m512i __DEFAULT_FN_ATTRS512
5418_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5419{
5420 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5421 (__v16si)_mm512_sll_epi32(__A, __B),
5422 (__v16si)_mm512_setzero_si512());
5423}
5424
5425static __inline__ __m512i __DEFAULT_FN_ATTRS512
5426_mm512_sll_epi64(__m512i __A, __m128i __B)
5427{
5428 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5429}
5430
5431static __inline__ __m512i __DEFAULT_FN_ATTRS512
5432_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5433{
5434 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5435 (__v8di)_mm512_sll_epi64(__A, __B),
5436 (__v8di)__W);
5437}
5438
5439static __inline__ __m512i __DEFAULT_FN_ATTRS512
5440_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5441{
5442 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5443 (__v8di)_mm512_sll_epi64(__A, __B),
5444 (__v8di)_mm512_setzero_si512());
5445}
5446
5447static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5448_mm512_sllv_epi32(__m512i __X, __m512i __Y) {
5449 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5450}
5451
5452static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5453_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5454 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5455 (__v16si)_mm512_sllv_epi32(__X, __Y),
5456 (__v16si)__W);
5457}
5458
5459static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5460_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5461 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5462 (__v16si)_mm512_sllv_epi32(__X, __Y),
5463 (__v16si)_mm512_setzero_si512());
5464}
5465
5466static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5467_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5468{
5469 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5470}
5471
5472static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5473_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5474{
5475 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5476 (__v8di)_mm512_sllv_epi64(__X, __Y),
5477 (__v8di)__W);
5478}
5479
5480static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5481_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5482{
5483 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5484 (__v8di)_mm512_sllv_epi64(__X, __Y),
5485 (__v8di)_mm512_setzero_si512());
5486}
5487
5488static __inline__ __m512i __DEFAULT_FN_ATTRS512
5489_mm512_sra_epi32(__m512i __A, __m128i __B)
5490{
5491 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5492}
5493
5494static __inline__ __m512i __DEFAULT_FN_ATTRS512
5495_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5496{
5497 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5498 (__v16si)_mm512_sra_epi32(__A, __B),
5499 (__v16si)__W);
5500}
5501
5502static __inline__ __m512i __DEFAULT_FN_ATTRS512
5503_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5504{
5505 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5506 (__v16si)_mm512_sra_epi32(__A, __B),
5507 (__v16si)_mm512_setzero_si512());
5508}
5509
5510static __inline__ __m512i __DEFAULT_FN_ATTRS512
5511_mm512_sra_epi64(__m512i __A, __m128i __B)
5512{
5513 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5514}
5515
5516static __inline__ __m512i __DEFAULT_FN_ATTRS512
5517_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5518{
5519 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5520 (__v8di)_mm512_sra_epi64(__A, __B),
5521 (__v8di)__W);
5522}
5523
5524static __inline__ __m512i __DEFAULT_FN_ATTRS512
5525_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5526{
5527 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5528 (__v8di)_mm512_sra_epi64(__A, __B),
5529 (__v8di)_mm512_setzero_si512());
5530}
5531
5532static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5533_mm512_srav_epi32(__m512i __X, __m512i __Y) {
5534 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5535}
5536
5537static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5538_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5539 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5540 (__v16si)_mm512_srav_epi32(__X, __Y),
5541 (__v16si)__W);
5542}
5543
5544static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5545_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5546 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5547 (__v16si)_mm512_srav_epi32(__X, __Y),
5548 (__v16si)_mm512_setzero_si512());
5549}
5550
5551static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5552_mm512_srav_epi64(__m512i __X, __m512i __Y)
5553{
5554 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5555}
5556
5557static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5558_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5559{
5560 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5561 (__v8di)_mm512_srav_epi64(__X, __Y),
5562 (__v8di)__W);
5563}
5564
5565static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5566_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5567{
5568 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5569 (__v8di)_mm512_srav_epi64(__X, __Y),
5570 (__v8di)_mm512_setzero_si512());
5571}
5572
5573static __inline__ __m512i __DEFAULT_FN_ATTRS512
5574_mm512_srl_epi32(__m512i __A, __m128i __B)
5575{
5576 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5577}
5578
5579static __inline__ __m512i __DEFAULT_FN_ATTRS512
5580_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5581{
5582 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5583 (__v16si)_mm512_srl_epi32(__A, __B),
5584 (__v16si)__W);
5585}
5586
5587static __inline__ __m512i __DEFAULT_FN_ATTRS512
5588_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5589{
5590 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5591 (__v16si)_mm512_srl_epi32(__A, __B),
5592 (__v16si)_mm512_setzero_si512());
5593}
5594
5595static __inline__ __m512i __DEFAULT_FN_ATTRS512
5596_mm512_srl_epi64(__m512i __A, __m128i __B)
5597{
5598 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5599}
5600
5601static __inline__ __m512i __DEFAULT_FN_ATTRS512
5602_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5603{
5604 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5605 (__v8di)_mm512_srl_epi64(__A, __B),
5606 (__v8di)__W);
5607}
5608
5609static __inline__ __m512i __DEFAULT_FN_ATTRS512
5610_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5611{
5612 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5613 (__v8di)_mm512_srl_epi64(__A, __B),
5614 (__v8di)_mm512_setzero_si512());
5615}
5616
5617static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5618_mm512_srlv_epi32(__m512i __X, __m512i __Y) {
5619 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5620}
5621
5622static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5623_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5624 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5625 (__v16si)_mm512_srlv_epi32(__X, __Y),
5626 (__v16si)__W);
5627}
5628
5629static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5630_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5631 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5632 (__v16si)_mm512_srlv_epi32(__X, __Y),
5633 (__v16si)_mm512_setzero_si512());
5634}
5635
5636static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5637_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5638{
5639 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5640}
5641
5642static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5643_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5644{
5645 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5646 (__v8di)_mm512_srlv_epi64(__X, __Y),
5647 (__v8di)__W);
5648}
5649
5650static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5651_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5652{
5653 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5654 (__v8di)_mm512_srlv_epi64(__X, __Y),
5655 (__v8di)_mm512_setzero_si512());
5656}
5657
/// \enum _MM_TERNLOG_ENUM
/// A helper to represent the ternary logic operations among vector \a A,
/// \a B and \a C. The representation is passed to \a imm.
/* NOTE(review): the enum body was dropped by the extraction; restored to
   match the upstream header. */
typedef enum {
  _MM_TERNLOG_A = 0xF0,
  _MM_TERNLOG_B = 0xCC,
  _MM_TERNLOG_C = 0xAA
} _MM_TERNLOG_ENUM;

#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
5696
/* Scalar double -> integer conversions with explicit rounding control R. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif

#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5710
5711static __inline__ unsigned __DEFAULT_FN_ATTRS128
5712_mm_cvtsd_u32 (__m128d __A)
5713{
5714 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5716}
5717
5718#ifdef __x86_64__
5719#define _mm_cvt_roundsd_u64(A, R) \
5720 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5721 (int)(R)))
5722
5723static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5724_mm_cvtsd_u64 (__m128d __A)
5725{
5726 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5727 __A,
5729}
5730#endif
5731
5732#define _mm_cvt_roundss_si32(A, R) \
5733 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5734
5735#define _mm_cvt_roundss_i32(A, R) \
5736 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5737
5738#ifdef __x86_64__
5739#define _mm_cvt_roundss_si64(A, R) \
5740 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5741
5742#define _mm_cvt_roundss_i64(A, R) \
5743 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5744#endif
5745
5746#define _mm_cvt_roundss_u32(A, R) \
5747 ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5748
5749static __inline__ unsigned __DEFAULT_FN_ATTRS128
5750_mm_cvtss_u32 (__m128 __A)
5751{
5752 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5754}
5755
5756#ifdef __x86_64__
5757#define _mm_cvt_roundss_u64(A, R) \
5758 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
5759 (int)(R)))
5760
5761static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5762_mm_cvtss_u64 (__m128 __A)
5763{
5764 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
5765 __A,
5767}
5768#endif
5769
5770#define _mm_cvtt_roundsd_i32(A, R) \
5771 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5772
5773#define _mm_cvtt_roundsd_si32(A, R) \
5774 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5775
5776static __inline__ int __DEFAULT_FN_ATTRS128
5777_mm_cvttsd_i32 (__m128d __A)
5778{
5779 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
5781}
5782
5783#ifdef __x86_64__
5784#define _mm_cvtt_roundsd_si64(A, R) \
5785 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5786
5787#define _mm_cvtt_roundsd_i64(A, R) \
5788 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5789
5790static __inline__ long long __DEFAULT_FN_ATTRS128
5791_mm_cvttsd_i64 (__m128d __A)
5792{
5793 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
5795}
5796#endif
5797
5798#define _mm_cvtt_roundsd_u32(A, R) \
5799 ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5800
5801static __inline__ unsigned __DEFAULT_FN_ATTRS128
5802_mm_cvttsd_u32 (__m128d __A)
5803{
5804 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
5806}
5807
5808#ifdef __x86_64__
5809#define _mm_cvtt_roundsd_u64(A, R) \
5810 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
5811 (int)(R)))
5812
5813static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5814_mm_cvttsd_u64 (__m128d __A)
5815{
5816 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
5817 __A,
5819}
5820#endif
5821
5822#define _mm_cvtt_roundss_i32(A, R) \
5823 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5824
5825#define _mm_cvtt_roundss_si32(A, R) \
5826 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5827
5828static __inline__ int __DEFAULT_FN_ATTRS128
5829_mm_cvttss_i32 (__m128 __A)
5830{
5831 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
5833}
5834
5835#ifdef __x86_64__
5836#define _mm_cvtt_roundss_i64(A, R) \
5837 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5838
5839#define _mm_cvtt_roundss_si64(A, R) \
5840 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5841
5842static __inline__ long long __DEFAULT_FN_ATTRS128
5843_mm_cvttss_i64 (__m128 __A)
5844{
5845 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
5847}
5848#endif
5849
5850#define _mm_cvtt_roundss_u32(A, R) \
5851 ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
5852
5853static __inline__ unsigned __DEFAULT_FN_ATTRS128
5854_mm_cvttss_u32 (__m128 __A)
5855{
5856 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
5858}
5859
5860#ifdef __x86_64__
5861#define _mm_cvtt_roundss_u64(A, R) \
5862 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
5863 (int)(R)))
5864
5865static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5866_mm_cvttss_u64 (__m128 __A)
5867{
5868 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
5869 __A,
5871}
5872#endif
5873
/* Immediate-controlled within-lane element permutes (VPERMILPD/VPERMILPS),
 * plus merge-masked (_mask_) and zero-masked (_maskz_) forms built on the
 * vector-select builtins. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
5899
/* Variable within-lane permute of doubles: each result element is chosen
 * from its own 128-bit lane of __A using control bits in __C (VPERMILPD). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

/* Merge-masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_permutevar_pd(__A, __C),
                                              (__v8df)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_permutevar_pd(__A, __C),
                                              (__v8df)_mm512_setzero_pd());
}
5921
/* Variable within-lane permute of floats: each result element is chosen
 * from its own 128-bit lane of __A using control bits in __C (VPERMILPS). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

/* Merge-masked form: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_permutevar_ps(__A, __C),
                                             (__v16sf)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_permutevar_ps(__A, __C),
                                             (__v16sf)_mm512_setzero_ps());
}
5943
/* Full two-source permute of doubles: each element of the index vector __I
 * selects an element from the concatenation of __A and __B (VPERMI2PD). */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) {
  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
                                                 (__v8df)__B);
}

/* Merge-masked form: elements with a clear bit in __U keep __A's value. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I,
                            __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                 (__v8df)__A);
}

/* mask2 form: elements with a clear bit in __U keep the index vector __I
 * (reinterpreted as doubles), matching VPERMI2PD's merge semantics. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
                             __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                 (__v8df)(__m512d)__I);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
                             __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                 (__v8df)_mm512_setzero_pd());
}
5973
/* Full two-source permute of floats: each element of the index vector __I
 * selects an element from the concatenation of __A and __B (VPERMI2PS). */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) {
  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
                                                (__v16sf) __B);
}

/* Merge-masked form: elements with a clear bit in __U keep __A's value. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I,
                            __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                                (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                (__v16sf)__A);
}

/* mask2 form: elements with a clear bit in __U keep the index vector __I
 * (reinterpreted as floats). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U,
                             __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                                (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                (__v16sf)(__m512)__I);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I,
                             __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                                (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                (__v16sf)_mm512_setzero_ps());
}
6003
6004#define _mm512_cvtt_roundpd_epu32(A, R) \
6005 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6006 (__v8si)_mm256_undefined_si256(), \
6007 (__mmask8)-1, (int)(R)))
6008
6009#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
6010 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6011 (__v8si)(__m256i)(W), \
6012 (__mmask8)(U), (int)(R)))
6013
6014#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
6015 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6016 (__v8si)_mm256_setzero_si256(), \
6017 (__mmask8)(U), (int)(R)))
6018
6019static __inline__ __m256i __DEFAULT_FN_ATTRS512
6021{
6022 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6023 (__v8si)
6025 (__mmask8) -1,
6027}
6028
6029static __inline__ __m256i __DEFAULT_FN_ATTRS512
6030_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6031{
6032 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6033 (__v8si) __W,
6034 (__mmask8) __U,
6036}
6037
6038static __inline__ __m256i __DEFAULT_FN_ATTRS512
6040{
6041 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6042 (__v8si)
6044 (__mmask8) __U,
6046}
6047
/* Scalar ROUNDSCALE of the low sd/ss element: plain, merge-masked (_mask_)
 * and zero-masked (_maskz_) forms; the _round_ variants take an explicit
 * rounding mode, the others use _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6131
6132#define _mm512_scalef_round_pd(A, B, R) \
6133 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6134 (__v8df)(__m512d)(B), \
6135 (__v8df)_mm512_undefined_pd(), \
6136 (__mmask8)-1, (int)(R)))
6137
6138#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6139 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6140 (__v8df)(__m512d)(B), \
6141 (__v8df)(__m512d)(W), \
6142 (__mmask8)(U), (int)(R)))
6143
6144#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6145 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6146 (__v8df)(__m512d)(B), \
6147 (__v8df)_mm512_setzero_pd(), \
6148 (__mmask8)(U), (int)(R)))
6149
6150static __inline__ __m512d __DEFAULT_FN_ATTRS512
6151_mm512_scalef_pd (__m512d __A, __m512d __B)
6152{
6153 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6154 (__v8df) __B,
6155 (__v8df)
6157 (__mmask8) -1,
6159}
6160
6161static __inline__ __m512d __DEFAULT_FN_ATTRS512
6162_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6163{
6164 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6165 (__v8df) __B,
6166 (__v8df) __W,
6167 (__mmask8) __U,
6169}
6170
6171static __inline__ __m512d __DEFAULT_FN_ATTRS512
6172_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6173{
6174 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6175 (__v8df) __B,
6176 (__v8df)
6178 (__mmask8) __U,
6180}
6181
6182#define _mm512_scalef_round_ps(A, B, R) \
6183 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6184 (__v16sf)(__m512)(B), \
6185 (__v16sf)_mm512_undefined_ps(), \
6186 (__mmask16)-1, (int)(R)))
6187
6188#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6189 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6190 (__v16sf)(__m512)(B), \
6191 (__v16sf)(__m512)(W), \
6192 (__mmask16)(U), (int)(R)))
6193
6194#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6195 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6196 (__v16sf)(__m512)(B), \
6197 (__v16sf)_mm512_setzero_ps(), \
6198 (__mmask16)(U), (int)(R)))
6199
6200static __inline__ __m512 __DEFAULT_FN_ATTRS512
6201_mm512_scalef_ps (__m512 __A, __m512 __B)
6202{
6203 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6204 (__v16sf) __B,
6205 (__v16sf)
6207 (__mmask16) -1,
6209}
6210
6211static __inline__ __m512 __DEFAULT_FN_ATTRS512
6212_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6213{
6214 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6215 (__v16sf) __B,
6216 (__v16sf) __W,
6217 (__mmask16) __U,
6219}
6220
6221static __inline__ __m512 __DEFAULT_FN_ATTRS512
6222_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6223{
6224 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6225 (__v16sf) __B,
6226 (__v16sf)
6228 (__mmask16) __U,
6230}
6231
6232#define _mm_scalef_round_sd(A, B, R) \
6233 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6234 (__v2df)(__m128d)(B), \
6235 (__v2df)_mm_setzero_pd(), \
6236 (__mmask8)-1, (int)(R)))
6237
6238static __inline__ __m128d __DEFAULT_FN_ATTRS128
6239_mm_scalef_sd (__m128d __A, __m128d __B)
6240{
6241 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6242 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6243 (__mmask8) -1,
6245}
6246
6247static __inline__ __m128d __DEFAULT_FN_ATTRS128
6248_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6249{
6250 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6251 (__v2df) __B,
6252 (__v2df) __W,
6253 (__mmask8) __U,
6255}
6256
6257#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6258 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6259 (__v2df)(__m128d)(B), \
6260 (__v2df)(__m128d)(W), \
6261 (__mmask8)(U), (int)(R)))
6262
6263static __inline__ __m128d __DEFAULT_FN_ATTRS128
6264_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6265{
6266 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6267 (__v2df) __B,
6268 (__v2df) _mm_setzero_pd (),
6269 (__mmask8) __U,
6271}
6272
6273#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6274 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6275 (__v2df)(__m128d)(B), \
6276 (__v2df)_mm_setzero_pd(), \
6277 (__mmask8)(U), (int)(R)))
6278
6279#define _mm_scalef_round_ss(A, B, R) \
6280 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6281 (__v4sf)(__m128)(B), \
6282 (__v4sf)_mm_setzero_ps(), \
6283 (__mmask8)-1, (int)(R)))
6284
6285static __inline__ __m128 __DEFAULT_FN_ATTRS128
6286_mm_scalef_ss (__m128 __A, __m128 __B)
6287{
6288 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6289 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6290 (__mmask8) -1,
6292}
6293
6294static __inline__ __m128 __DEFAULT_FN_ATTRS128
6295_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6296{
6297 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6298 (__v4sf) __B,
6299 (__v4sf) __W,
6300 (__mmask8) __U,
6302}
6303
6304#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6305 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6306 (__v4sf)(__m128)(B), \
6307 (__v4sf)(__m128)(W), \
6308 (__mmask8)(U), (int)(R)))
6309
6310static __inline__ __m128 __DEFAULT_FN_ATTRS128
6311_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6312{
6313 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6314 (__v4sf) __B,
6315 (__v4sf) _mm_setzero_ps (),
6316 (__mmask8) __U,
6318}
6319
6320#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6321 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6322 (__v4sf)(__m128)(B), \
6323 (__v4sf)_mm_setzero_ps(), \
6324 (__mmask8)(U), \
6325 (int)(R)))
6326
/* Arithmetic right shift of each 32-bit element of __A by the immediate
 * count __B (sign bits shifted in). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srai_epi32(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
}

/* Merge-masked form: elements with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srai_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srai_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
6346
/* Arithmetic right shift of each 64-bit element of __A by the immediate
 * count __B (sign bits shifted in). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srai_epi64(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
}

/* Merge-masked form: elements with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srai_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked form: elements with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srai_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
6366
/* 128-bit-lane shuffles (f32x4/f64x2/i32x4/i64x2) and classic element
 * shuffles (shuffle_pd/ps), each with plain, merge-masked (_mask_) and
 * zero-masked (_maskz_) forms built on the vector-select builtins. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
6450
6451#define _mm_sqrt_round_sd(A, B, R) \
6452 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6453 (__v2df)(__m128d)(B), \
6454 (__v2df)_mm_setzero_pd(), \
6455 (__mmask8)-1, (int)(R)))
6456
6457static __inline__ __m128d __DEFAULT_FN_ATTRS128
6458_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6459{
6460 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6461 (__v2df) __B,
6462 (__v2df) __W,
6463 (__mmask8) __U,
6465}
6466
6467#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6468 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6469 (__v2df)(__m128d)(B), \
6470 (__v2df)(__m128d)(W), \
6471 (__mmask8)(U), (int)(R)))
6472
6473static __inline__ __m128d __DEFAULT_FN_ATTRS128
6474_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6475{
6476 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6477 (__v2df) __B,
6478 (__v2df) _mm_setzero_pd (),
6479 (__mmask8) __U,
6481}
6482
6483#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6484 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6485 (__v2df)(__m128d)(B), \
6486 (__v2df)_mm_setzero_pd(), \
6487 (__mmask8)(U), (int)(R)))
6488
6489#define _mm_sqrt_round_ss(A, B, R) \
6490 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6491 (__v4sf)(__m128)(B), \
6492 (__v4sf)_mm_setzero_ps(), \
6493 (__mmask8)-1, (int)(R)))
6494
6495static __inline__ __m128 __DEFAULT_FN_ATTRS128
6496_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6497{
6498 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6499 (__v4sf) __B,
6500 (__v4sf) __W,
6501 (__mmask8) __U,
6503}
6504
6505#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6506 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6507 (__v4sf)(__m128)(B), \
6508 (__v4sf)(__m128)(W), (__mmask8)(U), \
6509 (int)(R)))
6510
6511static __inline__ __m128 __DEFAULT_FN_ATTRS128
6512_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6513{
6514 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6515 (__v4sf) __B,
6516 (__v4sf) _mm_setzero_ps (),
6517 (__mmask8) __U,
6519}
6520
6521#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6522 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6523 (__v4sf)(__m128)(B), \
6524 (__v4sf)_mm_setzero_ps(), \
6525 (__mmask8)(U), (int)(R)))
6526
6527static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6529 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6530 0, 1, 2, 3, 0, 1, 2, 3,
6531 0, 1, 2, 3, 0, 1, 2, 3);
6532}
6533
6534static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6535_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
6536 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6537 (__v16sf)_mm512_broadcast_f32x4(__A),
6538 (__v16sf)__O);
6539}
6540
6541static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6543 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6544 (__v16sf)_mm512_broadcast_f32x4(__A),
6545 (__v16sf)_mm512_setzero_ps());
6546}
6547
6548static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6550 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6551 0, 1, 2, 3, 0, 1, 2, 3);
6552}
6553
6554static __inline__ __m512d __DEFAULT_FN_ATTRS512
6555_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6556{
6557 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6558 (__v8df)_mm512_broadcast_f64x4(__A),
6559 (__v8df)__O);
6560}
6561
6562static __inline__ __m512d __DEFAULT_FN_ATTRS512
6564{
6565 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6566 (__v8df)_mm512_broadcast_f64x4(__A),
6567 (__v8df)_mm512_setzero_pd());
6568}
6569
6570static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6572 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6573 0, 1, 2, 3, 0, 1, 2, 3,
6574 0, 1, 2, 3, 0, 1, 2, 3);
6575}
6576
6577static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6578_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
6579 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6580 (__v16si)_mm512_broadcast_i32x4(__A),
6581 (__v16si)__O);
6582}
6583
6584static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6586 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6587 (__v16si)_mm512_broadcast_i32x4(__A),
6588 (__v16si)_mm512_setzero_si512());
6589}
6590
6591static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6593 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6594 0, 1, 2, 3, 0, 1, 2, 3);
6595}
6596
6597static __inline__ __m512i __DEFAULT_FN_ATTRS512
6598_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6599{
6600 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6601 (__v8di)_mm512_broadcast_i64x4(__A),
6602 (__v8di)__O);
6603}
6604
6605static __inline__ __m512i __DEFAULT_FN_ATTRS512
6607{
6608 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6609 (__v8di)_mm512_broadcast_i64x4(__A),
6610 (__v8di)_mm512_setzero_si512());
6611}
6612
6613static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6614_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) {
6615 return (__m512d)__builtin_ia32_selectpd_512(__M,
6616 (__v8df) _mm512_broadcastsd_pd(__A),
6617 (__v8df) __O);
6618}
6619
6620static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6622 return (__m512d)__builtin_ia32_selectpd_512(__M,
6623 (__v8df) _mm512_broadcastsd_pd(__A),
6624 (__v8df) _mm512_setzero_pd());
6625}
6626
6627static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6628_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) {
6629 return (__m512)__builtin_ia32_selectps_512(__M,
6630 (__v16sf) _mm512_broadcastss_ps(__A),
6631 (__v16sf) __O);
6632}
6633
6634static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6636 return (__m512)__builtin_ia32_selectps_512(__M,
6637 (__v16sf) _mm512_broadcastss_ps(__A),
6638 (__v16sf) _mm512_setzero_ps());
6639}
6640
6641static __inline__ __m128i __DEFAULT_FN_ATTRS512
6643{
6644 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6645 (__v16qi) _mm_undefined_si128 (),
6646 (__mmask16) -1);
6647}
6648
6649static __inline__ __m128i __DEFAULT_FN_ATTRS512
6650_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6651{
6652 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6653 (__v16qi) __O, __M);
6654}
6655
6656static __inline__ __m128i __DEFAULT_FN_ATTRS512
6658{
6659 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6660 (__v16qi) _mm_setzero_si128 (),
6661 __M);
6662}
6663
6664static __inline__ void __DEFAULT_FN_ATTRS512
6666{
6667 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6668}
6669
6670static __inline__ __m256i __DEFAULT_FN_ATTRS512
6672{
6673 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6674 (__v16hi) _mm256_undefined_si256 (),
6675 (__mmask16) -1);
6676}
6677
6678static __inline__ __m256i __DEFAULT_FN_ATTRS512
6679_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6680{
6681 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6682 (__v16hi) __O, __M);
6683}
6684
6685static __inline__ __m256i __DEFAULT_FN_ATTRS512
6687{
6688 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6689 (__v16hi) _mm256_setzero_si256 (),
6690 __M);
6691}
6692
6693static __inline__ void __DEFAULT_FN_ATTRS512
6695{
6696 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6697}
6698
6699static __inline__ __m128i __DEFAULT_FN_ATTRS512
6701{
6702 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6703 (__v16qi) _mm_undefined_si128 (),
6704 (__mmask8) -1);
6705}
6706
6707static __inline__ __m128i __DEFAULT_FN_ATTRS512
6708_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6709{
6710 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6711 (__v16qi) __O, __M);
6712}
6713
6714static __inline__ __m128i __DEFAULT_FN_ATTRS512
6716{
6717 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6718 (__v16qi) _mm_setzero_si128 (),
6719 __M);
6720}
6721
6722static __inline__ void __DEFAULT_FN_ATTRS512
6724{
6725 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6726}
6727
6728static __inline__ __m256i __DEFAULT_FN_ATTRS512
6730{
6731 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6732 (__v8si) _mm256_undefined_si256 (),
6733 (__mmask8) -1);
6734}
6735
6736static __inline__ __m256i __DEFAULT_FN_ATTRS512
6737_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6738{
6739 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6740 (__v8si) __O, __M);
6741}
6742
6743static __inline__ __m256i __DEFAULT_FN_ATTRS512
6745{
6746 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6747 (__v8si) _mm256_setzero_si256 (),
6748 __M);
6749}
6750
6751static __inline__ void __DEFAULT_FN_ATTRS512
6753{
6754 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6755}
6756
6757static __inline__ __m128i __DEFAULT_FN_ATTRS512
6759{
6760 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6761 (__v8hi) _mm_undefined_si128 (),
6762 (__mmask8) -1);
6763}
6764
6765static __inline__ __m128i __DEFAULT_FN_ATTRS512
6766_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6767{
6768 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6769 (__v8hi) __O, __M);
6770}
6771
6772static __inline__ __m128i __DEFAULT_FN_ATTRS512
6774{
6775 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6776 (__v8hi) _mm_setzero_si128 (),
6777 __M);
6778}
6779
6780static __inline__ void __DEFAULT_FN_ATTRS512
6782{
6783 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6784}
6785
6786static __inline__ __m128i __DEFAULT_FN_ATTRS512
6788{
6789 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6790 (__v16qi) _mm_undefined_si128 (),
6791 (__mmask16) -1);
6792}
6793
6794static __inline__ __m128i __DEFAULT_FN_ATTRS512
6795_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6796{
6797 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6798 (__v16qi) __O,
6799 __M);
6800}
6801
6802static __inline__ __m128i __DEFAULT_FN_ATTRS512
6804{
6805 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6806 (__v16qi) _mm_setzero_si128 (),
6807 __M);
6808}
6809
6810static __inline__ void __DEFAULT_FN_ATTRS512
6812{
6813 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6814}
6815
6816static __inline__ __m256i __DEFAULT_FN_ATTRS512
6818{
6819 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6820 (__v16hi) _mm256_undefined_si256 (),
6821 (__mmask16) -1);
6822}
6823
6824static __inline__ __m256i __DEFAULT_FN_ATTRS512
6825_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6826{
6827 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6828 (__v16hi) __O,
6829 __M);
6830}
6831
6832static __inline__ __m256i __DEFAULT_FN_ATTRS512
6834{
6835 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6836 (__v16hi) _mm256_setzero_si256 (),
6837 __M);
6838}
6839
6840static __inline__ void __DEFAULT_FN_ATTRS512
6842{
6843 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6844}
6845
6846static __inline__ __m128i __DEFAULT_FN_ATTRS512
6848{
6849 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6850 (__v16qi) _mm_undefined_si128 (),
6851 (__mmask8) -1);
6852}
6853
6854static __inline__ __m128i __DEFAULT_FN_ATTRS512
6855_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6856{
6857 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6858 (__v16qi) __O,
6859 __M);
6860}
6861
6862static __inline__ __m128i __DEFAULT_FN_ATTRS512
6864{
6865 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6866 (__v16qi) _mm_setzero_si128 (),
6867 __M);
6868}
6869
6870static __inline__ void __DEFAULT_FN_ATTRS512
6872{
6873 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6874}
6875
6876static __inline__ __m256i __DEFAULT_FN_ATTRS512
6878{
6879 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6880 (__v8si) _mm256_undefined_si256 (),
6881 (__mmask8) -1);
6882}
6883
6884static __inline__ __m256i __DEFAULT_FN_ATTRS512
6885_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6886{
6887 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6888 (__v8si) __O, __M);
6889}
6890
6891static __inline__ __m256i __DEFAULT_FN_ATTRS512
6893{
6894 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6895 (__v8si) _mm256_setzero_si256 (),
6896 __M);
6897}
6898
6899static __inline__ void __DEFAULT_FN_ATTRS512
6901{
6902 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
6903}
6904
6905static __inline__ __m128i __DEFAULT_FN_ATTRS512
6907{
6908 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6909 (__v8hi) _mm_undefined_si128 (),
6910 (__mmask8) -1);
6911}
6912
6913static __inline__ __m128i __DEFAULT_FN_ATTRS512
6914_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6915{
6916 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6917 (__v8hi) __O, __M);
6918}
6919
6920static __inline__ __m128i __DEFAULT_FN_ATTRS512
6922{
6923 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6924 (__v8hi) _mm_setzero_si128 (),
6925 __M);
6926}
6927
6928static __inline__ void __DEFAULT_FN_ATTRS512
6930{
6931 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
6932}
6933
6934static __inline__ __m128i __DEFAULT_FN_ATTRS512
6936{
6937 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6938 (__v16qi) _mm_undefined_si128 (),
6939 (__mmask16) -1);
6940}
6941
6942static __inline__ __m128i __DEFAULT_FN_ATTRS512
6943_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6944{
6945 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6946 (__v16qi) __O, __M);
6947}
6948
6949static __inline__ __m128i __DEFAULT_FN_ATTRS512
6951{
6952 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6953 (__v16qi) _mm_setzero_si128 (),
6954 __M);
6955}
6956
6957static __inline__ void __DEFAULT_FN_ATTRS512
6959{
6960 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6961}
6962
6963static __inline__ __m256i __DEFAULT_FN_ATTRS512
6965{
6966 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6967 (__v16hi) _mm256_undefined_si256 (),
6968 (__mmask16) -1);
6969}
6970
6971static __inline__ __m256i __DEFAULT_FN_ATTRS512
6972_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6973{
6974 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6975 (__v16hi) __O, __M);
6976}
6977
6978static __inline__ __m256i __DEFAULT_FN_ATTRS512
6980{
6981 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6982 (__v16hi) _mm256_setzero_si256 (),
6983 __M);
6984}
6985
6986static __inline__ void __DEFAULT_FN_ATTRS512
6988{
6989 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
6990}
6991
6992static __inline__ __m128i __DEFAULT_FN_ATTRS512
6994{
6995 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6996 (__v16qi) _mm_undefined_si128 (),
6997 (__mmask8) -1);
6998}
6999
7000static __inline__ __m128i __DEFAULT_FN_ATTRS512
7001_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7002{
7003 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7004 (__v16qi) __O, __M);
7005}
7006
7007static __inline__ __m128i __DEFAULT_FN_ATTRS512
7009{
7010 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7011 (__v16qi) _mm_setzero_si128 (),
7012 __M);
7013}
7014
7015static __inline__ void __DEFAULT_FN_ATTRS512
7017{
7018 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7019}
7020
7021static __inline__ __m256i __DEFAULT_FN_ATTRS512
7023{
7024 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7025 (__v8si) _mm256_undefined_si256 (),
7026 (__mmask8) -1);
7027}
7028
7029static __inline__ __m256i __DEFAULT_FN_ATTRS512
7030_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7031{
7032 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7033 (__v8si) __O, __M);
7034}
7035
7036static __inline__ __m256i __DEFAULT_FN_ATTRS512
7038{
7039 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7040 (__v8si) _mm256_setzero_si256 (),
7041 __M);
7042}
7043
7044static __inline__ void __DEFAULT_FN_ATTRS512
7046{
7047 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7048}
7049
7050static __inline__ __m128i __DEFAULT_FN_ATTRS512
7052{
7053 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7054 (__v8hi) _mm_undefined_si128 (),
7055 (__mmask8) -1);
7056}
7057
7058static __inline__ __m128i __DEFAULT_FN_ATTRS512
7059_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7060{
7061 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7062 (__v8hi) __O, __M);
7063}
7064
7065static __inline__ __m128i __DEFAULT_FN_ATTRS512
7067{
7068 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7069 (__v8hi) _mm_setzero_si128 (),
7070 __M);
7071}
7072
7073static __inline__ void __DEFAULT_FN_ATTRS512
7075{
7076 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7077}
7078
/* Extract the 128-bit integer lane selected by imm (0-3) from a 512-bit
   vector.  Macros (not functions) because imm must be a compile-time
   immediate for the underlying instruction. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
      (__mmask8) - 1))

/* Merge-masked extract: elements not selected by U come from W. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

/* Zero-masked extract: elements not selected by U are zeroed. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

/* Extract the 256-bit half selected by imm (0-1) as 4 x 64-bit ints. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8) - 1))

/* Merge-masked extract: elements not selected by U come from W. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

/* Zero-masked extract: elements not selected by U are zeroed. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))

/* Insert the 256-bit double vector B into half imm (0-1) of A. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

/* Masked insert: result lanes not selected by U come from W. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)(__m512d)(W)))

/* Masked insert: result lanes not selected by U are zeroed. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)_mm512_setzero_pd()))

/* Insert the 256-bit integer vector B into half imm (0-1) of A. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

/* Masked insert: result lanes not selected by U come from W. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

/* Masked insert: result lanes not selected by U are zeroed. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

/* Insert the 128-bit float vector B into lane imm (0-3) of A. */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

/* Masked insert: result lanes not selected by U come from W. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                (__v16sf)(__m512)(W)))

/* Masked insert: result lanes not selected by U are zeroed. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                (__v16sf)_mm512_setzero_ps()))

/* Insert the 128-bit integer vector B into lane imm (0-3) of A. */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

/* Masked insert: result lanes not selected by U come from W. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

/* Masked insert: result lanes not selected by U are zeroed. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
7164
/* GETMANT: extract the normalized mantissa of each element.  B selects the
   normalization interval and C the sign control; they are packed into one
   immediate as (C << 2) | B.  R is the rounding/SAE control. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

/* Merge-masked GETMANT with explicit rounding: unselected lanes from W. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Zero-masked GETMANT with explicit rounding: unselected lanes zeroed. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

/* GETMANT with the current rounding mode. */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

/* Merge-masked GETMANT, current rounding mode. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

/* Zero-masked GETMANT, current rounding mode. */
#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

/* Single-precision GETMANT variants; same immediate packing as above. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

/* Merge-masked GETMANT with explicit rounding: unselected lanes from W. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

/* Zero-masked GETMANT with explicit rounding: unselected lanes zeroed. */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

/* GETMANT with the current rounding mode. */
#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

/* Merge-masked GETMANT, current rounding mode. */
#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

/* Zero-masked GETMANT, current rounding mode. */
#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

/* GETEXP: extract the exponent of each double as a double, with explicit
   rounding/SAE control R. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked GETEXP with explicit rounding: unselected lanes from W. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked GETEXP with explicit rounding: unselected lanes zeroed. */
#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7257
7258static __inline__ __m512d __DEFAULT_FN_ATTRS512
7259_mm512_getexp_pd (__m512d __A)
7260{
7261 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7262 (__v8df) _mm512_undefined_pd (),
7263 (__mmask8) -1,
7265}
7266
7267static __inline__ __m512d __DEFAULT_FN_ATTRS512
7268_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7269{
7270 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7271 (__v8df) __W,
7272 (__mmask8) __U,
7274}
7275
7276static __inline__ __m512d __DEFAULT_FN_ATTRS512
7278{
7279 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7280 (__v8df) _mm512_setzero_pd (),
7281 (__mmask8) __U,
7283}
7284
/* GETEXP for single precision with explicit rounding/SAE control R. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

/* Merge-masked GETEXP with explicit rounding: unselected lanes from W. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

/* Zero-masked GETEXP with explicit rounding: unselected lanes zeroed. */
#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7299
7300static __inline__ __m512 __DEFAULT_FN_ATTRS512
7302{
7303 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7304 (__v16sf) _mm512_undefined_ps (),
7305 (__mmask16) -1,
7307}
7308
7309static __inline__ __m512 __DEFAULT_FN_ATTRS512
7310_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7311{
7312 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7313 (__v16sf) __W,
7314 (__mmask16) __U,
7316}
7317
7318static __inline__ __m512 __DEFAULT_FN_ATTRS512
7320{
7321 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7322 (__v16sf) _mm512_setzero_ps (),
7323 (__mmask16) __U,
7325}
7326
/* Gather/scatter macros.  `index` supplies per-element addresses as
   base + index*scale from `addr`; scale must be 1, 2, 4, or 8.  Masked
   forms only touch lanes whose mask bit is set.  Macros because `scale`
   must be a compile-time immediate. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* 32-bit-index gathers: 16 lanes for 32-bit data, 8 for 64-bit data. */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* Scatters: write each selected lane of v1 to addr + index*scale. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7502
7503static __inline__ __m128 __DEFAULT_FN_ATTRS128
7504_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7505{
7506 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7507 (__v4sf)__A,
7508 (__v4sf)__B,
7509 (__mmask8)__U,
7511}
7512
7513#define _mm_fmadd_round_ss(A, B, C, R) \
7514 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7515 (__v4sf)(__m128)(B), \
7516 (__v4sf)(__m128)(C), (__mmask8)-1, \
7517 (int)(R)))
7518
7519#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7520 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7521 (__v4sf)(__m128)(A), \
7522 (__v4sf)(__m128)(B), (__mmask8)(U), \
7523 (int)(R)))
7524
7525static __inline__ __m128 __DEFAULT_FN_ATTRS128
7526_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7527{
7528 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7529 (__v4sf)__B,
7530 (__v4sf)__C,
7531 (__mmask8)__U,
7533}
7534
7535#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7536 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7537 (__v4sf)(__m128)(B), \
7538 (__v4sf)(__m128)(C), (__mmask8)(U), \
7539 (int)(R)))
7540
7541static __inline__ __m128 __DEFAULT_FN_ATTRS128
7542_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7543{
7544 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7545 (__v4sf)__X,
7546 (__v4sf)__Y,
7547 (__mmask8)__U,
7549}
7550
7551#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7552 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7553 (__v4sf)(__m128)(X), \
7554 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7555 (int)(R)))
7556
7557static __inline__ __m128 __DEFAULT_FN_ATTRS128
7558_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7559{
7560 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7561 (__v4sf)__A,
7562 -(__v4sf)__B,
7563 (__mmask8)__U,
7565}
7566
7567#define _mm_fmsub_round_ss(A, B, C, R) \
7568 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7569 (__v4sf)(__m128)(B), \
7570 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7571 (int)(R)))
7572
7573#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7574 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7575 (__v4sf)(__m128)(A), \
7576 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7577 (int)(R)))
7578
7579static __inline__ __m128 __DEFAULT_FN_ATTRS128
7580_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7581{
7582 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7583 (__v4sf)__B,
7584 -(__v4sf)__C,
7585 (__mmask8)__U,
7587}
7588
7589#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7590 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7591 (__v4sf)(__m128)(B), \
7592 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7593 (int)(R)))
7594
7595static __inline__ __m128 __DEFAULT_FN_ATTRS128
7596_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7597{
7598 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7599 (__v4sf)__X,
7600 (__v4sf)__Y,
7601 (__mmask8)__U,
7603}
7604
7605#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7606 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7607 (__v4sf)(__m128)(X), \
7608 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7609 (int)(R)))
7610
7611static __inline__ __m128 __DEFAULT_FN_ATTRS128
7612_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7613{
7614 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7615 -(__v4sf)__A,
7616 (__v4sf)__B,
7617 (__mmask8)__U,
7619}
7620
7621#define _mm_fnmadd_round_ss(A, B, C, R) \
7622 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7623 -(__v4sf)(__m128)(B), \
7624 (__v4sf)(__m128)(C), (__mmask8)-1, \
7625 (int)(R)))
7626
7627#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7628 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7629 -(__v4sf)(__m128)(A), \
7630 (__v4sf)(__m128)(B), (__mmask8)(U), \
7631 (int)(R)))
7632
7633static __inline__ __m128 __DEFAULT_FN_ATTRS128
7634_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7635{
7636 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7637 -(__v4sf)__B,
7638 (__v4sf)__C,
7639 (__mmask8)__U,
7641}
7642
7643#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7644 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7645 -(__v4sf)(__m128)(B), \
7646 (__v4sf)(__m128)(C), (__mmask8)(U), \
7647 (int)(R)))
7648
7649static __inline__ __m128 __DEFAULT_FN_ATTRS128
7650_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7651{
7652 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7653 -(__v4sf)__X,
7654 (__v4sf)__Y,
7655 (__mmask8)__U,
7657}
7658
7659#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7660 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7661 -(__v4sf)(__m128)(X), \
7662 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7663 (int)(R)))
7664
7665static __inline__ __m128 __DEFAULT_FN_ATTRS128
7666_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7667{
7668 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7669 -(__v4sf)__A,
7670 -(__v4sf)__B,
7671 (__mmask8)__U,
7673}
7674
7675#define _mm_fnmsub_round_ss(A, B, C, R) \
7676 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7677 -(__v4sf)(__m128)(B), \
7678 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7679 (int)(R)))
7680
7681#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7682 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7683 -(__v4sf)(__m128)(A), \
7684 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7685 (int)(R)))
7686
7687static __inline__ __m128 __DEFAULT_FN_ATTRS128
7688_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7689{
7690 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7691 -(__v4sf)__B,
7692 -(__v4sf)__C,
7693 (__mmask8)__U,
7695}
7696
7697#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7698 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7699 -(__v4sf)(__m128)(B), \
7700 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7701 (int)(R)))
7702
7703static __inline__ __m128 __DEFAULT_FN_ATTRS128
7704_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7705{
7706 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7707 -(__v4sf)__X,
7708 (__v4sf)__Y,
7709 (__mmask8)__U,
7711}
7712
7713#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7714 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7715 -(__v4sf)(__m128)(X), \
7716 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7717 (int)(R)))
7718
7719static __inline__ __m128d __DEFAULT_FN_ATTRS128
7720_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7721{
7722 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7723 (__v2df)__A,
7724 (__v2df)__B,
7725 (__mmask8)__U,
7727}
7728
7729#define _mm_fmadd_round_sd(A, B, C, R) \
7730 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7731 (__v2df)(__m128d)(B), \
7732 (__v2df)(__m128d)(C), (__mmask8)-1, \
7733 (int)(R)))
7734
7735#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
7736 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7737 (__v2df)(__m128d)(A), \
7738 (__v2df)(__m128d)(B), (__mmask8)(U), \
7739 (int)(R)))
7740
7741static __inline__ __m128d __DEFAULT_FN_ATTRS128
7742_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7743{
7744 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7745 (__v2df)__B,
7746 (__v2df)__C,
7747 (__mmask8)__U,
7749}
7750
7751#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
7752 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7753 (__v2df)(__m128d)(B), \
7754 (__v2df)(__m128d)(C), (__mmask8)(U), \
7755 (int)(R)))
7756
7757static __inline__ __m128d __DEFAULT_FN_ATTRS128
7758_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7759{
7760 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7761 (__v2df)__X,
7762 (__v2df)__Y,
7763 (__mmask8)__U,
7765}
7766
7767#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
7768 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7769 (__v2df)(__m128d)(X), \
7770 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7771 (int)(R)))
7772
7773static __inline__ __m128d __DEFAULT_FN_ATTRS128
7774_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7775{
7776 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7777 (__v2df)__A,
7778 -(__v2df)__B,
7779 (__mmask8)__U,
7781}
7782
7783#define _mm_fmsub_round_sd(A, B, C, R) \
7784 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7785 (__v2df)(__m128d)(B), \
7786 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7787 (int)(R)))
7788
7789#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
7790 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7791 (__v2df)(__m128d)(A), \
7792 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7793 (int)(R)))
7794
7795static __inline__ __m128d __DEFAULT_FN_ATTRS128
7796_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7797{
7798 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7799 (__v2df)__B,
7800 -(__v2df)__C,
7801 (__mmask8)__U,
7803}
7804
7805#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
7806 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7807 (__v2df)(__m128d)(B), \
7808 -(__v2df)(__m128d)(C), \
7809 (__mmask8)(U), (int)(R)))
7810
7811static __inline__ __m128d __DEFAULT_FN_ATTRS128
7812_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7813{
7814 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7815 (__v2df)__X,
7816 (__v2df)__Y,
7817 (__mmask8)__U,
7819}
7820
7821#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
7822 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7823 (__v2df)(__m128d)(X), \
7824 (__v2df)(__m128d)(Y), \
7825 (__mmask8)(U), (int)(R)))
7826
7827static __inline__ __m128d __DEFAULT_FN_ATTRS128
7828_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7829{
7830 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7831 -(__v2df)__A,
7832 (__v2df)__B,
7833 (__mmask8)__U,
7835}
7836
7837#define _mm_fnmadd_round_sd(A, B, C, R) \
7838 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7839 -(__v2df)(__m128d)(B), \
7840 (__v2df)(__m128d)(C), (__mmask8)-1, \
7841 (int)(R)))
7842
7843#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
7844 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7845 -(__v2df)(__m128d)(A), \
7846 (__v2df)(__m128d)(B), (__mmask8)(U), \
7847 (int)(R)))
7848
7849static __inline__ __m128d __DEFAULT_FN_ATTRS128
7850_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7851{
7852 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7853 -(__v2df)__B,
7854 (__v2df)__C,
7855 (__mmask8)__U,
7857}
7858
7859#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
7860 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7861 -(__v2df)(__m128d)(B), \
7862 (__v2df)(__m128d)(C), (__mmask8)(U), \
7863 (int)(R)))
7864
7865static __inline__ __m128d __DEFAULT_FN_ATTRS128
7866_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7867{
7868 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7869 -(__v2df)__X,
7870 (__v2df)__Y,
7871 (__mmask8)__U,
7873}
7874
7875#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
7876 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7877 -(__v2df)(__m128d)(X), \
7878 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7879 (int)(R)))
7880
7881static __inline__ __m128d __DEFAULT_FN_ATTRS128
7882_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7883{
7884 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7885 -(__v2df)__A,
7886 -(__v2df)__B,
7887 (__mmask8)__U,
7889}
7890
7891#define _mm_fnmsub_round_sd(A, B, C, R) \
7892 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7893 -(__v2df)(__m128d)(B), \
7894 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7895 (int)(R)))
7896
7897#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
7898 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7899 -(__v2df)(__m128d)(A), \
7900 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7901 (int)(R)))
7902
7903static __inline__ __m128d __DEFAULT_FN_ATTRS128
7904_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7905{
7906 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7907 -(__v2df)__B,
7908 -(__v2df)__C,
7909 (__mmask8)__U,
7911}
7912
7913#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
7914 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7915 -(__v2df)(__m128d)(B), \
7916 -(__v2df)(__m128d)(C), \
7917 (__mmask8)(U), \
7918 (int)(R)))
7919
7920static __inline__ __m128d __DEFAULT_FN_ATTRS128
7921_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7922{
7923 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7924 -(__v2df)__X,
7925 (__v2df)__Y,
7926 (__mmask8)__U,
7928}
7929
7930#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
7931 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7932 -(__v2df)(__m128d)(X), \
7933 (__v2df)(__m128d)(Y), \
7934 (__mmask8)(U), (int)(R)))
7935
/* Immediate-controlled 64-bit lane permute (VPERMPD/VPERMQ): the imm8 C
 * selects source lanes independently within each 256-bit half.  mask/maskz
 * variants merge with W or zero under the write-mask U via a select. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
7961
7962static __inline__ __m512d __DEFAULT_FN_ATTRS512
7963_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
7964{
7965 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
7966}
7967
7968static __inline__ __m512d __DEFAULT_FN_ATTRS512
7969_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
7970{
7971 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7972 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7973 (__v8df)__W);
7974}
7975
7976static __inline__ __m512d __DEFAULT_FN_ATTRS512
7977_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
7978{
7979 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7980 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7981 (__v8df)_mm512_setzero_pd());
7982}
7983
7984static __inline__ __m512i __DEFAULT_FN_ATTRS512
7985_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
7986{
7987 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
7988}
7989
7990static __inline__ __m512i __DEFAULT_FN_ATTRS512
7992{
7993 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7994 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
7995 (__v8di)_mm512_setzero_si512());
7996}
7997
7998static __inline__ __m512i __DEFAULT_FN_ATTRS512
7999_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8000 __m512i __Y)
8001{
8002 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8003 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8004 (__v8di)__W);
8005}
8006
8007static __inline__ __m512 __DEFAULT_FN_ATTRS512
8008_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8009{
8010 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8011}
8012
8013static __inline__ __m512 __DEFAULT_FN_ATTRS512
8014_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8015{
8016 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8017 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8018 (__v16sf)__W);
8019}
8020
8021static __inline__ __m512 __DEFAULT_FN_ATTRS512
8023{
8024 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8025 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8026 (__v16sf)_mm512_setzero_ps());
8027}
8028
8029static __inline__ __m512i __DEFAULT_FN_ATTRS512
8030_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8031{
8032 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8033}
8034
8035#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8036
8037static __inline__ __m512i __DEFAULT_FN_ATTRS512
8039{
8040 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8041 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8042 (__v16si)_mm512_setzero_si512());
8043}
8044
8045static __inline__ __m512i __DEFAULT_FN_ATTRS512
8046_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8047 __m512i __Y)
8048{
8049 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8050 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8051 (__v16si)__W);
8052}
8053
8054#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8055
8056static __inline__ __mmask16
8058 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8059}
8060
8063 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8064}
8065
8068 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8069}
8070
8071static __inline__ int __DEFAULT_FN_ATTRS
8073{
8074 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8075}
8076
8077static __inline__ int __DEFAULT_FN_ATTRS
8079{
8080 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8081}
8082
8083static __inline__ unsigned char __DEFAULT_FN_ATTRS
8085{
8086 return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8087}
8088
8089static __inline__ unsigned char __DEFAULT_FN_ATTRS
8091{
8092 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8093}
8094
8095static __inline__ unsigned char __DEFAULT_FN_ATTRS
8096_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8097 *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8098 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8099}
8100
8101static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8103{
8104 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8105}
8106
8109 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8110}
8111
8114 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8115}
8116
8117#define _kand_mask16 _mm512_kand
8118#define _kandn_mask16 _mm512_kandn
8119#define _knot_mask16 _mm512_knot
8120#define _kor_mask16 _mm512_kor
8121#define _kxnor_mask16 _mm512_kxnor
8122#define _kxor_mask16 _mm512_kxor
8123
8124#define _kshiftli_mask16(A, I) \
8125 ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))
8126
8127#define _kshiftri_mask16(A, I) \
8128 ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8129
8130static __inline__ unsigned int __DEFAULT_FN_ATTRS
8132 return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8133}
8134
8135static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8136_cvtu32_mask16(unsigned int __A) {
8137 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8138}
8139
8140static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8142 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8143}
8144
8145static __inline__ void __DEFAULT_FN_ATTRS
8147 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8148}
8149
8150static __inline__ void __DEFAULT_FN_ATTRS512
8151_mm512_stream_si512 (void * __P, __m512i __A)
8152{
8153 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8154 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8155}
8156
8157static __inline__ __m512i __DEFAULT_FN_ATTRS512
8159{
8160 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8161 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8162}
8163
8164static __inline__ void __DEFAULT_FN_ATTRS512
8165_mm512_stream_pd (void *__P, __m512d __A)
8166{
8167 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8168 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8169}
8170
8171static __inline__ void __DEFAULT_FN_ATTRS512
8172_mm512_stream_ps (void *__P, __m512 __A)
8173{
8174 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8175 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8176}
8177
8178static __inline__ __m512d __DEFAULT_FN_ATTRS512
8179_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8180{
8181 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8182 (__v8df) __W,
8183 (__mmask8) __U);
8184}
8185
8186static __inline__ __m512d __DEFAULT_FN_ATTRS512
8188{
8189 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8190 (__v8df)
8192 (__mmask8) __U);
8193}
8194
8195static __inline__ __m512i __DEFAULT_FN_ATTRS512
8196_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8197{
8198 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8199 (__v8di) __W,
8200 (__mmask8) __U);
8201}
8202
8203static __inline__ __m512i __DEFAULT_FN_ATTRS512
8205{
8206 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8207 (__v8di)
8209 (__mmask8) __U);
8210}
8211
8212static __inline__ __m512 __DEFAULT_FN_ATTRS512
8213_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8214{
8215 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8216 (__v16sf) __W,
8217 (__mmask16) __U);
8218}
8219
8220static __inline__ __m512 __DEFAULT_FN_ATTRS512
8222{
8223 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8224 (__v16sf)
8226 (__mmask16) __U);
8227}
8228
8229static __inline__ __m512i __DEFAULT_FN_ATTRS512
8230_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8231{
8232 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8233 (__v16si) __W,
8234 (__mmask16) __U);
8235}
8236
8237static __inline__ __m512i __DEFAULT_FN_ATTRS512
8239{
8240 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8241 (__v16si)
8243 (__mmask16) __U);
8244}
8245
/* Scalar compare-into-mask macros (VCMPSS/VCMPSD).  P is a _CMP_* predicate;
 * the result mask has at most bit 0 set.  The _round forms take an explicit
 * SAE/rounding argument R; the plain forms use _MM_FROUND_CUR_DIRECTION. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8289
8290/* Bit Test */
8291
8292static __inline __mmask16 __DEFAULT_FN_ATTRS512
8293_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8294{
8297}
8298
8299static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8300_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8301{
8302 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8304}
8305
8306static __inline __mmask8 __DEFAULT_FN_ATTRS512
8307_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8308{
8309 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8311}
8312
8313static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8314_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8315{
8316 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8318}
8319
8320static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8321_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8322{
8323 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8325}
8326
8327static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8328_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8329{
8330 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8332}
8333
8334static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8335_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8336{
8337 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8339}
8340
8341static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8342_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8343{
8344 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8346}
8347
8348static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8350{
8351 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8352 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8353}
8354
8355static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8356_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8357 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8358 (__v16sf)_mm512_movehdup_ps(__A),
8359 (__v16sf)__W);
8360}
8361
8362static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8364 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8365 (__v16sf)_mm512_movehdup_ps(__A),
8366 (__v16sf)_mm512_setzero_ps());
8367}
8368
8369static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8371{
8372 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8373 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8374}
8375
8376static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8377_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8378 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8379 (__v16sf)_mm512_moveldup_ps(__A),
8380 (__v16sf)__W);
8381}
8382
8383static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8385 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8386 (__v16sf)_mm512_moveldup_ps(__A),
8387 (__v16sf)_mm512_setzero_ps());
8388}
8389
/* Masked scalar moves: element 0 comes from _mm_move_ss/_mm_move_sd(A, B)
 * when the mask bit is set, otherwise from __W (merge) or zero (maskz);
 * the upper elements follow __A via _mm_move_*. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
                                     _mm_setzero_ps());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
                                     _mm_setzero_pd());
}
8411
/* Masked scalar stores/loads: only bit 0 of the mask is honoured
 * (hence the explicit __U & 1). */
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
{
  __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
}

static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
{
  __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
{
  /* Build the merge source: element 0 of __W, upper elements zero,
   * matching the VMOVSS load semantics. */
  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
                                                (__v4sf)_mm_setzero_ps(),
                                                0, 4, 4, 4);

  return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_load_ss (__mmask8 __U, const float* __A)
{
  return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
                                                (__v4sf) _mm_setzero_ps(),
                                                __U & 1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
{
  /* Merge source: element 0 of __W, element 1 zeroed. */
  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
                                                 (__v2df)_mm_setzero_pd(),
                                                 0, 2);

  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_load_sd (__mmask8 __U, const double* __A)
{
  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
                                                  (__v2df) _mm_setzero_pd(),
                                                  __U & 1);
}
8459
/* VPSHUFD: shuffle 32-bit elements within each 128-bit lane by imm8 I;
 * masked forms merge/zero via a select. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8472
8473static __inline__ __m512d __DEFAULT_FN_ATTRS512
8474_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8475{
8476 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8477 (__v8df) __W,
8478 (__mmask8) __U);
8479}
8480
8481static __inline__ __m512d __DEFAULT_FN_ATTRS512
8483{
8484 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8485 (__v8df) _mm512_setzero_pd (),
8486 (__mmask8) __U);
8487}
8488
8489static __inline__ __m512i __DEFAULT_FN_ATTRS512
8490_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8491{
8492 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8493 (__v8di) __W,
8494 (__mmask8) __U);
8495}
8496
8497static __inline__ __m512i __DEFAULT_FN_ATTRS512
8499{
8500 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8501 (__v8di) _mm512_setzero_si512 (),
8502 (__mmask8) __U);
8503}
8504
8505static __inline__ __m512d __DEFAULT_FN_ATTRS512
8506_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8507{
8508 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8509 (__v8df) __W,
8510 (__mmask8) __U);
8511}
8512
8513static __inline__ __m512d __DEFAULT_FN_ATTRS512
8515{
8516 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8517 (__v8df) _mm512_setzero_pd(),
8518 (__mmask8) __U);
8519}
8520
8521static __inline__ __m512i __DEFAULT_FN_ATTRS512
8522_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8523{
8524 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8525 (__v8di) __W,
8526 (__mmask8) __U);
8527}
8528
8529static __inline__ __m512i __DEFAULT_FN_ATTRS512
8531{
8532 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8533 (__v8di) _mm512_setzero_si512(),
8534 (__mmask8) __U);
8535}
8536
8537static __inline__ __m512 __DEFAULT_FN_ATTRS512
8538_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8539{
8540 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8541 (__v16sf) __W,
8542 (__mmask16) __U);
8543}
8544
8545static __inline__ __m512 __DEFAULT_FN_ATTRS512
8547{
8548 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8549 (__v16sf) _mm512_setzero_ps(),
8550 (__mmask16) __U);
8551}
8552
8553static __inline__ __m512i __DEFAULT_FN_ATTRS512
8554_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8555{
8556 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8557 (__v16si) __W,
8558 (__mmask16) __U);
8559}
8560
8561static __inline__ __m512i __DEFAULT_FN_ATTRS512
8563{
8564 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8565 (__v16si) _mm512_setzero_si512(),
8566 (__mmask16) __U);
8567}
8568
8569static __inline__ __m512 __DEFAULT_FN_ATTRS512
8570_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8571{
8572 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8573 (__v16sf) __W,
8574 (__mmask16) __U);
8575}
8576
8577static __inline__ __m512 __DEFAULT_FN_ATTRS512
8579{
8580 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8581 (__v16sf) _mm512_setzero_ps(),
8582 (__mmask16) __U);
8583}
8584
8585static __inline__ __m512i __DEFAULT_FN_ATTRS512
8586_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8587{
8588 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8589 (__v16si) __W,
8590 (__mmask16) __U);
8591}
8592
8593static __inline__ __m512i __DEFAULT_FN_ATTRS512
8595{
8596 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8597 (__v16si) _mm512_setzero_si512(),
8598 (__mmask16) __U);
8599}
8600
/* VCVTPS2PD with explicit SAE argument R: widen 8 floats to 8 doubles. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
8615
8616static __inline__ __m512d
8618 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8619}
8620
8621static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8622_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) {
8623 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8624 (__v8df)_mm512_cvtps_pd(__A),
8625 (__v8df)__W);
8626}
8627
8628static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8630 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8631 (__v8df)_mm512_cvtps_pd(__A),
8632 (__v8df)_mm512_setzero_pd());
8633}
8634
8635static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8637 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8638}
8639
8640static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8641_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) {
8642 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8643}
8644
8645static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8646_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {
8647 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8648 (__v8df)__W);
8649}
8650
8651static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8653 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8654 (__v8df)_mm512_setzero_pd());
8655}
8656
8657static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8658_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8659 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8660 (__v16sf)__W);
8661}
8662
8663static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8665 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8666 (__v16sf)_mm512_setzero_ps());
8667}
8668
8669static __inline__ void __DEFAULT_FN_ATTRS512
8671{
8672 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8673 (__mmask8) __U);
8674}
8675
8676static __inline__ void __DEFAULT_FN_ATTRS512
8678{
8679 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8680 (__mmask8) __U);
8681}
8682
8683static __inline__ void __DEFAULT_FN_ATTRS512
8685{
8686 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8687 (__mmask16) __U);
8688}
8689
8690static __inline__ void __DEFAULT_FN_ATTRS512
8692{
8693 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8694 (__mmask16) __U);
8695}
8696
/* VCVTSD2SS with explicit rounding R: convert element 0 of B (double) to
 * float, upper elements taken from A; masked forms merge with W or zero. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
8714
8715static __inline__ __m128 __DEFAULT_FN_ATTRS128
8716_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
8717{
8718 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8719 (__v2df)__B,
8720 (__v4sf)__W,
8722}
8723
8724static __inline__ __m128 __DEFAULT_FN_ATTRS128
8725_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
8726{
8727 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8728 (__v2df)__B,
8729 (__v4sf)_mm_setzero_ps(),
8731}
8732
/* _i32/_i64 spellings are aliases of the classic _si32/_si64 intrinsics. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

#ifdef __x86_64__
/* Signed 64-bit integer -> scalar double with explicit rounding R. */
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif

/* Signed 32-bit integer -> scalar float with explicit rounding R. */
#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
/* Signed 64-bit integer -> scalar float with explicit rounding R. */
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
8769
/* VCVTSS2SD with explicit SAE argument R: convert element 0 of B (float)
 * to double, upper element taken from A; masked forms merge/zero. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
8787
8788static __inline__ __m128d __DEFAULT_FN_ATTRS128
8789_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
8790{
8791 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8792 (__v4sf)__B,
8793 (__v2df)__W,
8795}
8796
8797static __inline__ __m128d __DEFAULT_FN_ATTRS128
8798_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
8799{
8800 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8801 (__v4sf)__B,
8802 (__v2df)_mm_setzero_pd(),
8804}
8805
/* Unsigned integer -> scalar float/double conversions.  Implemented as a
 * plain element-0 assignment; the compiler selects VCVTUSI2SD/SS. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtu32_sd (__m128d __A, unsigned __B)
{
  __A[0] = __B;
  return __A;
}

#ifdef __x86_64__
/* Unsigned 64-bit integer -> scalar double with explicit rounding R. */
#define _mm_cvt_roundu64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                       (unsigned long long)(B), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  __A[0] = __B;
  return __A;
}
#endif

/* Unsigned 32-bit integer -> scalar float with explicit rounding R. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu32_ss (__m128 __A, unsigned __B)
{
  __A[0] = __B;
  return __A;
}

#ifdef __x86_64__
/* Unsigned 64-bit integer -> scalar float with explicit rounding R. */
#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  __A[0] = __B;
  return __A;
}
#endif
8849
/// Broadcasts __A into all 16 lanes; lanes whose bit in __M is clear keep
/// the corresponding lane of __O.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) {
  return (__m512i) __builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_set1_epi32(__A),
                                              (__v16si) __O);
}
8856
/// Broadcasts __A into all 8 lanes; lanes whose bit in __M is clear keep
/// the corresponding lane of __O.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) {
  return (__m512i) __builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_set1_epi64(__A),
                                              (__v8di) __O);
}
8863
8865 char __e63, char __e62, char __e61, char __e60, char __e59, char __e58,
8866 char __e57, char __e56, char __e55, char __e54, char __e53, char __e52,
8867 char __e51, char __e50, char __e49, char __e48, char __e47, char __e46,
8868 char __e45, char __e44, char __e43, char __e42, char __e41, char __e40,
8869 char __e39, char __e38, char __e37, char __e36, char __e35, char __e34,
8870 char __e33, char __e32, char __e31, char __e30, char __e29, char __e28,
8871 char __e27, char __e26, char __e25, char __e24, char __e23, char __e22,
8872 char __e21, char __e20, char __e19, char __e18, char __e17, char __e16,
8873 char __e15, char __e14, char __e13, char __e12, char __e11, char __e10,
8874 char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3,
8875 char __e2, char __e1, char __e0) {
8876
8877 return __extension__ (__m512i)(__v64qi)
8878 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8879 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8880 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8881 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
8882 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
8883 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
8884 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
8885 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
8886}
8887
8889 short __e31, short __e30, short __e29, short __e28, short __e27,
8890 short __e26, short __e25, short __e24, short __e23, short __e22,
8891 short __e21, short __e20, short __e19, short __e18, short __e17,
8892 short __e16, short __e15, short __e14, short __e13, short __e12,
8893 short __e11, short __e10, short __e9, short __e8, short __e7, short __e6,
8894 short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) {
8895 return __extension__ (__m512i)(__v32hi)
8896 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8897 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8898 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8899 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
8900}
8901
8903 int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H,
8904 int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) {
8905 return __extension__ (__m512i)(__v16si)
8906 { __P, __O, __N, __M, __L, __K, __J, __I,
8907 __H, __G, __F, __E, __D, __C, __B, __A };
8908}
8909
8911 int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8,
8912 int e9, int e10, int e11, int e12, int e13, int e14, int e15) {
8913 return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,
8914 e3, e2, e1, e0);
8915}
8916
/// Builds a vector of 8 64-bit elements; __A is the highest-index element,
/// __H the lowest (the initializer lists them in memory order).
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_epi64(long long __A, long long __B, long long __C, long long __D,
                 long long __E, long long __F, long long __G, long long __H) {
  return __extension__ (__m512i) (__v8di)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}
8923
/// Reversed-order variant of _mm512_set_epi64: e0 is the lowest-index
/// element.
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3,
                  long long e4, long long e5, long long e6, long long e7) {
  return _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
}
8929
/// Builds a vector of 8 doubles; __A is the highest-index element, __H the
/// lowest (the initializer lists them in memory order).
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_pd(double __A, double __B, double __C, double __D, double __E,
              double __F, double __G, double __H) {
  return __extension__ (__m512d)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}
8936
/// Reversed-order variant of _mm512_set_pd: e0 is the lowest-index element.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5,
               double e6, double e7) {
  return _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0);
}
8942
/// Builds a vector of 16 floats; __A is the highest-index element, __P the
/// lowest (the initializer lists them in memory order).
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F,
              float __G, float __H, float __I, float __J, float __K, float __L,
              float __M, float __N, float __O, float __P) {
  return __extension__ (__m512)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}
8951
/// Reversed-order variant of _mm512_set_ps: e0 is the lowest-index element.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5,
               float e6, float e7, float e8, float e9, float e10, float e11,
               float e12, float e13, float e14, float e15) {
  return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
                       e2, e1, e0);
}
8959
8960static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8961_mm512_abs_ps(__m512 __A) {
8962 return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
8963}
8964
/// Per-lane |__A| via sign-bit clear; lanes whose bit in __K is clear take
/// the corresponding lane of __W instead.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) {
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
8969
8970static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8971_mm512_abs_pd(__m512d __A) {
8972 return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
8973}
8974
/// Per-lane |__A| via sign-bit clear; lanes whose bit in __K is clear take
/// the corresponding lane of __W instead.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) {
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}
8979
8980/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
8981 * outputs. This class of vector operation forms the basis of many scientific
8982 * computations. In vector-reduction arithmetic, the evaluation order is
8983 * independent of the order of the input elements of V.
8984
8985 * For floating-point intrinsics:
8986 * 1. When using fadd/fmul intrinsics, the order of operations within the
8987 * vector is unspecified (associative math).
8988 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
8989 * produce unspecified results.
8990
 * A bisection method is used: at each step, the vector produced by the
 * previous step is partitioned in half, and the operation is performed on
 * the two halves.
8993 * This takes log2(n) steps where n is the number of elements in the vector.
8994 */
8995
8996static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8998 return __builtin_reduce_add((__v8di)__W);
8999}
9000
9001static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9003 return __builtin_reduce_mul((__v8di)__W);
9004}
9005
9006static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9008 return __builtin_reduce_and((__v8di)__W);
9009}
9010
9011static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9013 return __builtin_reduce_or((__v8di)__W);
9014}
9015
9016static __inline__ long long __DEFAULT_FN_ATTRS512
9018 __W = _mm512_maskz_mov_epi64(__M, __W);
9019 return __builtin_reduce_add((__v8di)__W);
9020}
9021
9022static __inline__ long long __DEFAULT_FN_ATTRS512
9024 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
9025 return __builtin_reduce_mul((__v8di)__W);
9026}
9027
9028static __inline__ long long __DEFAULT_FN_ATTRS512
9030 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
9031 return __builtin_reduce_and((__v8di)__W);
9032}
9033
9034static __inline__ long long __DEFAULT_FN_ATTRS512
9036 __W = _mm512_maskz_mov_epi64(__M, __W);
9037 return __builtin_reduce_or((__v8di)__W);
9038}
9039
9040// -0.0 is used to ignore the start value since it is the neutral value of
9041// floating point addition. For more information, please refer to
9042// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
}

/// Multiply-reduces the eight double lanes of __W; 1.0 is the neutral
/// start value for multiplication.
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
}
9050
9051static __inline__ double __DEFAULT_FN_ATTRS512
9053 __W = _mm512_maskz_mov_pd(__M, __W);
9054 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9055}
9056
9057static __inline__ double __DEFAULT_FN_ATTRS512
9059 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9060 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9061}
9062
9063static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9065 return __builtin_reduce_add((__v16si)__W);
9066}
9067
9068static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9070 return __builtin_reduce_mul((__v16si)__W);
9071}
9072
9073static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9075 return __builtin_reduce_and((__v16si)__W);
9076}
9077
9078static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9080 return __builtin_reduce_or((__v16si)__W);
9081}
9082
9083static __inline__ int __DEFAULT_FN_ATTRS512
9085 __W = _mm512_maskz_mov_epi32(__M, __W);
9086 return __builtin_reduce_add((__v16si)__W);
9087}
9088
9089static __inline__ int __DEFAULT_FN_ATTRS512
9091 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9092 return __builtin_reduce_mul((__v16si)__W);
9093}
9094
9095static __inline__ int __DEFAULT_FN_ATTRS512
9097 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
9098 return __builtin_reduce_and((__v16si)__W);
9099}
9100
9101static __inline__ int __DEFAULT_FN_ATTRS512
9103 __W = _mm512_maskz_mov_epi32(__M, __W);
9104 return __builtin_reduce_or((__v16si)__W);
9105}
9106
9107static __inline__ float __DEFAULT_FN_ATTRS512
9109 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9110}
9111
9112static __inline__ float __DEFAULT_FN_ATTRS512
9114 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9115}
9116
9117static __inline__ float __DEFAULT_FN_ATTRS512
9119 __W = _mm512_maskz_mov_ps(__M, __W);
9120 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9121}
9122
9123static __inline__ float __DEFAULT_FN_ATTRS512
9125 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9126 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9127}
9128
9129static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9131 return __builtin_reduce_max((__v8di)__V);
9132}
9133
9134static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9136 return __builtin_reduce_max((__v8du)__V);
9137}
9138
9139static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9141 return __builtin_reduce_min((__v8di)__V);
9142}
9143
9144static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9146 return __builtin_reduce_min((__v8du)__V);
9147}
9148
9149static __inline__ long long __DEFAULT_FN_ATTRS512
9151 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9152 return __builtin_reduce_max((__v8di)__V);
9153}
9154
9155static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9157 __V = _mm512_maskz_mov_epi64(__M, __V);
9158 return __builtin_reduce_max((__v8du)__V);
9159}
9160
9161static __inline__ long long __DEFAULT_FN_ATTRS512
9163 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9164 return __builtin_reduce_min((__v8di)__V);
9165}
9166
9167static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9169 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
9170 return __builtin_reduce_min((__v8du)__V);
9171}
9172static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9174 return __builtin_reduce_max((__v16si)__V);
9175}
9176
9177static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9179 return __builtin_reduce_max((__v16su)__V);
9180}
9181
9182static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9184 return __builtin_reduce_min((__v16si)__V);
9185}
9186
9187static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9189 return __builtin_reduce_min((__v16su)__V);
9190}
9191
9192static __inline__ int __DEFAULT_FN_ATTRS512
9194 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9195 return __builtin_reduce_max((__v16si)__V);
9196}
9197
9198static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9200 __V = _mm512_maskz_mov_epi32(__M, __V);
9201 return __builtin_reduce_max((__v16su)__V);
9202}
9203
9204static __inline__ int __DEFAULT_FN_ATTRS512
9206 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9207 return __builtin_reduce_min((__v16si)__V);
9208}
9209
9210static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9212 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
9213 return __builtin_reduce_min((__v16su)__V);
9214}
9215
9216static __inline__ double __DEFAULT_FN_ATTRS512
9218 return __builtin_ia32_reduce_fmax_pd512(__V);
9219}
9220
9221static __inline__ double __DEFAULT_FN_ATTRS512
9223 return __builtin_ia32_reduce_fmin_pd512(__V);
9224}
9225
9226static __inline__ double __DEFAULT_FN_ATTRS512
9228 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9229 return __builtin_ia32_reduce_fmax_pd512(__V);
9230}
9231
9232static __inline__ double __DEFAULT_FN_ATTRS512
9234 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9235 return __builtin_ia32_reduce_fmin_pd512(__V);
9236}
9237
9238static __inline__ float __DEFAULT_FN_ATTRS512
9240 return __builtin_ia32_reduce_fmax_ps512(__V);
9241}
9242
9243static __inline__ float __DEFAULT_FN_ATTRS512
9245 return __builtin_ia32_reduce_fmin_ps512(__V);
9246}
9247
9248static __inline__ float __DEFAULT_FN_ATTRS512
9250 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9251 return __builtin_ia32_reduce_fmax_ps512(__V);
9252}
9253
9254static __inline__ float __DEFAULT_FN_ATTRS512
9256 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9257 return __builtin_ia32_reduce_fmin_ps512(__V);
9258}
9259
9260/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9261/// 32-bit signed integer value.
9262///
9263/// \headerfile <x86intrin.h>
9264///
9265/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9266///
9267/// \param __A
9268/// A vector of [16 x i32]. The least significant 32 bits are moved to the
9269/// destination.
9270/// \returns A 32-bit signed integer containing the moved value.
9271static __inline__ int __DEFAULT_FN_ATTRS512
9273 __v16si __b = (__v16si)__A;
9274 return __b[0];
9275}
9276
9277/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
9278/// locations starting at location \a base_addr at packed 32-bit integer indices
9279/// stored in the lower half of \a vindex scaled by \a scale them in dst.
9280///
9281/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9282///
9283/// \code{.operation}
9284/// FOR j := 0 to 7
9285/// i := j*64
9286/// m := j*32
9287/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9288/// dst[i+63:i] := MEM[addr+63:addr]
9289/// ENDFOR
9290/// dst[MAX:512] := 0
9291/// \endcode
9292#define _mm512_i32logather_pd(vindex, base_addr, scale) \
9293 _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9294
9295/// Loads 8 double-precision (64-bit) floating-point elements from memory
9296/// starting at location \a base_addr at packed 32-bit integer indices stored in
9297/// the lower half of \a vindex scaled by \a scale into dst using writemask
9298/// \a mask (elements are copied from \a src when the corresponding mask bit is
9299/// not set).
9300///
9301/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9302///
9303/// \code{.operation}
9304/// FOR j := 0 to 7
9305/// i := j*64
9306/// m := j*32
9307/// IF mask[j]
9308/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9309/// dst[i+63:i] := MEM[addr+63:addr]
9310/// ELSE
9311/// dst[i+63:i] := src[i+63:i]
9312/// FI
9313/// ENDFOR
9314/// dst[MAX:512] := 0
9315/// \endcode
9316#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
9317 _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \
9318 (base_addr), (scale))
9319
9320/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9321/// at packed 32-bit integer indices stored in the lower half of \a vindex
9322/// scaled by \a scale and stores them in dst.
9323///
9324/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9325///
9326/// \code{.operation}
9327/// FOR j := 0 to 7
9328/// i := j*64
9329/// m := j*32
9330/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9331/// dst[i+63:i] := MEM[addr+63:addr]
9332/// ENDFOR
9333/// dst[MAX:512] := 0
9334/// \endcode
9335#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
9336 _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9337
9338/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9339/// at packed 32-bit integer indices stored in the lower half of \a vindex
9340/// scaled by \a scale and stores them in dst using writemask \a mask (elements
9341/// are copied from \a src when the corresponding mask bit is not set).
9342///
9343/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9344///
9345/// \code{.operation}
9346/// FOR j := 0 to 7
9347/// i := j*64
9348/// m := j*32
9349/// IF mask[j]
9350/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9351/// dst[i+63:i] := MEM[addr+63:addr]
9352/// ELSE
9353/// dst[i+63:i] := src[i+63:i]
9354/// FI
9355/// ENDFOR
9356/// dst[MAX:512] := 0
9357/// \endcode
9358#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
9359 _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \
9360 (base_addr), (scale))
9361
9362/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9363/// and to memory locations starting at location \a base_addr at packed 32-bit
9364/// integer indices stored in \a vindex scaled by \a scale.
9365///
9366/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9367///
9368/// \code{.operation}
9369/// FOR j := 0 to 7
9370/// i := j*64
9371/// m := j*32
9372/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9373/// MEM[addr+63:addr] := v1[i+63:i]
9374/// ENDFOR
9375/// \endcode
9376#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
9377 _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
9378
9379/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9380/// to memory locations starting at location \a base_addr at packed 32-bit
9381/// integer indices stored in \a vindex scaled by \a scale. Only those elements
9382/// whose corresponding mask bit is set in writemask \a mask are written to
9383/// memory.
9384///
9385/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9386///
9387/// \code{.operation}
9388/// FOR j := 0 to 7
9389/// i := j*64
9390/// m := j*32
9391/// IF mask[j]
9392/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9393/// MEM[addr+63:addr] := a[i+63:i]
9394/// FI
9395/// ENDFOR
9396/// \endcode
9397#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
9398 _mm512_mask_i32scatter_pd((base_addr), (mask), \
9399 _mm512_castsi512_si256(vindex), (v1), (scale))
9400
9401/// Stores 8 packed 64-bit integer elements located in \a v1 and stores them in
9402/// memory locations starting at location \a base_addr at packed 32-bit integer
9403/// indices stored in \a vindex scaled by \a scale.
9404///
9405/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9406///
9407/// \code{.operation}
9408/// FOR j := 0 to 7
9409/// i := j*64
9410/// m := j*32
9411/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9412/// MEM[addr+63:addr] := a[i+63:i]
9413/// ENDFOR
9414/// \endcode
9415#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
9416 _mm512_i32scatter_epi64((base_addr), \
9417 _mm512_castsi512_si256(vindex), (v1), (scale))
9418
9419/// Stores 8 packed 64-bit integer elements located in a and stores them in
9420/// memory locations starting at location \a base_addr at packed 32-bit integer
9421/// indices stored in \a vindex scaled by scale using writemask \a mask (elements
9422/// whose corresponding mask bit is not set are not written to memory).
9423///
9424/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9425///
9426/// \code{.operation}
9427/// FOR j := 0 to 7
9428/// i := j*64
9429/// m := j*32
9430/// IF mask[j]
9431/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9432/// MEM[addr+63:addr] := a[i+63:i]
9433/// FI
9434/// ENDFOR
9435/// \endcode
9436#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
9437 _mm512_mask_i32scatter_epi64((base_addr), (mask), \
9438 _mm512_castsi512_si256(vindex), (v1), (scale))
9439
9440#undef __DEFAULT_FN_ATTRS512
9441#undef __DEFAULT_FN_ATTRS128
9442#undef __DEFAULT_FN_ATTRS
9443#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
9444#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
9445#undef __DEFAULT_FN_ATTRS_CONSTEXPR
9446
9447#endif /* __AVX512FINTRIN_H */
#define __L(__X)
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
Definition altivec.h:578
static __inline__ uint32_t volatile uint32_t * __p
Definition arm_acle.h:57
return __v
Definition arm_acle.h:88
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS_CONSTEXPR
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8, int e9, int e10, int e11, int e12, int e13, int e14, int e15)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si256(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7, float e8, float e9, float e10, float e11, float e12, float e13, float e14, float e15)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
_MM_MANTISSA_NORM_ENUM
@ _MM_MANT_NORM_p5_1
@ _MM_MANT_NORM_p5_2
@ _MM_MANT_NORM_1_2
@ _MM_MANT_NORM_p75_1p5
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5, double e6, double e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps256(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3, long long e4, long long e5, long long e6, long long e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_int2mask(int __a)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
_MM_CMPINT_ENUM
@ _MM_CMPINT_NE
@ _MM_CMPINT_NLT
@ _MM_CMPINT_LE
@ _MM_CMPINT_EQ
@ _MM_CMPINT_LT
@ _MM_CMPINT_UNUSED
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_knot(__mmask16 __M)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
unsigned char __mmask8
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
@ _MM_TERNLOG_A
@ _MM_TERNLOG_B
@ _MM_TERNLOG_C
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
_MM_MANTISSA_SIGN_ENUM
@ _MM_MANT_SIGN_zero
@ _MM_MANT_SIGN_src
@ _MM_MANT_SIGN_nan
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi32(int e0, int e1, int e2, int e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32(__m512i __A, __m512i __B)
_MM_PERM_ENUM
@ _MM_PERM_BBCA
@ _MM_PERM_BCBD
@ _MM_PERM_DAAB
@ _MM_PERM_CBBD
@ _MM_PERM_DCCC
@ _MM_PERM_CDBB
@ _MM_PERM_DDDD
@ _MM_PERM_CCCC
@ _MM_PERM_CADA
@ _MM_PERM_BACD
@ _MM_PERM_CCAD
@ _MM_PERM_ABDB
@ _MM_PERM_BBBD
@ _MM_PERM_DCAB
@ _MM_PERM_BABC
@ _MM_PERM_AACD
@ _MM_PERM_BBAB
@ _MM_PERM_DCDB
@ _MM_PERM_BACC
@ _MM_PERM_ABDA
@ _MM_PERM_ACCC
@ _MM_PERM_ADAC
@ _MM_PERM_CCCD
@ _MM_PERM_CADD
@ _MM_PERM_ACCB
@ _MM_PERM_BBDB
@ _MM_PERM_ABBB
@ _MM_PERM_BACB
@ _MM_PERM_CDCA
@ _MM_PERM_ACBC
@ _MM_PERM_ADCB
@ _MM_PERM_BBBC
@ _MM_PERM_DBBA
@ _MM_PERM_BBBB
@ _MM_PERM_DDDB
@ _MM_PERM_CAAA
@ _MM_PERM_ADBB
@ _MM_PERM_ACDB
@ _MM_PERM_DCAD
@ _MM_PERM_DBBC
@ _MM_PERM_BAAB
@ _MM_PERM_BDDD
@ _MM_PERM_BBAD
@ _MM_PERM_DDBA
@ _MM_PERM_CDCD
@ _MM_PERM_CCCA
@ _MM_PERM_DBBB
@ _MM_PERM_DAAD
@ _MM_PERM_DCBA
@ _MM_PERM_CCBC
@ _MM_PERM_ADDD
@ _MM_PERM_DBAC
@ _MM_PERM_ABAB
@ _MM_PERM_CBDB
@ _MM_PERM_CDBC
@ _MM_PERM_AABC
@ _MM_PERM_DABD
@ _MM_PERM_CBBA
@ _MM_PERM_CBAA
@ _MM_PERM_BDDB
@ _MM_PERM_CABC
@ _MM_PERM_BDBD
@ _MM_PERM_BCAD
@ _MM_PERM_ACBA
@ _MM_PERM_ADBA
@ _MM_PERM_ADBC
@ _MM_PERM_DBCB
@ _MM_PERM_CBDC
@ _MM_PERM_CBAD
@ _MM_PERM_ABCC
@ _MM_PERM_AAAD
@ _MM_PERM_CBAC
@ _MM_PERM_CCDA
@ _MM_PERM_CDAC
@ _MM_PERM_BADD
@ _MM_PERM_DAAC
@ _MM_PERM_BCCC
@ _MM_PERM_DBBD
@ _MM_PERM_DDAC
@ _MM_PERM_DACD
@ _MM_PERM_BAAC
@ _MM_PERM_ACCA
@ _MM_PERM_ABDD
@ _MM_PERM_BBCC
@ _MM_PERM_DAAA
@ _MM_PERM_CAAB
@ _MM_PERM_BCDB
@ _MM_PERM_ACBB
@ _MM_PERM_CDAB
@ _MM_PERM_DBDB
@ _MM_PERM_AABB
@ _MM_PERM_DBDA
@ _MM_PERM_BCBA
@ _MM_PERM_CBAB
@ _MM_PERM_DCDC
@ _MM_PERM_BBCB
@ _MM_PERM_CDCB
@ _MM_PERM_AACA
@ _MM_PERM_ACBD
@ _MM_PERM_AAAB
@ _MM_PERM_DCCB
@ _MM_PERM_ADDB
@ _MM_PERM_AAAA
@ _MM_PERM_AACC
@ _MM_PERM_BDDC
@ _MM_PERM_CBBC
@ _MM_PERM_DDCC
@ _MM_PERM_CABD
@ _MM_PERM_AADC
@ _MM_PERM_BCBC
@ _MM_PERM_BCCA
@ _MM_PERM_CCBD
@ _MM_PERM_CBBB
@ _MM_PERM_CDBA
@ _MM_PERM_CACD
@ _MM_PERM_BDAD
@ _MM_PERM_ADCA
@ _MM_PERM_AAAC
@ _MM_PERM_BDDA
@ _MM_PERM_CCAC
@ _MM_PERM_ACDC
@ _MM_PERM_DBCA
@ _MM_PERM_DBAA
@ _MM_PERM_AABD
@ _MM_PERM_CDCC
@ _MM_PERM_DCAA
@ _MM_PERM_DDDC
@ _MM_PERM_CDDB
@ _MM_PERM_AABA
@ _MM_PERM_DDBB
@ _MM_PERM_CDDA
@ _MM_PERM_AADD
@ _MM_PERM_BADC
@ _MM_PERM_BDBA
@ _MM_PERM_DBDD
@ _MM_PERM_BDAC
@ _MM_PERM_DBDC
@ _MM_PERM_BBBA
@ _MM_PERM_DDBC
@ _MM_PERM_BAAA
@ _MM_PERM_BDCC
@ _MM_PERM_DDAB
@ _MM_PERM_BCCB
@ _MM_PERM_BCCD
@ _MM_PERM_ADBD
@ _MM_PERM_ADCC
@ _MM_PERM_CCBB
@ _MM_PERM_CDAA
@ _MM_PERM_BBDA
@ _MM_PERM_CACC
@ _MM_PERM_DCBB
@ _MM_PERM_DABA
@ _MM_PERM_BADB
@ _MM_PERM_ABCA
@ _MM_PERM_CBCC
@ _MM_PERM_ABAD
@ _MM_PERM_BDBC
@ _MM_PERM_DDDA
@ _MM_PERM_ADAB
@ _MM_PERM_CADB
@ _MM_PERM_ADAA
@ _MM_PERM_ACAC
@ _MM_PERM_DADD
@ _MM_PERM_BABD
@ _MM_PERM_ACCD
@ _MM_PERM_CCAA
@ _MM_PERM_AADA
@ _MM_PERM_BDCA
@ _MM_PERM_CDDD
@ _MM_PERM_ABBD
@ _MM_PERM_ACAA
@ _MM_PERM_ACDD
@ _MM_PERM_DABB
@ _MM_PERM_CCCB
@ _MM_PERM_AADB
@ _MM_PERM_DBAD
@ _MM_PERM_BBDD
@ _MM_PERM_BCDC
@ _MM_PERM_CABA
@ _MM_PERM_BBAA
@ _MM_PERM_ADAD
@ _MM_PERM_BADA
@ _MM_PERM_DCDA
@ _MM_PERM_ABBA
@ _MM_PERM_ACAB
@ _MM_PERM_CCDD
@ _MM_PERM_CADC
@ _MM_PERM_DDCB
@ _MM_PERM_BABB
@ _MM_PERM_CCDB
@ _MM_PERM_DDAD
@ _MM_PERM_DBCC
@ _MM_PERM_BCBB
@ _MM_PERM_ADDC
@ _MM_PERM_CCBA
@ _MM_PERM_ABCD
@ _MM_PERM_BCAB
@ _MM_PERM_DCBC
@ _MM_PERM_BCDD
@ _MM_PERM_CCDC
@ _MM_PERM_ABAC
@ _MM_PERM_CBCB
@ _MM_PERM_CCAB
@ _MM_PERM_DDCD
@ _MM_PERM_DACA
@ _MM_PERM_ACAD
@ _MM_PERM_BABA
@ _MM_PERM_CBCD
@ _MM_PERM_CAAD
@ _MM_PERM_DCDD
@ _MM_PERM_BDBB
@ _MM_PERM_BCAA
@ _MM_PERM_ABDC
@ _MM_PERM_BBCD
@ _MM_PERM_CAAC
@ _MM_PERM_BBAC
@ _MM_PERM_CBCA
@ _MM_PERM_DCAC
@ _MM_PERM_ABAA
@ _MM_PERM_CACB
@ _MM_PERM_BBDC
@ _MM_PERM_CDAD
@ _MM_PERM_ADCD
@ _MM_PERM_DADB
@ _MM_PERM_DBCD
@ _MM_PERM_DACC
@ _MM_PERM_DACB
@ _MM_PERM_DCBD
@ _MM_PERM_CACA
@ _MM_PERM_ABBC
@ _MM_PERM_DCCA
@ _MM_PERM_DABC
@ _MM_PERM_CBDD
@ _MM_PERM_DDBD
@ _MM_PERM_DDCA
@ _MM_PERM_BDCD
@ _MM_PERM_CDBD
@ _MM_PERM_ABCB
@ _MM_PERM_CDDC
@ _MM_PERM_AACB
@ _MM_PERM_DDAA
@ _MM_PERM_ADDA
@ _MM_PERM_DADA
@ _MM_PERM_BCDA
@ _MM_PERM_BDAB
@ _MM_PERM_BAAD
@ _MM_PERM_DBAB
@ _MM_PERM_DCCD
@ _MM_PERM_CABB
@ _MM_PERM_BDAA
@ _MM_PERM_BDCB
@ _MM_PERM_ACDA
@ _MM_PERM_DADC
@ _MM_PERM_CBDA
@ _MM_PERM_BCAC
@ _MM_PERM_BACA
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_pd(double e0, double e1, double e2, double e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_ps(float e0, float e1, float e2, float e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition avxintrin.h:3620
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3633
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4304
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4292
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4316
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition emmintrin.h:120
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition emmintrin.h:199
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition emmintrin.h:80
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:1887
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition emmintrin.h:159
static __inline__ void short __D
Definition immintrin.h:342
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition bmi2intrin.h:19
#define _MM_FROUND_FLOOR
Definition smmintrin.h:41
#define _MM_FROUND_CEIL
Definition smmintrin.h:42
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition xmmintrin.h:2804
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:160
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
Definition xmmintrin.h:119
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:79
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2021
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
Definition xmmintrin.h:200